In [0]:
from keras.models import Model
from keras.layers import Input, Activation, merge, Dense, Flatten, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import BatchNormalization, add, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import plot_model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator

from keras.datasets import cifar10
from keras.utils import np_utils

import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import toimage

# Load the CIFAR-10 train and test splits: X_* hold the images, y_* the class labels.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

Using TensorFlow backend.


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [0]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import toimage
from sklearn.model_selection import train_test_split
# Human-readable names of the ten CIFAR-10 classes.
# NOTE(review): presumably indexed by the integer class id - confirm against the dataset.
cifar10_labels = np.array(
    ['airplane', 'automobile', 'bird', 'cat', 'deer',
     'dog', 'frog', 'horse', 'ship', 'truck'])

In [0]:
# Cast the pixel arrays to float32 and rescale them into [0, 1].
# Each RGB component is stored as a value in 0-255 (white is R 255, G 255, B 255).
# NOTE: this cell overwrites X_train / X_test, so running it a second time
# divides the already-scaled data by 255 again.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# One-hot encode the labels, e.g. 4 becomes [0, 0, 0, 0, 1, 0, 0, 0, 0, 0].
# Without this the targets would be treated as a regression problem rather
# than as class labels, and training would not work well.
n_classes = 10
Y_train, Y_test = (np_utils.to_categorical(labels, n_classes)
                   for labels in (y_train, y_test))

In [0]:
#
# ResNet のモデルを定義するセル
#

def _shortcut(inputs, residual):
  """Project ``inputs`` onto the shape of ``residual`` and add the two.

  Args:
    inputs: Tensor that entered the residual block (the skip branch; it has
      not passed through the block's convolutions).
    residual: Tensor produced by the block's convolution stack.

  Returns:
    The element-wise sum ``add([shortcut, residual])``, where ``shortcut``
    is ``inputs`` passed through a 1x1 convolution.
  """
  # Imported locally so the function only relies on the public backend API;
  # ``K.int_shape`` replaces the private ``_keras_shape`` attribute.
  from keras import backend as K

  # Static shapes, read as (batch, rows, cols, channels). This assumes the
  # default channels_last layout, which is what this notebook uses.
  input_shape = K.int_shape(inputs)
  residual_shape = K.int_shape(residual)
  n_filters = residual_shape[3]

  def _ratio(in_dim, out_dim):
    # Unknown (None) dimensions cannot be compared; fall back to stride 1.
    if not in_dim or not out_dim:
      return 1
    return max(1, int(round(in_dim / float(out_dim))))

  # If the convolution stack down-sampled the feature map, stride the
  # projection by the same factor so that both branches can be added.
  shortcut_strides = (_ratio(input_shape[1], residual_shape[1]),
                      _ratio(input_shape[2], residual_shape[2]))

  # The channel counts of the two branches may differ, so a 1x1 convolution
  # maps ``inputs`` to the residual's filter count. It is applied
  # unconditionally, i.e. the skip branch is always a learned projection.
  shortcut = Convolution2D(n_filters, (1,1), strides=shortcut_strides,
                           kernel_initializer='glorot_normal', padding='same')(inputs)

  # Sum the two branches.
  return add([shortcut, residual])


# A ResBlock is a group of layers wrapped by a shortcut connection.
# Every stage below is a plain Conv -> BatchNorm -> ReLU; many published
# variants reorder or replace these steps.
def _resblock(n_filters, strides=(1,1)):
  """Return a function that applies a bottleneck residual block.

  The returned callable runs three Conv -> BatchNorm -> ReLU stages
  (1x1 with ``n_filters``, 3x3 with ``n_filters``, 1x1 with
  ``n_filters * 4``) and merges the result with the block input through
  ``_shortcut``.

  Args:
    n_filters: Filter count of the first two convolutions; the last
      convolution uses four times as many.
    strides: Stride of the block. Only the first convolution is strided, so
      the feature map is down-sampled once. With a value other than (1,1)
      the skip branch built by ``_shortcut`` must shrink by the same factor
      for the final addition to be valid.

  Returns:
    A function mapping an input tensor to the block's output tensor.
  """
  def f(inputs):
    # The stride is applied here only; repeating it on every convolution
    # would compound the down-sampling.
    x = Convolution2D(n_filters, (1,1), strides=strides, kernel_initializer='he_normal', padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters, (3,3), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters * 4, (1,1), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Add the block input back onto the convolved signal; this skip
    # connection is the central idea of ResNet.
    return _shortcut(inputs, x)

  return f

def resnet():
    """Build the three-stage residual classifier for 32x32 RGB images.

    Each stage is BatchNorm -> ReLU -> ``_resblock`` -> BatchNorm -> ReLU,
    with 64, 128 and 256 filters respectively, and consecutive stages are
    separated by a stride-2 max pooling. The network ends with global
    average pooling and a 10-unit softmax layer.

    Returns:
        The uncompiled Keras ``Model``.
    """
    # Experiments that were tried and left disabled: Dropout(0.4) after each
    # residual block and a second _resblock per stage.
    inputs = Input(shape=(32, 32, 3))

    x = inputs
    for stage_index, stage_filters in enumerate((64, 128, 256)):
        if stage_index > 0:
            # Halve the spatial resolution between stages.
            x = MaxPooling2D(strides=(2,2))(x)

        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = _resblock(n_filters=stage_filters)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation='softmax')(x)

    return Model(inputs=inputs, outputs=x)
  
# Instantiate the network and configure it for training: plain SGD,
# categorical cross-entropy loss, accuracy as the reported metric.
model = resnet()
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_12 (InputLayer)           (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
batch_normalization_136 (BatchN (None, 32, 32, 3)    12          input_12[0][0]                   
__________________________________________________________________________________________________
activation_136 (Activation)     (None, 32, 32, 3)    0           batch_normalization_136[0][0]    
__________________________________________________________________________________________________
conv2d_133 (Conv2D)             (None, 32, 32, 64)   256         activation_136[0][0]             
__________________________________________________________________________________________________
batch_norm

In [0]:
# Callbacks: write a checkpoint file named weights.<epoch>.hdf5 and stop
# training once val_acc has not improved for 10 epochs.
callback_op = ModelCheckpoint(filepath='weights.{epoch:02d}.hdf5')
callback_es = EarlyStopping(monitor='val_acc', patience=10,
                            mode='auto', verbose=1)

batch_size = 50
num_epochs = 100  # upper bound; values of 100-300 are commonly used

from keras.preprocessing.image import ImageDataGenerator
# Random augmentation applied on the fly to the training batches.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=20,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images left-right
        vertical_flip=False)  # do not flip images upside-down

# Computes the quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
# All of those options are disabled above; the call is kept so they can be
# switched on without further changes.
datagen.fit(X_train)

# Train on the augmented batches produced by datagen.flow().
# - The callbacks have to be passed here to take effect.
# - The returned History object is kept in `h` so the curves can be plotted.
# NOTE: the test split doubles as validation data, so early stopping is
# driven by test-set accuracy.
h = model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=batch_size),
        steps_per_epoch=int(np.ceil(X_train.shape[0] / float(batch_size))),
        epochs=num_epochs,
        validation_data=(X_test, Y_test),
        callbacks=[callback_op, callback_es],
        workers=1)

model.save('model.hdf5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
  79/1000 [=>............................] - ETA: 1:44 - loss: 0.5558 - acc: 0.8081

KeyboardInterrupt: ignored

In [0]:
#
# ResNet のモデルを定義するセル
#

def _shortcut(inputs, residual):
  """Project ``inputs`` onto the shape of ``residual`` and add the two.

  Args:
    inputs: Tensor that entered the residual block (the skip branch; it has
      not passed through the block's convolutions).
    residual: Tensor produced by the block's convolution stack.

  Returns:
    The element-wise sum ``add([shortcut, residual])``, where ``shortcut``
    is ``inputs`` passed through a 1x1 convolution.
  """
  # Imported locally so the function only relies on the public backend API;
  # ``K.int_shape`` replaces the private ``_keras_shape`` attribute.
  from keras import backend as K

  # Static shapes, read as (batch, rows, cols, channels). This assumes the
  # default channels_last layout, which is what this notebook uses.
  input_shape = K.int_shape(inputs)
  residual_shape = K.int_shape(residual)
  n_filters = residual_shape[3]

  def _ratio(in_dim, out_dim):
    # Unknown (None) dimensions cannot be compared; fall back to stride 1.
    if not in_dim or not out_dim:
      return 1
    return max(1, int(round(in_dim / float(out_dim))))

  # If the convolution stack down-sampled the feature map, stride the
  # projection by the same factor so that both branches can be added.
  shortcut_strides = (_ratio(input_shape[1], residual_shape[1]),
                      _ratio(input_shape[2], residual_shape[2]))

  # The channel counts of the two branches may differ, so a 1x1 convolution
  # maps ``inputs`` to the residual's filter count. It is applied
  # unconditionally, i.e. the skip branch is always a learned projection.
  shortcut = Convolution2D(n_filters, (1,1), strides=shortcut_strides,
                           kernel_initializer='glorot_normal', padding='same')(inputs)

  # Sum the two branches.
  return add([shortcut, residual])


# A ResBlock is a group of layers wrapped by a shortcut connection.
# Every stage below is a plain Conv -> BatchNorm -> ReLU; many published
# variants reorder or replace these steps.
def _resblock(n_filters, strides=(1,1)):
  """Return a function that applies a bottleneck residual block.

  The returned callable runs three Conv -> BatchNorm -> ReLU stages
  (1x1 with ``n_filters``, 3x3 with ``n_filters``, 1x1 with
  ``n_filters * 4``) and merges the result with the block input through
  ``_shortcut``.

  Args:
    n_filters: Filter count of the first two convolutions; the last
      convolution uses four times as many.
    strides: Stride of the block. Only the first convolution is strided, so
      the feature map is down-sampled once. With a value other than (1,1)
      the skip branch built by ``_shortcut`` must shrink by the same factor
      for the final addition to be valid.

  Returns:
    A function mapping an input tensor to the block's output tensor.
  """
  def f(inputs):
    # The stride is applied here only; repeating it on every convolution
    # would compound the down-sampling.
    x = Convolution2D(n_filters, (1,1), strides=strides, kernel_initializer='he_normal', padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters, (3,3), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters * 4, (1,1), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Add the block input back onto the convolved signal; this skip
    # connection is the central idea of ResNet.
    return _shortcut(inputs, x)

  return f

def resnet():
    """Build the four-stage residual classifier for 32x32 RGB images.

    Each stage is BatchNorm -> ReLU -> ``_resblock`` -> BatchNorm -> ReLU,
    with 64, 128, 256 and 512 filters respectively, and consecutive stages
    are separated by a stride-2 max pooling. The network ends with global
    average pooling and a 10-unit softmax layer.

    Returns:
        The uncompiled Keras ``Model``.
    """
    # Experiments that were tried and left disabled: Dropout(0.4) after each
    # residual block and a second _resblock per stage.
    inputs = Input(shape=(32, 32, 3))

    x = inputs
    for stage_index, stage_filters in enumerate((64, 128, 256, 512)):
        if stage_index > 0:
            # Halve the spatial resolution between stages.
            x = MaxPooling2D(strides=(2,2))(x)

        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = _resblock(n_filters=stage_filters)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation='softmax')(x)

    return Model(inputs=inputs, outputs=x)
  
# Instantiate the network and configure it for training: plain SGD,
# categorical cross-entropy loss, accuracy as the reported metric.
model = resnet()
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_15 (InputLayer)           (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
batch_normalization_196 (BatchN (None, 32, 32, 3)    12          input_15[0][0]                   
__________________________________________________________________________________________________
activation_196 (Activation)     (None, 32, 32, 3)    0           batch_normalization_196[0][0]    
__________________________________________________________________________________________________
conv2d_181 (Conv2D)             (None, 32, 32, 64)   256         activation_196[0][0]             
__________________________________________________________________________________________________
batch_norm

In [0]:
# Callbacks: write a checkpoint file named weights.<epoch>.hdf5 and stop
# training once val_acc has not improved for 10 epochs.
callback_op = ModelCheckpoint(filepath='weights.{epoch:02d}.hdf5')
callback_es = EarlyStopping(monitor='val_acc', patience=10,
                            mode='auto', verbose=1)

batch_size = 50
num_epochs = 100  # upper bound; values of 100-300 are commonly used

from keras.preprocessing.image import ImageDataGenerator
# Random augmentation applied on the fly to the training batches.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=20,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images left-right
        vertical_flip=False)  # do not flip images upside-down

# Computes the quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
# All of those options are disabled above; the call is kept so they can be
# switched on without further changes.
datagen.fit(X_train)

# Train on the augmented batches produced by datagen.flow().
# - The callbacks have to be passed here to take effect.
# - The returned History object is kept in `h` so the curves can be plotted.
# NOTE: the test split doubles as validation data, so early stopping is
# driven by test-set accuracy.
h = model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=batch_size),
        steps_per_epoch=int(np.ceil(X_train.shape[0] / float(batch_size))),
        epochs=num_epochs,
        validation_data=(X_test, Y_test),
        callbacks=[callback_op, callback_es],
        workers=1)

model.save('model.hdf5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
  59/1000 [>.............................] - ETA: 2:24 - loss: 0.3049 - acc: 0.8932Buffered data was truncated after reaching the output size limit.

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# `model.history` is the History callback object that Keras attaches to the
# model during training; the per-epoch metric lists live in its `.history`
# dict, so the callback itself must not be indexed directly.
history = model.history.history
epoch = np.arange(len(history['acc']))

# Plot how training accuracy and validation accuracy evolve per epoch.
plt.title('Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.ylim(0.0, 1.0)
plt.plot(epoch, history['acc'], label='train')
plt.plot(epoch, history['val_acc'], label='validation')
plt.legend()
plt.show()

In [0]:
#
# ResNet のモデルを定義するセル
#

def _shortcut(inputs, residual):
  """Project ``inputs`` onto the shape of ``residual`` and add the two.

  Args:
    inputs: Tensor that entered the residual block (the skip branch; it has
      not passed through the block's convolutions).
    residual: Tensor produced by the block's convolution stack.

  Returns:
    The element-wise sum ``add([shortcut, residual])``, where ``shortcut``
    is ``inputs`` passed through a 1x1 convolution.
  """
  # Imported locally so the function only relies on the public backend API;
  # ``K.int_shape`` replaces the private ``_keras_shape`` attribute.
  from keras import backend as K

  # Static shapes, read as (batch, rows, cols, channels). This assumes the
  # default channels_last layout, which is what this notebook uses.
  input_shape = K.int_shape(inputs)
  residual_shape = K.int_shape(residual)
  n_filters = residual_shape[3]

  def _ratio(in_dim, out_dim):
    # Unknown (None) dimensions cannot be compared; fall back to stride 1.
    if not in_dim or not out_dim:
      return 1
    return max(1, int(round(in_dim / float(out_dim))))

  # If the convolution stack down-sampled the feature map, stride the
  # projection by the same factor so that both branches can be added.
  shortcut_strides = (_ratio(input_shape[1], residual_shape[1]),
                      _ratio(input_shape[2], residual_shape[2]))

  # The channel counts of the two branches may differ, so a 1x1 convolution
  # maps ``inputs`` to the residual's filter count. It is applied
  # unconditionally, i.e. the skip branch is always a learned projection.
  shortcut = Convolution2D(n_filters, (1,1), strides=shortcut_strides,
                           kernel_initializer='glorot_normal', padding='same')(inputs)

  # Sum the two branches.
  return add([shortcut, residual])


# A ResBlock is a group of layers wrapped by a shortcut connection.
# Every stage below is a plain Conv -> BatchNorm -> ReLU; many published
# variants reorder or replace these steps.
def _resblock(n_filters, strides=(1,1)):
  """Return a function that applies a bottleneck residual block.

  The returned callable runs three Conv -> BatchNorm -> ReLU stages
  (1x1 with ``n_filters``, 3x3 with ``n_filters``, 1x1 with
  ``n_filters * 4``) and merges the result with the block input through
  ``_shortcut``.

  Args:
    n_filters: Filter count of the first two convolutions; the last
      convolution uses four times as many.
    strides: Stride of the block. Only the first convolution is strided, so
      the feature map is down-sampled once. With a value other than (1,1)
      the skip branch built by ``_shortcut`` must shrink by the same factor
      for the final addition to be valid.

  Returns:
    A function mapping an input tensor to the block's output tensor.
  """
  def f(inputs):
    # The stride is applied here only; repeating it on every convolution
    # would compound the down-sampling.
    x = Convolution2D(n_filters, (1,1), strides=strides, kernel_initializer='he_normal', padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters, (3,3), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters * 4, (1,1), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Add the block input back onto the convolved signal; this skip
    # connection is the central idea of ResNet.
    return _shortcut(inputs, x)

  return f

def resnet():
    """Build the four-stage residual classifier for 32x32 RGB images.

    Each stage is BatchNorm -> ReLU -> ``_resblock`` -> BatchNorm -> ReLU,
    with 64, 128, 256 and 512 filters respectively, and consecutive stages
    are separated by a stride-2 max pooling. The network ends with global
    average pooling and a 10-unit softmax layer.

    Returns:
        The uncompiled Keras ``Model``.
    """
    # Experiments that were tried and left disabled: a second
    # _resblock -> BatchNorm -> ReLU group per stage, followed by Dropout(0.2).
    inputs = Input(shape=(32, 32, 3))

    x = inputs
    for stage_index, stage_filters in enumerate((64, 128, 256, 512)):
        if stage_index > 0:
            # Halve the spatial resolution between stages.
            x = MaxPooling2D(strides=(2,2))(x)

        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = _resblock(n_filters=stage_filters)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation='softmax')(x)

    return Model(inputs=inputs, outputs=x)
  
# Instantiate the network and configure it for training: plain SGD,
# categorical cross-entropy loss, accuracy as the reported metric.
model = resnet()
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
batch_normalization_77 (BatchNo (None, 32, 32, 3)    12          input_4[0][0]                    
__________________________________________________________________________________________________
activation_77 (Activation)      (None, 32, 32, 3)    0           batch_normalization_77[0][0]     
__________________________________________________________________________________________________
conv2d_65 (Conv2D)              (None, 32, 32, 64)   256         activation_77[0][0]              
__________________________________________________________________________________________________
batch_norm

In [0]:
#
# ResNet のモデルを定義するセル
#

def _shortcut(inputs, residual):
  """Project ``inputs`` onto the shape of ``residual`` and add the two.

  Args:
    inputs: Tensor that entered the residual block (the skip branch; it has
      not passed through the block's convolutions).
    residual: Tensor produced by the block's convolution stack.

  Returns:
    The element-wise sum ``add([shortcut, residual])``, where ``shortcut``
    is ``inputs`` passed through a 1x1 convolution.
  """
  # Imported locally so the function only relies on the public backend API;
  # ``K.int_shape`` replaces the private ``_keras_shape`` attribute.
  from keras import backend as K

  # Static shapes, read as (batch, rows, cols, channels). This assumes the
  # default channels_last layout, which is what this notebook uses.
  input_shape = K.int_shape(inputs)
  residual_shape = K.int_shape(residual)
  n_filters = residual_shape[3]

  def _ratio(in_dim, out_dim):
    # Unknown (None) dimensions cannot be compared; fall back to stride 1.
    if not in_dim or not out_dim:
      return 1
    return max(1, int(round(in_dim / float(out_dim))))

  # If the convolution stack down-sampled the feature map, stride the
  # projection by the same factor so that both branches can be added.
  shortcut_strides = (_ratio(input_shape[1], residual_shape[1]),
                      _ratio(input_shape[2], residual_shape[2]))

  # The channel counts of the two branches may differ, so a 1x1 convolution
  # maps ``inputs`` to the residual's filter count. It is applied
  # unconditionally, i.e. the skip branch is always a learned projection.
  shortcut = Convolution2D(n_filters, (1,1), strides=shortcut_strides,
                           kernel_initializer='glorot_normal', padding='same')(inputs)

  # Sum the two branches.
  return add([shortcut, residual])


# A ResBlock is a group of layers wrapped by a shortcut connection.
# Every stage below is a plain Conv -> BatchNorm -> ReLU; many published
# variants reorder or replace these steps.
def _resblock(n_filters, strides=(1,1)):
  """Return a function that applies a bottleneck residual block.

  The returned callable runs three Conv -> BatchNorm -> ReLU stages
  (1x1 with ``n_filters``, 3x3 with ``n_filters``, 1x1 with
  ``n_filters * 4``) and merges the result with the block input through
  ``_shortcut``.

  Args:
    n_filters: Filter count of the first two convolutions; the last
      convolution uses four times as many.
    strides: Stride of the block. Only the first convolution is strided, so
      the feature map is down-sampled once. With a value other than (1,1)
      the skip branch built by ``_shortcut`` must shrink by the same factor
      for the final addition to be valid.

  Returns:
    A function mapping an input tensor to the block's output tensor.
  """
  def f(inputs):
    # The stride is applied here only; repeating it on every convolution
    # would compound the down-sampling.
    x = Convolution2D(n_filters, (1,1), strides=strides, kernel_initializer='he_normal', padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters, (3,3), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters * 4, (1,1), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Add the block input back onto the convolved signal; this skip
    # connection is the central idea of ResNet.
    return _shortcut(inputs, x)

  return f

def resnet():
    """Build the four-stage residual classifier for 32x32 RGB images.

    Each stage is BatchNorm -> ReLU -> ``_resblock`` -> BatchNorm -> ReLU,
    with 64, 128, 256 and 512 filters respectively, and consecutive stages
    are separated by a stride-2 max pooling. The network ends with global
    average pooling and a 10-unit softmax layer.

    Returns:
        The uncompiled Keras ``Model``.
    """
    # Experiments that were tried and left disabled: a second
    # _resblock -> BatchNorm -> ReLU group per stage, followed by Dropout(0.2).
    inputs = Input(shape=(32, 32, 3))

    x = inputs
    for stage_index, stage_filters in enumerate((64, 128, 256, 512)):
        if stage_index > 0:
            # Halve the spatial resolution between stages.
            x = MaxPooling2D(strides=(2,2))(x)

        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = _resblock(n_filters=stage_filters)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation='softmax')(x)

    return Model(inputs=inputs, outputs=x)
  
# Instantiate the network and configure it for training: plain SGD,
# categorical cross-entropy loss, accuracy as the reported metric.
model = resnet()
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model.summary()

In [0]:
# Early stopping: end training once val_acc has not improved for 10 epochs.
# (Per-epoch checkpointing via ModelCheckpoint is intentionally not used in
# this run.)
callback_es = EarlyStopping(monitor='val_acc', patience=10,
                            mode='auto', verbose=1)

batch_size = 30
num_epochs = 100  # upper bound; values of 100-300 are commonly used

from keras.preprocessing.image import ImageDataGenerator
# Random augmentation applied on the fly to the training batches.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=20,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images left-right
        vertical_flip=False)  # do not flip images upside-down

# Computes the quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
# All of those options are disabled above; the call is kept so they can be
# switched on without further changes.
datagen.fit(X_train)

# Train on the augmented batches produced by datagen.flow().
# - The callback has to be passed here to take effect.
# - The returned History object is kept in `h` so the curves can be plotted.
# NOTE: the test split doubles as validation data, so early stopping is
# driven by test-set accuracy.
h = model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=batch_size),
        steps_per_epoch=int(np.ceil(X_train.shape[0] / float(batch_size))),
        epochs=num_epochs,
        validation_data=(X_test, Y_test),
        callbacks=[callback_es],
        workers=1)

model.save('model.hdf5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# Read the metrics from the History callback object that Keras attaches to
# the model during training, so this cell does not depend on a separately
# captured return value of the training call.
history = model.history.history
epoch = np.arange(len(history['acc']))

# Plot how training accuracy and validation accuracy evolve per epoch.
plt.title('Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.ylim(0.0, 1.0)
plt.plot(epoch, history['acc'], label='train')
plt.plot(epoch, history['val_acc'], label='validation')
plt.legend()
plt.show()

In [0]:
#
# ResNet のモデルを定義するセル
#

def _shortcut(inputs, residual):
  """Project ``inputs`` onto the shape of ``residual`` and add the two.

  Args:
    inputs: Tensor that entered the residual block (the skip branch; it has
      not passed through the block's convolutions).
    residual: Tensor produced by the block's convolution stack.

  Returns:
    The element-wise sum ``add([shortcut, residual])``, where ``shortcut``
    is ``inputs`` passed through a 1x1 convolution.
  """
  # Imported locally so the function only relies on the public backend API;
  # ``K.int_shape`` replaces the private ``_keras_shape`` attribute.
  from keras import backend as K

  # Static shapes, read as (batch, rows, cols, channels). This assumes the
  # default channels_last layout, which is what this notebook uses.
  input_shape = K.int_shape(inputs)
  residual_shape = K.int_shape(residual)
  n_filters = residual_shape[3]

  def _ratio(in_dim, out_dim):
    # Unknown (None) dimensions cannot be compared; fall back to stride 1.
    if not in_dim or not out_dim:
      return 1
    return max(1, int(round(in_dim / float(out_dim))))

  # If the convolution stack down-sampled the feature map, stride the
  # projection by the same factor so that both branches can be added.
  shortcut_strides = (_ratio(input_shape[1], residual_shape[1]),
                      _ratio(input_shape[2], residual_shape[2]))

  # The channel counts of the two branches may differ, so a 1x1 convolution
  # maps ``inputs`` to the residual's filter count. It is applied
  # unconditionally, i.e. the skip branch is always a learned projection.
  shortcut = Convolution2D(n_filters, (1,1), strides=shortcut_strides,
                           kernel_initializer='glorot_normal', padding='same')(inputs)

  # Sum the two branches.
  return add([shortcut, residual])


# A ResBlock is a group of layers wrapped by a shortcut connection.
# This variant normalises and activates the input first and then runs three
# Conv -> BatchNorm -> ReLU stages; many published variants reorder or
# replace these steps.
def _resblock(n_filters, strides=(1,1)):
  """Return a function that applies a pre-activated bottleneck residual block.

  The returned callable applies BatchNorm -> ReLU to its input, then three
  Conv -> BatchNorm -> ReLU stages (1x1 with ``n_filters``, 3x3 with
  ``n_filters``, 1x1 with ``n_filters * 4``), and finally merges the result
  with the original, un-normalised block input through ``_shortcut``.

  Args:
    n_filters: Filter count of the first two convolutions; the last
      convolution uses four times as many.
    strides: Stride of the block. Only the first convolution is strided, so
      the feature map is down-sampled once. With a value other than (1,1)
      the skip branch built by ``_shortcut`` must shrink by the same factor
      for the final addition to be valid.

  Returns:
    A function mapping an input tensor to the block's output tensor.
  """
  def f(inputs):
    # Pre-activation of the block input.
    x = BatchNormalization()(inputs)
    x = Activation('relu')(x)

    # The stride is applied here only; repeating it on every convolution
    # would compound the down-sampling.
    x = Convolution2D(n_filters, (1,1), strides=strides, kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters, (3,3), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Convolution2D(n_filters * 4, (1,1), strides=(1,1), kernel_initializer='he_normal', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # A Dropout(0.3) at this point was tried and left disabled.

    # Add the raw block input back onto the convolved signal; this skip
    # connection is the central idea of ResNet.
    return _shortcut(inputs, x)

  return f

def resnet():
    """Build the four-stage residual classifier for 32x32 RGB images.

    Each stage is BatchNorm -> ReLU -> ``_resblock`` -> BatchNorm -> ReLU,
    with 64, 128, 256 and 512 filters respectively, and consecutive stages
    are separated by a stride-2 max pooling. The network ends with global
    average pooling and a 10-unit softmax layer.

    Returns:
        The uncompiled Keras ``Model``.
    """
    # Experiments that were tried and left disabled: a second
    # _resblock -> BatchNorm -> ReLU group per stage, followed by Dropout(0.2).
    inputs = Input(shape=(32, 32, 3))

    x = inputs
    for stage_index, stage_filters in enumerate((64, 128, 256, 512)):
        if stage_index > 0:
            # Halve the spatial resolution between stages.
            x = MaxPooling2D(strides=(2,2))(x)

        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = _resblock(n_filters=stage_filters)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation='softmax')(x)

    return Model(inputs=inputs, outputs=x)
  
# Instantiate the network and configure it for training: plain SGD,
# categorical cross-entropy loss, accuracy as the reported metric.
model = resnet()
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
batch_normalization_41 (BatchNo (None, 32, 32, 3)    12          input_3[0][0]                    
__________________________________________________________________________________________________
activation_41 (Activation)      (None, 32, 32, 3)    0           batch_normalization_41[0][0]     
__________________________________________________________________________________________________
batch_normalization_42 (BatchNo (None, 32, 32, 3)    12          activation_41[0][0]              
__________________________________________________________________________________________________
activation

In [0]:
# Early stopping: end training once val_acc has not improved for 10 epochs.
# (Per-epoch checkpointing via ModelCheckpoint is intentionally not used in
# this run.)
callback_es = EarlyStopping(monitor='val_acc', patience=10,
                            mode='auto', verbose=1)

batch_size = 30
num_epochs = 100  # upper bound; values of 100-300 are commonly used

from keras.preprocessing.image import ImageDataGenerator
# Random augmentation applied on the fly to the training batches.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=20,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images left-right
        vertical_flip=False)  # do not flip images upside-down

# Computes the quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
# All of those options are disabled above; the call is kept so they can be
# switched on without further changes.
datagen.fit(X_train)

# Train on the augmented batches produced by datagen.flow().
# - The callback has to be passed here to take effect.
# - The returned History object is kept in `h` so the curves can be plotted.
# NOTE: the test split doubles as validation data, so early stopping is
# driven by test-set accuracy.
h = model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=batch_size),
        steps_per_epoch=int(np.ceil(X_train.shape[0] / float(batch_size))),
        epochs=num_epochs,
        validation_data=(X_test, Y_test),
        callbacks=[callback_es],
        workers=1)

model.save('model.hdf5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
 213/1667 [==>...........................] - ETA: 2:55 - loss: 0.4363 - acc: 0.8477

オプティマイザを変更したら精度向上

opt = Adam(amsgrad=True)  
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [0]:
#
# ResNet のモデルを定義するセル
#

def _shortcut(inputs, residual):
  """Project ``inputs`` onto the shape of ``residual`` and add the two.

  Args:
    inputs: Tensor that entered the residual block (the skip branch; it has
      not passed through the block's convolutions).
    residual: Tensor produced by the block's convolution stack.

  Returns:
    The element-wise sum ``add([shortcut, residual])``, where ``shortcut``
    is ``inputs`` passed through a 1x1 convolution.
  """
  # Imported locally so the function only relies on the public backend API;
  # ``K.int_shape`` replaces the private ``_keras_shape`` attribute.
  from keras import backend as K

  # Static shapes, read as (batch, rows, cols, channels). This assumes the
  # default channels_last layout, which is what this notebook uses.
  input_shape = K.int_shape(inputs)
  residual_shape = K.int_shape(residual)
  n_filters = residual_shape[3]

  def _ratio(in_dim, out_dim):
    # Unknown (None) dimensions cannot be compared; fall back to stride 1.
    if not in_dim or not out_dim:
      return 1
    return max(1, int(round(in_dim / float(out_dim))))

  # If the convolution stack down-sampled the feature map, stride the
  # projection by the same factor so that both branches can be added.
  shortcut_strides = (_ratio(input_shape[1], residual_shape[1]),
                      _ratio(input_shape[2], residual_shape[2]))

  # The channel counts of the two branches may differ, so a 1x1 convolution
  # maps ``inputs`` to the residual's filter count. It is applied
  # unconditionally, i.e. the skip branch is always a learned projection.
  shortcut = Convolution2D(n_filters, (1,1), strides=shortcut_strides,
                           kernel_initializer='glorot_normal', padding='same')(inputs)

  # Sum the two branches.
  return add([shortcut, residual])


# Residual block: a group of layers wrapped by a shortcut connection.
# Each stage here is simply Conv -> BatchNorm -> ReLU; many published
# variants reorder or replace these pieces.
def _resblock(n_filters, strides=(1,1)):
  """Return a callable that applies one residual block to a tensor.

  The block stacks three Conv -> BatchNorm -> ReLU stages (1x1, 3x3, 1x1),
  the last one with `n_filters * 4` output channels, and then merges the
  result with the block's input through `_shortcut`.

  Args:
    n_filters: output channels of the first two convolutions.
    strides: stride forwarded to all three convolutions. Note that a
      non-unit stride is therefore applied three times; this notebook only
      uses the default (1, 1).

  Returns:
    A function mapping an input tensor to the block's output tensor.
  """
  # (output channels, kernel size) of the three stacked convolutions.
  stage_specs = (
      (n_filters, (1,1)),
      (n_filters, (3,3)),
      (n_filters * 4, (1,1)),
  )

  def f(inputs):
    branch = inputs
    for channels, kernel_size in stage_specs:
      branch = Convolution2D(channels, kernel_size, strides=strides,
                             kernel_initializer='he_normal', padding='same')(branch)
      branch = BatchNormalization()(branch)
      branch = Activation('relu')(branch)

    # Adding the untouched input to the processed branch is the defining
    # step of a ResNet.
    return _shortcut(inputs, branch)

  return f

def resnet():
    """Build the small ResNet-style classifier used in this experiment.

    The network takes 32x32x3 images and runs four stages whose residual
    blocks are created with n_filters = 64, 128, 256 and 512. Each stage is
    BN -> ReLU -> residual block -> BN -> ReLU, and consecutive stages are
    separated by a stride-2 max pooling. Global average pooling and a
    10-way softmax layer form the head.

    A second residual block per stage and Dropout(0.2) were tried earlier
    and are currently disabled.

    Returns:
        An uncompiled keras `Model`.
    """
    stage_widths = (64, 128, 256, 512)

    image = Input(shape=(32, 32, 3))
    features = image

    for stage_index, width in enumerate(stage_widths):
        # Every stage except the first begins by downsampling.
        if stage_index > 0:
            features = MaxPooling2D(strides=(2,2))(features)

        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        features = _resblock(n_filters=width)(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)

    # Classification head.
    features = GlobalAveragePooling2D()(features)
    probabilities = Dense(10, activation='softmax')(features)

    return Model(inputs=image, outputs=probabilities)
  
# Adam with amsgrad=True (plain 'sgd' was the earlier setting).
opt = Adam(amsgrad=True)

model = resnet()
model.compile(optimizer=opt, loss='categorical_crossentropy',
              metrics=['accuracy'])

# model.summary()  # uncomment to inspect the architecture

# Stop training once validation accuracy has not improved for 10 epochs.
# (Per-epoch checkpointing with ModelCheckpoint is currently disabled.)
callback_es = EarlyStopping(monitor='val_acc', patience=10,
                            mode='auto', verbose=1)

batch_size = 30
num_epochs = 100  # upper bound; values around 100-300 are typical

# Augmentation settings. ImageDataGenerator is imported in the notebook's
# first cell. All dataset-wide normalisation options are switched off.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=20,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False)  # do not flip images vertically

# Computes the statistics needed by feature-wise normalisation / ZCA
# whitening. Those options are all disabled above, so this call only
# matters if one of them is turned on.
datagen.fit(X_train)

# Train on augmented batches. `callback_es` was previously created but never
# handed to the training call, so early stopping never took effect; it is
# now passed explicitly.
# NOTE(review): the test split doubles as the validation set here, so early
# stopping is tuned against test data - consider a held-out validation split.
model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                    steps_per_epoch=int(np.ceil(X_train.shape[0] / float(batch_size))),
                    epochs=num_epochs,
                    validation_data=(X_test, Y_test),
                    callbacks=[callback_es],
                    workers=1)

# Write the trained model to disk.
model.save('model.hdf5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100

Data Augmentation のパラメータを変更したが、精度は向上しなかった。

        rotation_range=45,  
        width_shift_range=0.4, 
        height_shift_range=0.4,  

In [0]:
#
# ResNet のモデルを定義するセル
#

def _shortcut(inputs, residual):
  """Join the two branches of a residual block by element-wise addition.

  Args:
    inputs: tensor that entered the block, untouched by the block's
      conv / BN / ReLU stack (the signal travelling along the shortcut).
    residual: tensor produced by the block's conv / BN / ReLU stack.

  Returns:
    The tensor `conv1x1(inputs) + residual`.

  The 1x1 convolution is applied unconditionally, even when both tensors
  already have the same number of channels. It uses stride 1 and 'same'
  padding, so it only changes the channel count; the two tensors must
  already agree in their spatial dimensions.
  """
  # Public replacement for reading the private `_keras_shape` attribute.
  from keras import backend as K

  # With the default channels_last layout the static shape is
  # (batch, spatial, spatial, channels), so index 3 is the channel count.
  n_filters = K.int_shape(residual)[3]

  # The channel counts of the two branches may differ, in which case they
  # cannot be added directly: project `inputs` to `n_filters` channels.
  shortcut = Convolution2D(n_filters, (1,1), strides=(1,1),kernel_initializer='glorot_normal', padding='same')(inputs)

  # Sum the two branches.
  return add([shortcut, residual])


# Residual block: a group of layers wrapped by a shortcut connection.
# Each stage here is simply Conv -> BatchNorm -> ReLU; many published
# variants reorder or replace these pieces.
def _resblock(n_filters, strides=(1,1), weight_decay=None):
  """Return a callable that applies one residual block to a tensor.

  The block stacks three Conv -> BatchNorm -> ReLU stages (1x1, 3x3, 1x1),
  the last one with `n_filters * 4` output channels, and then merges the
  result with the block's input through `_shortcut`.

  Args:
    n_filters: output channels of the first two convolutions.
    strides: stride forwarded to all three convolutions. Note that a
      non-unit stride is therefore applied three times; this notebook only
      uses the default (1, 1).
    weight_decay: optional regularisation strength. When given, every
      convolution gets an L2 kernel regulariser and an L1 activity
      regulariser of this strength (the setting tried earlier with 0.01).
      The default None builds the convolutions without regularisation,
      exactly as before.

  Returns:
    A function mapping an input tensor to the block's output tensor.
  """
  # (output channels, kernel size) of the three stacked convolutions.
  stage_specs = (
      (n_filters, (1,1)),
      (n_filters, (3,3)),
      (n_filters * 4, (1,1)),
  )

  def conv_options():
    # Fresh keyword arguments (and fresh regulariser objects) per layer.
    options = dict(strides=strides, kernel_initializer='he_normal', padding='same')
    if weight_decay is not None:
      options['kernel_regularizer'] = regularizers.l2(weight_decay)
      options['activity_regularizer'] = regularizers.l1(weight_decay)
    return options

  def f(inputs):
    branch = inputs
    for channels, kernel_size in stage_specs:
      branch = Convolution2D(channels, kernel_size, **conv_options())(branch)
      branch = BatchNormalization()(branch)
      branch = Activation('relu')(branch)

    # Adding the untouched input to the processed branch is the defining
    # step of a ResNet.
    return _shortcut(inputs, branch)

  return f

def resnet():
    """Build the small ResNet-style classifier used in this experiment.

    The network takes 32x32x3 images and runs four stages whose residual
    blocks are created with n_filters = 64, 128, 256 and 512. Each stage is
    BN -> ReLU -> residual block -> BN -> ReLU, and consecutive stages are
    separated by a stride-2 max pooling. Global average pooling and a
    10-way softmax layer form the head.

    A second residual block per stage and Dropout(0.2) were tried earlier
    and are currently disabled.

    Returns:
        An uncompiled keras `Model`.
    """
    stage_widths = (64, 128, 256, 512)

    image = Input(shape=(32, 32, 3))
    features = image

    for stage_index, width in enumerate(stage_widths):
        # Every stage except the first begins by downsampling.
        if stage_index > 0:
            features = MaxPooling2D(strides=(2,2))(features)

        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        features = _resblock(n_filters=width)(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)

    # Classification head.
    features = GlobalAveragePooling2D()(features)
    probabilities = Dense(10, activation='softmax')(features)

    return Model(inputs=image, outputs=probabilities)

model = resnet()

# Adam with amsgrad=True (plain 'sgd' was the earlier setting).
opt = Adam(amsgrad=True)
model.compile(optimizer=opt, loss='categorical_crossentropy',
              metrics=['accuracy'])

# model.summary()  # uncomment to inspect the architecture

# Stop training once validation accuracy has not improved for 10 epochs.
# (Per-epoch checkpointing with ModelCheckpoint is currently disabled.)
callback_es = EarlyStopping(monitor='val_acc', patience=10,
                            mode='auto', verbose=1)

batch_size = 30
num_epochs = 100  # upper bound; values around 100-300 are typical

# Augmentation settings for this run: stronger rotation and shifts than the
# previous experiment. ImageDataGenerator is imported in the notebook's
# first cell. All dataset-wide normalisation options are switched off.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=45,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.4,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.4,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False)  # do not flip images vertically

# Computes the statistics needed by feature-wise normalisation / ZCA
# whitening. Those options are all disabled above, so this call only
# matters if one of them is turned on.
datagen.fit(X_train)

# Train on augmented batches. `callback_es` was previously created but never
# handed to the training call, so early stopping never took effect; it is
# now passed explicitly.
# NOTE(review): the test split doubles as the validation set here, so early
# stopping is tuned against test data - consider a held-out validation split.
model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                    steps_per_epoch=int(np.ceil(X_train.shape[0] / float(batch_size))),
                    epochs=num_epochs,
                    validation_data=(X_test, Y_test),
                    callbacks=[callback_es],
                    workers=1)

# Write the trained model to disk.
model.save('model.hdf5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100

KeyboardInterrupt: ignored

weight_decay = 0.01
kernel_regularizer=regularizers.l2(weight_decay), activity_regularizer=regularizers.l1(weight_decay)

上記の正則化はあまり効果なし

↓ _resblock の後の Batch Normalization と Activation を削除しても精度が悪化したので、
Data Augmentation のパラメータを調整して再実行




In [0]:
#
# ResNet のモデルを定義するセル
#

def _shortcut(inputs, residual):
  """Join the two branches of a residual block by element-wise addition.

  Args:
    inputs: tensor that entered the block, untouched by the block's
      conv / BN / ReLU stack (the signal travelling along the shortcut).
    residual: tensor produced by the block's conv / BN / ReLU stack.

  Returns:
    The tensor `conv1x1(inputs) + residual`.

  The 1x1 convolution is applied unconditionally, even when both tensors
  already have the same number of channels. It uses stride 1 and 'same'
  padding, so it only changes the channel count; the two tensors must
  already agree in their spatial dimensions.
  """
  # Public replacement for reading the private `_keras_shape` attribute.
  from keras import backend as K

  # With the default channels_last layout the static shape is
  # (batch, spatial, spatial, channels), so index 3 is the channel count.
  n_filters = K.int_shape(residual)[3]

  # The channel counts of the two branches may differ, in which case they
  # cannot be added directly: project `inputs` to `n_filters` channels.
  shortcut = Convolution2D(n_filters, (1,1), strides=(1,1),kernel_initializer='glorot_normal', padding='same')(inputs)

  # Sum the two branches.
  return add([shortcut, residual])


# Residual block: a group of layers wrapped by a shortcut connection.
# Each stage here is simply Conv -> BatchNorm -> ReLU; many published
# variants reorder or replace these pieces.
def _resblock(n_filters, strides=(1,1)):
  """Return a callable that applies one residual block to a tensor.

  The block stacks three Conv -> BatchNorm -> ReLU stages (1x1, 3x3, 1x1),
  the last one with `n_filters * 4` output channels, and then merges the
  result with the block's input through `_shortcut`.

  Args:
    n_filters: output channels of the first two convolutions.
    strides: stride forwarded to all three convolutions. Note that a
      non-unit stride is therefore applied three times; this notebook only
      uses the default (1, 1).

  Returns:
    A function mapping an input tensor to the block's output tensor.
  """
  # (output channels, kernel size) of the three stacked convolutions.
  stage_specs = (
      (n_filters, (1,1)),
      (n_filters, (3,3)),
      (n_filters * 4, (1,1)),
  )

  def f(inputs):
    branch = inputs
    for channels, kernel_size in stage_specs:
      branch = Convolution2D(channels, kernel_size, strides=strides,
                             kernel_initializer='he_normal', padding='same')(branch)
      branch = BatchNormalization()(branch)
      branch = Activation('relu')(branch)

    # Adding the untouched input to the processed branch is the defining
    # step of a ResNet.
    return _shortcut(inputs, branch)

  return f

def resnet():
    """Build the small ResNet-style classifier used in this experiment.

    The network takes 32x32x3 images and runs four stages whose residual
    blocks are created with n_filters = 64, 128, 256 and 512. Each stage is
    BN -> ReLU -> residual block -> BN -> ReLU, and consecutive stages are
    separated by a stride-2 max pooling. Global average pooling and a
    10-way softmax layer form the head.

    A second residual block per stage and Dropout(0.2) were tried earlier
    and are currently disabled.

    Returns:
        An uncompiled keras `Model`.
    """
    stage_widths = (64, 128, 256, 512)

    image = Input(shape=(32, 32, 3))
    features = image

    for stage_index, width in enumerate(stage_widths):
        # Every stage except the first begins by downsampling.
        if stage_index > 0:
            features = MaxPooling2D(strides=(2,2))(features)

        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        features = _resblock(n_filters=width)(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)

    # Classification head.
    features = GlobalAveragePooling2D()(features)
    probabilities = Dense(10, activation='softmax')(features)

    return Model(inputs=image, outputs=probabilities)
  
# Adam with amsgrad=True (plain 'sgd' was the earlier setting).
opt = Adam(amsgrad=True)

model = resnet()
model.compile(optimizer=opt, loss='categorical_crossentropy',
              metrics=['accuracy'])

# model.summary()  # uncomment to inspect the architecture

# Stop training once validation accuracy has not improved for 10 epochs.
# (Per-epoch checkpointing with ModelCheckpoint is currently disabled.)
callback_es = EarlyStopping(monitor='val_acc', patience=10,
                            mode='auto', verbose=1)

batch_size = 30
num_epochs = 100  # upper bound; values around 100-300 are typical

# Augmentation settings for this run: rotation, shifts, horizontal flips,
# plus shear and zoom. ImageDataGenerator is imported in the notebook's
# first cell. All dataset-wide normalisation options are switched off.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=40,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        shear_range=0.2,  # random shear transformations
        zoom_range=0.2,  # random zoom
        fill_mode='nearest',  # how pixels exposed by a transform are filled in
        vertical_flip=False)  # do not flip images vertically

# Computes the statistics needed by feature-wise normalisation / ZCA
# whitening. Those options are all disabled above, so this call only
# matters if one of them is turned on.
datagen.fit(X_train)

# Train on augmented batches. `callback_es` was previously created but never
# handed to the training call, so early stopping never took effect; it is
# now passed explicitly.
# NOTE(review): the test split doubles as the validation set here, so early
# stopping is tuned against test data - consider a held-out validation split.
model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                    steps_per_epoch=int(np.ceil(X_train.shape[0] / float(batch_size))),
                    epochs=num_epochs,
                    validation_data=(X_test, Y_test),
                    callbacks=[callback_es],
                    workers=1)

# Write the trained model to disk.
model.save('model.hdf5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
 120/1667 [=>............................] - ETA: 3:47 - loss: 0.1370 - acc: 0.9503