<a href="https://colab.research.google.com/github/t-ito-takumi/corabo_test/blob/master/mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

準備

In [0]:
import os,re
import keras
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras import models
from keras.models import Model
from keras import Input
from keras.layers import Activation, Conv2D, MaxPooling2D, Flatten, Dense
from keras.callbacks import TensorBoard, ModelCheckpoint
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

In [0]:
# Fetch the MNIST training pool and the test set.
(_x_train_val, _y_train_val), (_x_test, _y_test) = mnist.load_data()

# MNIST has no validation split, so hold one out of the training pool with
# train_test_split(); test_size is the fraction that goes to validation
# (80% training / 20% validation).
_x_train, _x_val, _y_train, _y_val = train_test_split(
    _x_train_val,
    _y_train_val,
    test_size=0.2,
)

# Print the shape of every split; the expected shape is noted beside each entry.
for _split_label, _split_array in (
    ("x_train   : ", _x_train),  # (48000, 28, 28)
    ("y_train   : ", _y_train),  # (48000,)
    ("x_val      : ", _x_val),   # (12000, 28, 28)
    ("y_val      : ", _y_val),   # (12000,)
    ("x_test    : ", _x_test),   # (10000, 28, 28)
    ("y_test    : ", _y_test),   # (10000,)
):
    print(_split_label, _split_array.shape)

学習データの一部を表示

In [0]:
plt.figure(figsize=(10,10))

# Show one training image for each of the MNIST digits 0 to 9.
for i in range(10):
    # Positions of every training sample labelled i. The vectorized label
    # comparison avoids building a Python list of all (image, label) pairs
    # on each of the ten iterations.
    indices = np.flatnonzero(_y_train == i)
    # First sample of this digit (IndexError if the split has none).
    x = _x_train[indices[0]]

    plt.subplot(5,2, i+1)
    # The title reports how many training images carry this label.
    plt.title("len={}".format(len(indices)))
    # Hide the axes so the image is easier to see.
    plt.axis("off")
    plt.imshow(x, cmap='gray')

plt.tight_layout()
plt.show()

In [0]:
# 学習、検証、テストデータの前処理用関数。
def preprocess(data, label=False):
    """Prepare raw arrays for the network (training, validation or test data).

    Args:
        data: Array of label values when ``label`` is true, otherwise an
            array of images that can be reshaped to ``(-1, 28, 28, 1)``.
        label: Selects the label branch (one-hot encoding) instead of the
            image branch (scaling and reshaping).

    Returns:
        The result of ``to_categorical(data)`` for labels; for images a
        float32 array divided by 255 and reshaped to ``(-1, 28, 28, 1)``.
    """
    if label:
        # Labels are one-hot encoded with to_categorical().
        return to_categorical(data)

    # Images: cast to float32 and divide by 255 to map 0-255 onto 0-1.
    scaled = data.astype('float32') / 255
    # Add the trailing channel axis the convolution layers are declared with:
    # (samples, 28, 28) -> (samples, 28, 28, 1).
    return scaled.reshape((-1, 28, 28, 1))

In [0]:
# Images: scaled to 0-1 and reshaped to (samples, 28, 28, 1) by preprocess().
x_train = preprocess(_x_train)
x_val= preprocess(_x_val)
x_test = preprocess(_x_test)

# Labels: one-hot encoded by preprocess(..., label=True).
y_train = preprocess(_y_train, label=True)
y_val = preprocess(_y_val, label=True)
y_test = preprocess(_y_test, label=True)

# Sanity checks; the expected output is noted beside each line.
print(x_train.shape) # (48000, 28, 28, 1)
print(x_val.shape) # (12000, 28, 28, 1)
print(x_test.shape) # (10000, 28, 28, 1)
print(x_train.max()) # 1.0
print(x_val.max()) # 1.0
# Check the test *images* here: this group verifies the 0-1 input range, and
# printing y_test.max() left x_test unchecked.
print(x_test.max()) # 1.0
print(y_train.shape) # (48000, 10)
print(y_val.shape) # (12000, 10)
print(y_test.shape) # (10000, 10)

モデルの定義

In [0]:
def model_sequential():
    """Build the MNIST CNN with the Sequential API.

    The network is three 3x3 convolution stages (32, 64 and 64 filters, the
    first two followed by 2x2 max pooling), then a 64-unit dense layer and a
    10-unit softmax output. The model is returned uncompiled.

    Returns:
        The assembled ``models.Sequential`` instance.
    """
    hidden_activation = 'relu'

    layer_stack = [
        # Stage 1: the first layer also declares the input shape.
        Conv2D(32, (3, 3), padding='same', name='conv1', input_shape=(28, 28 , 1)),
        Activation(hidden_activation, name='act1'),
        MaxPooling2D((2, 2), name='pool1'),
        # Stage 2.
        Conv2D(64, (3, 3), padding='same', name='conv2'),
        Activation(hidden_activation, name='act2'),
        MaxPooling2D((2, 2), name='pool2'),
        # Stage 3 (no pooling).
        Conv2D(64, (3, 3), padding='same', name='conv3'),
        Activation(hidden_activation, name='act3'),
        # Classifier head.
        Flatten(name='flatten'),
        Dense(64, name='dense4'),
        Activation(hidden_activation, name='act4'),
        Dense(10, name='dense5'),
        Activation('softmax', name='last_act'),
    ]

    return models.Sequential(layer_stack)

In [0]:
def model_functional_api():
    """Build the MNIST CNN with the functional API.

    The layer stack mirrors ``model_sequential``: three 3x3 convolution stages
    (32, 64 and 64 filters, the first two followed by 2x2 max pooling), a
    64-unit dense layer and a 10-unit softmax output. The model is returned
    uncompiled.

    Returns:
        A ``Model`` mapping the ``(28, 28, 1)`` input tensor to the softmax
        output tensor.
    """
    hidden_activation = 'relu'

    inputs = Input(shape=(28, 28, 1))

    layer_stack = [
        # Stage 1.
        Conv2D(32, (3,3), padding='same', name='conv1'),
        Activation(hidden_activation, name='act1'),
        MaxPooling2D((2,2), name='pool1'),
        # Stage 2.
        Conv2D(64, (3,3), padding='same', name='conv2'),
        Activation(hidden_activation, name='act2'),
        MaxPooling2D((2,2), name='pool2'),
        # Stage 3 (no pooling).
        Conv2D(64, (3,3), padding='same', name='conv3'),
        Activation(hidden_activation, name='act3'),
        # Classifier head.
        Flatten(name='flatten'),
        Dense(64, name='dense4'),
        Activation(hidden_activation, name='act4'),
        Dense(10, name='dense5'),
        Activation('softmax', name='last_act'),
    ]

    # Thread the tensor through the layers in order.
    tensor = inputs
    for layer in layer_stack:
        tensor = layer(tensor)

    return Model(inputs, tensor)

モデルのコンパイル

In [0]:
# Build the network and print its layer summary.
model = model_sequential()
model.summary()
# Request the metric under the short name 'acc'. Keras >= 2.3 reports a metric
# under the exact string given here, and the rest of this notebook (the
# checkpoint filename template, monitor='val_acc' and history.history['acc'])
# looks up 'acc' / 'val_acc'. Older versions already reported 'acc' for
# either spelling, so this is the same metric on every version.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['acc'])

コールバックの設定

In [0]:
# Directory shared by the TensorBoard logs and the weight checkpoints.
log_dir = "logs"
# Checkpoint filename template; the placeholders are filled in from the
# epoch number and the logged metrics.
ckpt_name = 'weights-{epoch:02d}-{loss:.2f}-{acc:.2f}-{val_loss:.2f}-{val_acc:.2f}-.hdf5'

cbs = [
    TensorBoard(log_dir=log_dir),
    # Weights only, written every period (save_best_only is off).
    ModelCheckpoint(
        os.path.join(log_dir, ckpt_name),
        monitor='val_acc',
        mode='auto',
        save_best_only=False,
        save_weights_only=True,
        period=1,
        verbose=0,
    ),
]

データの準備

In [0]:
# Data-augmentation generator used for the training images.
datagen = ImageDataGenerator(
    # Augmentations that are switched on: rotation and width/height shifts.
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # Flips are switched off.
    horizontal_flip=False,
    vertical_flip=False,
    # Centering, std-normalization and ZCA whitening are switched off.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

In [0]:
# Training hyperparameters.
batch_size = 128
epochs = 10
verbose = 1
# Number of whole batches that fit in the training set.
steps_per_epoch = len(x_train) // batch_size

# Train on augmented batches from datagen; validate on the untouched
# validation split and run the callbacks in cbs.
history = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=(x_val, y_val),
    verbose=verbose,
    callbacks=cbs,
)

学習ログの表示

In [0]:
# Per-epoch accuracy recorded during training.
acc = history.history['acc']
val_acc = history.history['val_acc']

# Use a dedicated name for the x axis (1-based epoch numbers). Assigning the
# range to `epochs` would overwrite the integer epoch count that the training
# cell passes to fit_generator, breaking a re-run of that cell.
epoch_axis = range(1, len(acc)+1)
plt.plot(epoch_axis, acc, 'b', label='Training accuracy')
plt.plot(epoch_axis, val_acc, 'r', label='Val accuracy')
plt.legend()
plt.show()

In [0]:
# Per-epoch loss recorded during training.
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use a dedicated name for the x axis (1-based epoch numbers). Assigning the
# range to `epochs` would overwrite the integer epoch count that the training
# cell passes to fit_generator, breaking a re-run of that cell.
epoch_axis = range(1, len(loss)+1)
plt.plot(epoch_axis, loss, 'b', label='Training loss')
plt.plot(epoch_axis, val_loss, 'r', label='Val loss')
plt.legend()
plt.show()

In [0]:
def key_sort_by_num(x):
    """Return every run of decimal digits in ``x`` as a list of ints.

    Intended as a ``key=`` function so names sort by the numbers they contain
    (``"a10"`` after ``"a2"``). A string without digits yields ``[]``.
    """
    return [int(digits) for digits in re.findall(r"[0-9]+", x)]


def list_from_dir(dir, target_ext=None):
    """List the entries of ``dir`` as paths, ordered by the numbers in their names.

    Args:
        dir: Directory to list (passed to ``os.listdir``).
        target_ext: ``None`` to keep every entry, or the extension(s) to keep,
            each including the leading dot. Either a single string such as
            ``'.hdf5'`` or an iterable of strings is accepted. Matching is
            exact and case-insensitive.

    Returns:
        A list of ``os.path.join(dir, name)`` paths sorted with
        ``key_sort_by_num``.
    """
    fnames = sorted(os.listdir(dir), key=key_sort_by_num)
    if target_ext is None:
        return [os.path.join(dir, fname) for fname in fnames]

    # A bare string must be treated as ONE extension. Testing `ext in '.hdf5'`
    # is a substring test, which also accepts '' (entries with no extension,
    # including sub-directories) and fragments such as '.h'.
    if isinstance(target_ext, str):
        target_ext = (target_ext,)
    wanted = {ext.lower() for ext in target_ext}

    return [
        os.path.join(dir, fname)
        for fname in fnames
        if os.path.splitext(fname)[1].lower() in wanted
    ]


def latest_weight(log_dir):
    """Return the path of the last ``.hdf5`` file in ``log_dir``.

    "Last" is the final element of the list returned by ``list_from_dir``.

    Raises:
        IndexError: If ``log_dir`` contains no matching file.
    """
    # Pass the extension as a one-element tuple so the membership test is an
    # exact match. With a bare string the test can degrade to a substring
    # check, letting extension-less entries ('' is contained in any string)
    # slip into the candidate list.
    weight_paths = list_from_dir(log_dir, ('.hdf5',))
    return weight_paths[-1]

モデルの評価

In [0]:
# Rebuild the network from scratch.
model = model_sequential()

# Restore the weights from the checkpoint selected by latest_weight().
ckpt = latest_weight(log_dir)
model.load_weights(ckpt)

# Compile so that evaluate() can compute the loss and the metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Evaluate on the held-out test set and print (metric name, value) pairs.
score = model.evaluate(x_test, y_test)
print(list(zip(model.metrics_names, score)))


テストデータの表示

In [0]:
plt.figure(figsize=(10,10))

# For each digit 0-9, classify its first test image and show the result.
for i in range(10):
    # Index of the first test sample labelled i (IndexError if there is none).
    # The vectorized label comparison avoids building a Python list of all
    # (image, label) pairs on each of the ten iterations.
    first = np.flatnonzero(_y_test == i)[0]
    x, y = _x_test[first], _y_test[first]

    # preprocess() reshapes the single raw image to (1, 28, 28, 1).
    pred = model.predict(preprocess(x, label=False))

    # Predicted class and the largest output value, as a percentage.
    ans = np.argmax(pred)
    score = np.max(pred) * 100

    plt.subplot(5,2, i +1)
    plt.axis("off")
    # Title: prediction, its score, and whether it matches the true label.
    plt.title("ans={} score={}\n{}".format(ans, score,ans==y))

    plt.imshow(x, cmap='gray')


plt.tight_layout()
plt.show()