In [1]:
%matplotlib inline

import os

import tensorflow.keras as keras
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, Convolution2D, MaxPooling2D, Flatten, Input, Activation, add, Add, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

from IPython.display import SVG
from tensorflow.python.keras.utils.vis_utils import model_to_dot

from tensorflow.examples.tutorials.mnist import input_data

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [21]:
def Shortcut(input_data, fx, data_format = "channels_first"):
  """Add a residual branch to a 1x1-projected copy of the block input.

  Args:
    input_data: tensor entering the residual block (the skip path).
    fx: output tensor of the residual branch.
    data_format: "channels_first" reads the channel count from axis 1 of
      `fx`; any other value reads it from axis 3.

  Returns:
    The element-wise sum of the projected input and `fx`.
  """
  # The axis of fx that holds the channels depends on the data layout.
  channel_axis = 1 if data_format == "channels_first" else 3
  n_channels = int(fx.shape[channel_axis])

  # The block input and the residual branch may have different channel counts,
  # in which case they cannot be summed as they are. A 1x1 convolution maps
  # the input onto the residual branch's filter count.
  projection = Convolution2D(n_channels, (1,1), strides=(1,1), padding='valid', data_format=data_format)
  projected = projection(input_data)

  print("input_data.shape = {0}, buf.shape = {1}".format(input_data.shape, projected.shape))

  # Sum the two paths.
  return Add()([projected, fx])

In [22]:
def Resblock(n_filters, strides=(1,1), data_format = "channels_first", axis=1):
  """Build a residual block: conv-BN-ReLU-conv-BN plus a projected skip path.

  Args:
    n_filters: number of filters of both 3x3 convolutions.
    strides: stride of the block. It is applied to the first 3x3 convolution
      only; the second convolution always uses stride 1 so the block reduces
      the spatial resolution exactly once.
    data_format: Keras data format passed to every layer in the block.
    axis: axis handed to the BatchNormalization layers.

  Returns:
    A function that takes the block's input tensor and returns its output
    tensor.
  """
  # Accept the int, tuple and list spellings of "no downsampling".
  downsamples = strides not in (1, (1, 1), [1, 1])

  def f(input_data):
    print("input_data.shape = {0}".format(input_data.shape))
    fx = Convolution2D(n_filters, (3,3), strides=strides, kernel_initializer='he_normal', padding='same', data_format=data_format)(input_data)
    fx = BatchNormalization(axis=axis)(fx)
    fx = Activation('relu')(fx)
    # Stride 1 here: reusing `strides` would downsample the branch a second time.
    fx = Convolution2D(n_filters, (3,3), strides=(1,1), kernel_initializer='he_normal', padding='same', data_format=data_format)(fx)
    fx = BatchNormalization(axis=axis)(fx)

    skip = input_data
    if downsamples:
      # A 1x1 pooling window with the block stride is plain subsampling.
      # Shortcut then applies its stride-1 1x1 convolution, so the skip path
      # ends at the same spatial size as the residual branch and can be added.
      skip = MaxPooling2D(pool_size=(1,1), strides=strides, padding='valid', data_format=data_format)(input_data)

    return Shortcut(skip, fx, data_format)

  return f

In [32]:
def Resnet(is_channels_first = True):
  """Assemble a small ResNet-style classifier for 28x28 single-channel images.

  Args:
    is_channels_first: when true the model takes (1, 28, 28) inputs and uses
      the "channels_first" layout; otherwise it takes (28, 28, 1) inputs and
      uses "channels_last".

  Returns:
    An uncompiled Keras `Model` ending in a 10-way softmax.
  """
  # Layout, batch-normalisation axis and input shape all follow from the flag.
  if is_channels_first:
    data_format, axis, input_shape = "channels_first", 1, (1, 28, 28)
  else:
    data_format, axis, input_shape = "channels_last", -1, (28, 28, 1)
  input_data = Input(shape=input_shape)

  # Stem: 7x7 convolution, batch norm, ReLU, then a stride-2 max pool.
  x = Convolution2D(32, (7,7), strides=(1,1), kernel_initializer='he_normal', padding='same', data_format=data_format)(input_data)
  x = BatchNormalization(axis=axis)(x)
  x = Activation('relu')(x)
  x = MaxPooling2D((3, 3), strides=(2,2), padding='same', data_format=data_format)(x)

  # First stage: three residual blocks with 64 filters.
  for _ in range(3):
    x = Resblock(n_filters=64, data_format=data_format, axis=axis)(x)

  # Halve the resolution between the two stages.
  x = MaxPooling2D(strides=(2,2), data_format=data_format)(x)

  # Second stage: three residual blocks with 128 filters.
  for _ in range(3):
    x = Resblock(n_filters=128, data_format=data_format, axis=axis)(x)

  # Classification head: global average pooling into a 10-unit softmax layer.
  x = GlobalAveragePooling2D(data_format=data_format)(x)
  x = Dense(10, kernel_initializer='he_normal', activation='softmax')(x)

  return Model(inputs=input_data, outputs=x)

In [39]:
# Build the channels_last variant of the network.
model = Resnet(is_channels_first=False)

# Adam with the library's default settings, categorical cross-entropy loss,
# and accuracy reported as the metric.
adam = keras.optimizers.Adam()
model.compile(
    loss='categorical_crossentropy',
    optimizer=adam,
    metrics=['accuracy'],
)

input_data.shape = (?, 14, 14, 32)
input_data.shape = (?, 14, 14, 32), buf.shape = (?, 14, 14, 64)
input_data.shape = (?, 14, 14, 64)
input_data.shape = (?, 14, 14, 64), buf.shape = (?, 14, 14, 64)
input_data.shape = (?, 14, 14, 64)
input_data.shape = (?, 14, 14, 64), buf.shape = (?, 14, 14, 64)
input_data.shape = (?, 7, 7, 64)
input_data.shape = (?, 7, 7, 64), buf.shape = (?, 7, 7, 128)
input_data.shape = (?, 7, 7, 128)
input_data.shape = (?, 7, 7, 128), buf.shape = (?, 7, 7, 128)
input_data.shape = (?, 7, 7, 128)
input_data.shape = (?, 7, 7, 128), buf.shape = (?, 7, 7, 128)


In [40]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections).
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            [(None, 28, 28, 1)]  0                                            
__________________________________________________________________________________________________
conv2d_74 (Conv2D)              (None, 28, 28, 32)   1600        input_9[0][0]                    
__________________________________________________________________________________________________
batch_normalization_52 (BatchNo (None, 28, 28, 32)   128         conv2d_74[0][0]                  
__________________________________________________________________________________________________
activation_30 (Activation)      (None, 28, 28, 32)   0           batch_normalization_52[0][0]     
____________________________________________________________________________________________

In [45]:
from tensorflow.keras.datasets import mnist

# Load the MNIST train/test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train_shape, x_test_shape = x_train.shape, x_test.shape

# Append a trailing channel axis of size 1 (channels_last layout).
# A channels_first model would need the size-1 axis at position 1 instead.
x_train = x_train.reshape(*x_train_shape[:3], 1)
x_test = x_test.reshape(*x_test_shape[:3], 1)

# Scaled float16 copies of the images (pixel values divided by 255).
x_train_new = x_train.astype('float16') / 255
x_test_new = x_test.astype('float16') / 255

num_classes = 10
# Convert the class vectors to binary class matrices (one-hot rows).
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [46]:
# Train on the scaled images (x_train_new), not the unscaled x_train: the
# validation images (x_test_new) are divided by 255, so training on unscaled
# pixels would feed the model differently preprocessed inputs for training
# and validation.
history = model.fit(x_train_new, y_train,
                    batch_size=128,
                    epochs=10,
                    verbose=1,
                    validation_data=(x_test_new, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 5632/60000 [=>............................] - ETA: 9:20 - loss: 0.4908 - acc: 0.8528

KeyboardInterrupt: 