In [1]:
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Input, Flatten, Dense, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D
from keras.layers import Concatenate
from keras.utils import plot_model

# ---- hyper-parameters -------------------------------------------------
batch_size = 128
num_classes = 10
epochs = 20
learning_rate = 0.0005
dropout_rate = 0.4

# Activation applied by the hidden conv/dense layers of the model.
activation_type = 'relu'

# Spatial size of one input image.
img_rows = 28
img_cols = 28

# ---- data -------------------------------------------------------------
# Load the dataset, already split into train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Work out where the single channel axis goes for the active backend
# image data format, then reshape both splits to that layout.
if keras.backend.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Cast to float32 and divide by 255 before feeding the network.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print('x_train shape is:', x_train.shape)
print('train sample size is:', x_train.shape[0])
print('test sample size is:', x_test.shape[0])

# Turn the integer class labels into one-hot (binary class matrix) targets.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# create model
def inception(x, filters):
    """Build one Inception module on top of the tensor ``x``.

    Four parallel paths are applied to ``x`` and their outputs are joined
    along the channel axis. Every convolution and the pooling layer use
    stride 1 with ``'same'`` padding.

    Args:
        x: Input Keras tensor.
        filters: Four-element sequence describing the filter counts:
            ``filters[0]``    -- filters of the plain 1x1 path,
            ``filters[1]``    -- ``(1x1 reduce, 3x3)`` filters,
            ``filters[2]``    -- ``(1x1 reduce, 5x5)`` filters,
            ``filters[3]``    -- filters of the 1x1 conv after 3x3 max-pooling.

    Returns:
        The concatenation of the four path outputs.
    """
    # The channel axis depends on the backend image data format; hard-coding
    # -1 would join the paths along the width axis under 'channels_first'.
    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    # 1x1
    path1 = Conv2D(filters=filters[0], kernel_size=(1,1), strides=1, padding='same', activation=activation_type)(x)

    # 1x1->3x3
    path2 = Conv2D(filters=filters[1][0], kernel_size=(1,1), strides=1, padding='same', activation=activation_type)(x)
    path2 = Conv2D(filters=filters[1][1], kernel_size=(3,3), strides=1, padding='same', activation=activation_type)(path2)

    # 1x1->5x5
    path3 = Conv2D(filters=filters[2][0], kernel_size=(1,1), strides=1, padding='same', activation=activation_type)(x)
    path3 = Conv2D(filters=filters[2][1], kernel_size=(5,5), strides=1, padding='same', activation=activation_type)(path3)

    # 3x3 max-pool -> 1x1
    path4 = MaxPooling2D(pool_size=(3,3), strides=1, padding='same')(x)
    path4 = Conv2D(filters=filters[3], kernel_size=(1,1), strides=1, padding='same', activation=activation_type)(path4)

    return Concatenate(axis=channel_axis)([path1, path2, path3, path4])


def auxiliary(x, name=None):
    """Attach an auxiliary softmax classifier head to the tensor ``x``.

    The head is: 5x5 average pooling (stride 3, 'valid') -> 1x1 conv with
    128 filters -> flatten -> dense(256) -> dropout -> dense softmax over
    ``num_classes``.

    Args:
        x: Keras tensor the head branches off from.
        name: Optional name given to the final softmax layer.

    Returns:
        The output tensor of the softmax layer.
    """
    # Layers are instantiated in the order they are applied.
    head_layers = [
        AveragePooling2D(pool_size=(5,5), strides=3, padding='valid'),
        Conv2D(filters=128, kernel_size=(1,1), strides=1, padding='same', activation=activation_type),
        Flatten(),
        Dense(units=256, activation=activation_type),
        Dropout(dropout_rate),
        Dense(units=num_classes, activation='softmax', name=name),
    ]

    out = x
    for head_layer in head_layers:
        out = head_layer(out)
    return out


def googlenet():
    """Build the GoogLeNet-style network with two auxiliary classifiers.

    The input of shape ``input_shape`` is zero-padded by 98 pixels on every
    side (28x28 -> 224x224 for the data used here), then passed through the
    stem, nine inception modules and a final classifier. Auxiliary softmax
    heads branch off after modules 4a and 4d.

    Returns:
        A Keras ``Model`` with one input and three outputs, in the order
        ``[main, aux1, aux2]``.
    """
    # BatchNormalization must normalise over the channel axis, whose position
    # depends on the backend image data format (the default axis=-1 is only
    # right for 'channels_last').
    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    layer_in = Input(shape=input_shape)
    # NOTE: the padding amount is hard-coded for 28x28 inputs (28 + 2*98 = 224).
    padded = ZeroPadding2D(padding=((98, 98), (98, 98)))(layer_in)

    # stage-1
    layer = Conv2D(filters=64, kernel_size=(7,7), strides=2, padding='same', activation=activation_type)(padded)
    layer = MaxPooling2D(pool_size=(3,3), strides=2, padding='same')(layer)
    layer = BatchNormalization(axis=channel_axis)(layer)

    # stage-2
    layer = Conv2D(filters=64, kernel_size=(1,1), strides=1, padding='same', activation=activation_type)(layer)
    layer = Conv2D(filters=192, kernel_size=(3,3), strides=1, padding='same', activation=activation_type)(layer)
    layer = BatchNormalization(axis=channel_axis)(layer)
    layer = MaxPooling2D(pool_size=(3,3), strides=2, padding='same')(layer)

    # stage-3
    layer = inception(layer, [ 64,  (96,128), (16,32), 32]) #3a
    layer = inception(layer, [128, (128,192), (32,96), 64]) #3b
    layer = MaxPooling2D(pool_size=(3,3), strides=2, padding='same')(layer)

    # stage-4 (auxiliary heads branch off after 4a and 4d)
    layer = inception(layer, [192,  (96,208),  (16,48),  64]) #4a
    aux1  = auxiliary(layer, name='aux1')
    layer = inception(layer, [160, (112,224),  (24,64),  64]) #4b
    layer = inception(layer, [128, (128,256),  (24,64),  64]) #4c
    layer = inception(layer, [112, (144,288),  (32,64),  64]) #4d
    aux2  = auxiliary(layer, name='aux2')
    layer = inception(layer, [256, (160,320), (32,128), 128]) #4e
    layer = MaxPooling2D(pool_size=(3,3), strides=2, padding='same')(layer)

    # stage-5
    layer = inception(layer, [256, (160,320), (32,128), 128]) #5a
    layer = inception(layer, [384, (192,384), (48,128), 128]) #5b
    layer = AveragePooling2D(pool_size=(7,7), strides=1, padding='valid')(layer)

    # stage-6: main classifier
    layer = Flatten()(layer)
    layer = Dropout(dropout_rate)(layer)
    # NOTE(review): 'linear' activation adds no non-linearity before the
    # softmax layer -- confirm this is intended.
    layer = Dense(units=256, activation='linear')(layer)
    main = Dense(units=num_classes, activation='softmax', name='main')(layer)

    model = Model(inputs=layer_in, outputs=[main, aux1, aux2])

    return model

# ---- build, inspect and compile the model ------------------------------
model = googlenet()
plot_model(model, to_file='googlenet.png', show_shapes=True, show_layer_names=True)
model.summary()

# The same loss and metric are applied to each of the three outputs.
rmsprop = keras.optimizers.RMSprop(learning_rate, rho=0.9)
model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=rmsprop,
    metrics=['accuracy'],
)

# ---- training ----------------------------------------------------------
# Every output (main, aux1, aux2) is trained against the same labels;
# 10% of the training data is held out for validation.
train_targets = [y_train, y_train, y_train]
results = model.fit(
    x_train,
    train_targets,
    validation_split=.1,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
)

# ---- evaluation on the test split --------------------------------------
test_targets = [y_test, y_test, y_test]
score = model.evaluate(x_test, test_targets, verbose=0)

# Report the total loss, then per-output losses, then per-output accuracies,
# in the order the entries appear in `score`.
score_labels = [
    'Test loss:',
    'Test main loss:',
    'Test aux 1 loss:',
    'Test aux 2 loss:',
    'Test main accuracy:',
    'Test aux 1 accuracy:',
    'Test aux 2 accuracy:',
]
for position, label in enumerate(score_labels):
    print(label, score[position])

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


x_train shape is: (60000, 28, 28, 1)
train sample size is: 60000
test sample size is: 10000
Instructions for updating:
Colocations handled automatically by placer.
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 224, 224, 1)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 112, 112, 64) 3200        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling

Instructions for updating:
Use tf.cast instead.
Train on 54000 samples, validate on 6000 samples
Epoch 1/20
  256/54000 [..............................] - ETA: 1:43:07 - loss: 12.0933 - main_loss: 5.7490 - aux1_loss: 3.5301 - aux2_loss: 2.8143 - main_accuracy: 0.1172 - aux1_accuracy: 0.1211 - aux2_accuracy: 0.0938

KeyboardInterrupt: 