GoogLeNet (Inception v1) CNN model for MNIST dataset classification

In [1]:
import tensorflow as tf
# Load MNIST through the Keras dataset helper. load_data() returns two
# (images, labels) tuples: one for training and one for testing.
# The data file (mnist.npz) is downloaded on first use.
mnist = tf.keras.datasets.mnist
(trainX, trainy), (testX, testy) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [2]:
# Keep only the first 1000 training and the first 100 test samples.
trainX = trainX[:1000]
trainy = trainy[:1000]

testX = testX[:100]
testy = testy[:100]

# Replicate the single grayscale channel three times along a new last axis so
# the images match the 3-channel input the network expects.
# Stacking the whole batch in one call yields the same array as stacking each
# image separately through tf.map_fn, without the per-element loop.
trainX = tf.stack([trainX] * 3, axis=-1).numpy()
testX = tf.stack([testX] * 3, axis=-1).numpy()

# Upscale every image to the 224x224 resolution used by the model input.
trainX = tf.image.resize(trainX, [224, 224]).numpy()
testX = tf.image.resize(testX, [224, 224]).numpy()
testX.shape

(100, 224, 224, 3)

In [3]:
# Scale pixel intensities by 1/255 (assumes the usual 0-255 MNIST range).
trainX = trainX/255.0
testX = testX/255.0

# Validation set: hold out the last 20% of the training subset.
INDEX = int(0.8 * len(trainX))

# Take the validation slice BEFORE truncating trainX/trainy. Slicing the
# already-truncated arrays from INDEX onwards would always produce empty
# validation arrays.
val_x = trainX[INDEX:]
val_y = trainy[INDEX:]

trainX = trainX[:INDEX]
trainy = trainy[:INDEX]


In [4]:
from tensorflow.keras import layers


Inception Module

In [5]:
def inception(x, filters_1_1, filters_3_3_reduce, filters_3_3, filters_5_5_reduce, filters_5_5, filter_pool):
  """Build one Inception module on top of ``x`` and return its output tensor.

  Four parallel branches are applied to the same input and their outputs are
  concatenated along the channel (last) axis:

  1. 1x1 convolution.
  2. 1x1 "reduce" convolution followed by a 3x3 convolution.
  3. 1x1 "reduce" convolution followed by a 5x5 convolution.
  4. 3x3 max pooling (stride 1) followed by a 1x1 convolution.

  All convolutions use ``padding='same'`` and ReLU, so every branch keeps the
  spatial size of ``x`` and the branches can be concatenated.

  Args:
    x: Input Keras tensor (4-D, channels last).
    filters_1_1: Number of filters of the 1x1 branch.
    filters_3_3_reduce: Number of filters of the 1x1 conv before the 3x3 conv.
    filters_3_3: Number of filters of the 3x3 conv.
    filters_5_5_reduce: Number of filters of the 1x1 conv before the 5x5 conv.
    filters_5_5: Number of filters of the 5x5 conv.
    filter_pool: Number of filters of the 1x1 conv after the pooling branch.

  Returns:
    Keras tensor with ``filters_1_1 + filters_3_3 + filters_5_5 + filter_pool``
    channels.
  """
  # Branch 1: plain 1x1 convolution.
  path1 = layers.Conv2D(filters_1_1, (1,1), padding='same', activation='relu')(x)

  # Branch 2: 1x1 reduction, then 3x3 convolution.
  path2 = layers.Conv2D(filters_3_3_reduce, (1,1), padding='same', activation='relu')(x)
  path2 = layers.Conv2D(filters_3_3, (3,3), padding='same', activation='relu')(path2)

  # Branch 3: 1x1 reduction, then 5x5 convolution.
  path3 = layers.Conv2D(filters_5_5_reduce, (1,1), padding='same', activation='relu')(x)
  path3 = layers.Conv2D(filters_5_5, (5,5), padding='same', activation='relu')(path3)

  # Branch 4: 3x3 max pooling with stride 1, then 1x1 projection.
  path4 = layers.MaxPool2D((3,3), strides=(1,1), padding='same')(x)
  path4 = layers.Conv2D(filter_pool, (1,1), padding='same', activation='relu')(path4)

  # Use the Keras Concatenate layer rather than the raw tf.concat op: symbolic
  # Keras tensors must flow through Keras layers in a functional model (Keras 3
  # rejects TensorFlow ops applied directly to them). For 4-D channels-last
  # tensors axis=-1 is the same axis as the previous axis=3.
  return layers.Concatenate(axis=-1)([path1, path2, path3, path4])

Step 1: Define the model

In [6]:
NUM_CLASSES = 10  # size of every softmax output (one unit per MNIST digit)


def _aux_classifier(x, num_classes=NUM_CLASSES):
  """Attach an auxiliary classifier head to ``x`` and return its softmax output.

  The head is: 5x5 average pooling (stride 3) -> 1x1 conv with 128 filters ->
  flatten -> dense 1024 (ReLU) -> dropout 0.7 -> dense ``num_classes`` softmax.
  """
  aux = layers.AveragePooling2D((5,5), strides=3)(x)
  aux = layers.Conv2D(128, 1, padding='same', activation='relu')(aux)
  aux = layers.Flatten()(aux)
  aux = layers.Dense(1024, activation='relu')(aux)
  aux = layers.Dropout(0.7)(aux)
  return layers.Dense(num_classes, activation='softmax')(aux)


inp = layers.Input(shape=(224,224,3))

# Stem: 7x7/2 conv, max pool, 1x1 conv, 3x3 conv, max pool.
# NOTE(review): the MaxPooling2D layers use Keras' default 'valid' padding;
# confirm this is intended if exact agreement with a reference GoogLeNet
# feature-map size is required.
x = layers.Conv2D(64, 7, strides=2, padding='same', activation='relu')(inp)
x = layers.MaxPooling2D(3, strides=2)(x)
x = layers.Conv2D(64, 1, strides=1, padding='same', activation='relu')(x)
x = layers.Conv2D(192, 3, strides=1, padding='same', activation='relu')(x)
x = layers.MaxPooling2D(3, strides=2)(x)

# First group of two inception modules, followed by max pooling.
x = inception(x, filters_1_1=64, filters_3_3_reduce=96, filters_3_3=128, filters_5_5_reduce=16, filters_5_5=32, filter_pool=32)
x = inception(x, filters_1_1=128, filters_3_3_reduce=128, filters_3_3=192, filters_5_5_reduce=32, filters_5_5=96, filter_pool=64)
x = layers.MaxPooling2D(3, strides=2)(x)

# Second group: five inception modules with two auxiliary classifier heads
# branching off after the first and the fourth module.
x = inception(x, filters_1_1=192, filters_3_3_reduce=96, filters_3_3=208, filters_5_5_reduce=16, filters_5_5=48, filter_pool=64)
aux1 = _aux_classifier(x)
x = inception(x, filters_1_1=160, filters_3_3_reduce=112, filters_3_3=224, filters_5_5_reduce=24, filters_5_5=64, filter_pool=64)
x = inception(x, filters_1_1=128, filters_3_3_reduce=128, filters_3_3=256, filters_5_5_reduce=24, filters_5_5=64, filter_pool=64)
x = inception(x, filters_1_1=112, filters_3_3_reduce=144, filters_3_3=288, filters_5_5_reduce=32, filters_5_5=64, filter_pool=64)
aux2 = _aux_classifier(x)
x = inception(x, filters_1_1=256, filters_3_3_reduce=160, filters_3_3=320, filters_5_5_reduce=32, filters_5_5=128, filter_pool=128)
x = layers.MaxPooling2D(3, strides=2)(x)

# Third group: two inception modules.
x = inception(x, filters_1_1=256, filters_3_3_reduce=160, filters_3_3=320, filters_5_5_reduce=32, filters_5_5=128, filter_pool=128)
x = inception(x, filters_1_1=384, filters_3_3_reduce=192, filters_3_3=384, filters_5_5_reduce=48, filters_5_5=128, filter_pool=128)

# Main classifier head: global average pooling, dropout, softmax.
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.4)(x)
out = layers.Dense(NUM_CLASSES, activation='softmax')(x)

In [7]:
from tensorflow.keras import models
from tensorflow.keras import Model
# Functional model with one input and three outputs, in this order:
# main classifier, first auxiliary head, second auxiliary head.
model = Model(
    inputs=inp,
    outputs=[out, aux1, aux2],
)

Step 2: Compile the model

In [13]:
from tensorflow.keras import losses
# One sparse categorical cross-entropy loss per output (main, aux1, aux2); the
# two auxiliary losses are down-weighted to 0.3 in the total loss.
# 'val_loss' is not a metric identifier and makes Keras raise an error:
# validation loss is reported automatically by fit() when validation_data is
# passed. Metrics are given as one list per output so the specification is
# unambiguous for a three-output model.
model.compile(
    optimizer='adam',
    loss=[losses.sparse_categorical_crossentropy,
          losses.sparse_categorical_crossentropy,
          losses.sparse_categorical_crossentropy],
    loss_weights=[1, 0.3, 0.3],
    metrics=[['accuracy'], ['accuracy'], ['accuracy']],
)

Step 3: Fit the model

In [11]:
# All three output heads are trained against the same integer labels, so the
# label array is repeated once per model output.
train_targets = [trainy] * 3
val_targets = [val_y] * 3

history = model.fit(
    trainX,
    train_targets,
    validation_data=(val_x, val_targets),
    batch_size=64,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
