# InceptionV1 (GoogLeNet) implementation with TensorFlow-Keras

* Developed by Mohammad Hassan Heydari
* Based on the paper *Going Deeper with Convolutions*
* Trained on 2% of the CIFAR-10 dataset due to hardware limitations :(

In [1]:

# importing necessary libraries

import keras
from keras.layers import Layer
import keras.backend as K
import tensorflow as tf
from keras.datasets import cifar10
import math

from keras.models import Model
from keras.layers import Conv2D, MaxPool2D, \
    Dropout, Dense, Input, concatenate, \
    GlobalAveragePooling2D, AveragePooling2D, \
    Flatten

import numpy as np
from keras.datasets import cifar10
from keras import backend as K

from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler


In [2]:
def load_cifar10_data():
    """Load CIFAR-10 through Keras and rescale the images.

    Returns:
        A tuple ``(X_train, Y_train, X_valid, Y_valid)``. Both image arrays
        are cast to ``float32`` and divided by 255; both label arrays are
        returned exactly as ``cifar10.load_data()`` delivers them.
    """
    # Keras hands back the training split and the held-out split as two pairs.
    (X_train, Y_train), (X_valid, Y_valid) = cifar10.load_data()

    # Cast before dividing so the scaled images stay in float32.
    X_train = X_train.astype('float32') / 255.0
    X_valid = X_valid.astype('float32') / 255.0

    return X_train, Y_train, X_valid, Y_valid

# Load the dataset once and report the shape of each returned array, in the
# order: training images, training labels, test images, test labels.
X_train, y_train, X_test, y_test = load_cifar10_data()

for split in (X_train, y_train, X_test, y_test):
    print(split.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
(50000, 32, 32, 3)
(50000, 1)
(10000, 32, 32, 3)
(10000, 1)


In [3]:
from cv2 import resize
# Upscale a small subset of the training and test images to the size the
# network takes as input (see `Input(shape=(224, 224, 3))` further down).
# Only the first N_TRAIN / N_TEST samples are kept; 1000 of the 50000
# training images is the "2%" mentioned at the top of the notebook.
TARGET_SIZE = (224, 224)
N_TRAIN = 1000
N_TEST = 200

X_train = np.array([resize(img, TARGET_SIZE) for img in X_train[:N_TRAIN]])
X_test = np.array([resize(img, TARGET_SIZE) for img in X_test[:N_TEST]])

# The labels are cut with the same constants so they stay aligned with the
# images.
y_train = y_train[:N_TRAIN]
y_test = y_test[:N_TEST]


In [4]:

# inception block
def inception_module(input,
                     filters_1x1,
                     filters_3x3_reduce,
                     filters_3x3,
                     filters_5x5_reduce,
                     filters_5x5,
                     filters_pool_proj,
                     name=None):
    """Build one Inception block: four parallel branches joined on axis 3.

    Args:
        input: Tensor fed to every branch.
        filters_1x1: Number of filters of the plain 1x1 branch.
        filters_3x3_reduce: Number of filters of the 1x1 convolution placed
            in front of the 3x3 convolution.
        filters_3x3: Number of filters of the 3x3 convolution.
        filters_5x5_reduce: Number of filters of the 1x1 convolution placed
            in front of the 5x5 convolution.
        filters_5x5: Number of filters of the 5x5 convolution.
        filters_pool_proj: Number of filters of the 1x1 convolution applied
            after the 3x3 max pooling.
        name: Optional name given to the concatenation layer.

    Returns:
        The concatenation of the four branch outputs along axis 3.
    """
    def conv_relu(filters, kernel_size, tensor):
        # Every convolution in the block uses 'same' padding and ReLU.
        return Conv2D(filters, kernel_size, padding='same', activation='relu')(tensor)

    # Branch 1: a single 1x1 convolution.
    branch_1x1 = conv_relu(filters_1x1, (1, 1), input)

    # Branch 2: 1x1 reduction followed by a 3x3 convolution.
    branch_3x3 = conv_relu(filters_3x3_reduce, (1, 1), input)
    branch_3x3 = conv_relu(filters_3x3, (3, 3), branch_3x3)

    # Branch 3: 1x1 reduction followed by a 5x5 convolution.
    branch_5x5 = conv_relu(filters_5x5_reduce, (1, 1), input)
    branch_5x5 = conv_relu(filters_5x5, (5, 5), branch_5x5)

    # Branch 4: stride-1 max pooling followed by a 1x1 projection.
    branch_pool = MaxPool2D((3, 3), strides=(1, 1), padding='same')(input)
    branch_pool = conv_relu(filters_pool_proj, (1, 1), branch_pool)

    # Use the functional `concatenate` helper here, not the Concatenate class.
    return concatenate([branch_1x1, branch_3x3, branch_5x5, branch_pool],
                       axis=3, name=name)

# Network input: 224x224 images with 3 channels.
input_layer = Input(shape=(224, 224, 3))

# Stem: strided 7x7 convolution, pooling, a 1x1 and a 3x3 convolution, pooling.
x = Conv2D(filters=64, kernel_size=(7, 7), strides=(2, 2), padding='same',
           activation='relu', name='conv_1_7x7/2')(input_layer)
x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same',
              name='max_pool_1_3x3/2')(x)
# Despite the "3x3" in its name, this layer uses a 1x1 kernel.
x = Conv2D(filters=64, kernel_size=(1, 1), strides=(1, 1), padding='same',
           activation='relu', name='conv_2a_3x3/1')(x)
x = Conv2D(filters=192, kernel_size=(3, 3), strides=(1, 1), padding='same',
           activation='relu', name='conv_2b_3x3/1')(x)
x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same',
              name='max_pool_2_3x3/2')(x)

# Stage 3: two Inception blocks. Within each block every branch uses the
# same number of filters (16 for 3a, 32 for 3b).
for block_name, width in (('inception_3a', 16), ('inception_3b', 32)):
    x = inception_module(x,
                         filters_1x1=width,
                         filters_3x3_reduce=width,
                         filters_3x3=width,
                         filters_5x5_reduce=width,
                         filters_5x5=width,
                         filters_pool_proj=width,
                         name=block_name)

# Stride-2 pooling between stage 3 and stage 4.
x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same',
              name='max_pool_3_3x3/2')(x)

# First block of stage 4; the first extra classifier is attached to its output.
x = inception_module(
    x,
    filters_1x1=32,
    filters_3x3_reduce=32,
    filters_3x3=32,
    filters_5x5_reduce=32,
    filters_5x5=32,
    filters_pool_proj=32,
    name='inception_4a',
)

# First side exit of the network: a small classifier built on the current
# value of `x`. It yields a 10-way softmax in the layer named 'output_1'.
aux_1 = AveragePooling2D((5, 5), strides=3)(x)
aux_1 = Conv2D(64, (1, 1), padding='same', activation='relu')(aux_1)
aux_1 = Flatten()(aux_1)
aux_1 = Dense(64, activation='relu')(aux_1)
aux_1 = Dropout(0.2)(aux_1)
output_1 = Dense(10, activation='softmax', name='output_1')(aux_1)

# Three further stage-4 blocks that share one configuration: 32 filters in
# every branch. Only the layer name differs.
for block_name in ('inception_4b', 'inception_4c', 'inception_4d'):
    x = inception_module(
        x,
        filters_1x1=32,
        filters_3x3_reduce=32,
        filters_3x3=32,
        filters_5x5_reduce=32,
        filters_5x5=32,
        filters_pool_proj=32,
        name=block_name,
    )

# Second side exit of the network, built like the first one but on the
# current value of `x`. Its softmax layer is named 'auxilliary_output_2'.
aux_2 = AveragePooling2D((5, 5), strides=3)(x)
aux_2 = Conv2D(64, (1, 1), padding='same', activation='relu')(aux_2)
aux_2 = Flatten()(aux_2)
aux_2 = Dense(64, activation='relu')(aux_2)
aux_2 = Dropout(0.2)(aux_2)
output_2 = Dense(10, activation='softmax', name='auxilliary_output_2')(aux_2)

# Last block of stage 4, again with 32 filters in every branch.
x = inception_module(
    x,
    filters_1x1=32,
    filters_3x3_reduce=32,
    filters_3x3=32,
    filters_5x5_reduce=32,
    filters_5x5=32,
    filters_pool_proj=32,
    name='inception_4e',
)

# Stride-2 pooling between stage 4 and stage 5.
x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same',
              name='max_pool_4_3x3/2')(x)

# Stage 5: two blocks with much wider, per-branch filter counts.
# Columns: name, 1x1, 3x3 reduce, 3x3, 5x5 reduce, 5x5, pool projection.
stage_5_blocks = (
    ('inception_5a', 256, 160, 320, 32, 128, 128),
    ('inception_5b', 384, 192, 384, 48, 128, 128),
)
for block_name, n_1x1, n_3x3_red, n_3x3, n_5x5_red, n_5x5, n_pool in stage_5_blocks:
    x = inception_module(x,
                         filters_1x1=n_1x1,
                         filters_3x3_reduce=n_3x3_red,
                         filters_3x3=n_3x3,
                         filters_5x5_reduce=n_5x5_red,
                         filters_5x5=n_5x5,
                         filters_pool_proj=n_pool,
                         name=block_name)

# Classification head. The pooling layer's name mentions "3x3/1", but the
# layer itself is a global average pooling.
x = GlobalAveragePooling2D(name='avg_pool_5_3x3/1')(x)
x = Dropout(0.4)(x)

# Main prediction of the network: 10-way softmax in the layer named 'output'.
output_3 = Dense(units=10, activation='softmax', name='output')(x)


In [5]:
# Assemble the three-output model. The outputs are ordered main classifier
# first, then the two side classifiers; the per-output loss weights and the
# target lists passed at training time follow this same order.
model = Model(
    inputs=input_layer,
    outputs=[output_3, output_1, output_2],
    name='inception_v1',
)

model.summary()

Model: "inception_v1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv_1_7x7/2 (Conv2D)       (None, 112, 112, 64)         9472      ['input_1[0][0]']             
                                                                                                  
 max_pool_1_3x3/2 (MaxPooli  (None, 56, 56, 64)           0         ['conv_1_7x7/2[0][0]']        
 ng2D)                                                                                            
                                                                                                  
 conv_2a_3x3/1 (Conv2D)      (None, 56, 56, 64)           4160      ['max_pool_1_3x3/2[

In [6]:
# Show the devices TensorFlow can see (used here to check whether a GPU is available).
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [8]:
# Training hyper-parameters. `initial_lrate` is the single source of truth
# for the starting learning rate: it is read by `decay` below.
epochs = 20
initial_lrate = 0.01


def decay(epoch, steps=100):
    """Return the step-decayed learning rate for a given epoch.

    The rate starts at the module-level ``initial_lrate`` and is multiplied
    by 0.96 each time ``1 + epoch`` reaches a further multiple of 8, so the
    first drop happens at ``epoch == 7``.

    Args:
        epoch: Zero-based epoch index.
        steps: Ignored; kept so the signature stays compatible.
            NOTE(review): Keras' ``LearningRateScheduler`` is expected to
            call ``schedule(epoch, lr)``, in which case this parameter
            receives the current learning rate. Confirm for the installed
            Keras version.

    Returns:
        The learning rate to use for ``epoch``, as a float.
    """
    drop = 0.96
    epochs_drop = 8
    completed_drops = math.floor((1 + epoch) / epochs_drop)
    return initial_lrate * math.pow(drop, completed_drops)

# SGD with momentum. `learning_rate` is the supported keyword; the older
# `lr` alias is deprecated and rejected by recent Keras releases.
sgd = SGD(learning_rate=initial_lrate, momentum=0.9, nesterov=False)

# Apply the `decay` schedule during training and log each chosen rate.
lr_sc = LearningRateScheduler(decay, verbose=1)

# Print one label as a sanity check. Labels are integer class ids, not
# one-hot vectors, which is why the sparse cross-entropy loss is used below.
print(y_train[0])

# One loss per model output. The weights follow the order of the model's
# outputs: the main classifier counts fully, each side classifier with 0.3.
model.compile(loss='sparse_categorical_crossentropy',
              loss_weights=[1, 0.3, 0.3],
              optimizer=sgd,
              metrics=['accuracy'])

# All three outputs are trained against the same labels, so the target
# arrays are repeated once per output.
history = model.fit(X_train, [y_train, y_train, y_train],
                    validation_data=(X_test, [y_test, y_test, y_test]),
                    epochs=epochs,
                    batch_size=256,
                    callbacks=[lr_sc])




[6]

Epoch 1: LearningRateScheduler setting learning rate to 0.01.
Epoch 1/20

Epoch 2: LearningRateScheduler setting learning rate to 0.01.
Epoch 2/20

Epoch 3: LearningRateScheduler setting learning rate to 0.01.
Epoch 3/20

Epoch 4: LearningRateScheduler setting learning rate to 0.01.
Epoch 4/20

Epoch 5: LearningRateScheduler setting learning rate to 0.01.
Epoch 5/20

Epoch 6: LearningRateScheduler setting learning rate to 0.01.
Epoch 6/20

Epoch 7: LearningRateScheduler setting learning rate to 0.01.
Epoch 7/20

Epoch 8: LearningRateScheduler setting learning rate to 0.0096.
Epoch 8/20

Epoch 9: LearningRateScheduler setting learning rate to 0.0096.
Epoch 9/20

Epoch 10: LearningRateScheduler setting learning rate to 0.0096.
Epoch 10/20

Epoch 11: LearningRateScheduler setting learning rate to 0.0096.
Epoch 11/20

Epoch 12: LearningRateScheduler setting learning rate to 0.0096.
Epoch 12/20

Epoch 13: LearningRateScheduler setting learning rate to 0.0096.
Epoch 13/20

Epoch 14: Lea

* **Because of RAM and GPU limitations, we used only 2% of the whole dataset and also reduced the number of convolution kernels, so don't expect high accuracy, brothers and sisters :)**