## Inception/GoogLeNet Implementation with Keras

In [1]:
import cv2
import numpy as np
import keras
import math
from tensorflow.keras.layers import Layer
import tensorflow.keras.backend as K
import tensorflow as tf
from keras.models import Model

from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dropout, Dense, Input, concatenate,\
    GlobalAveragePooling2D, AveragePooling2D, Flatten

from tensorflow.keras import backend as K
from keras.utils import np_utils
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import LearningRateScheduler

In [2]:
num_classes = 10

def load_cifar10_data(img_rows, img_cols):
    """Load CIFAR-10, resize every image, and prepare arrays for Keras.

    Args:
        img_rows: Target image height in pixels.
        img_cols: Target image width in pixels.

    Returns:
        Tuple ``(X_train, Y_train, X_valid, Y_valid)``. Images are float32
        arrays divided by 255; labels are one-hot encoded over ``num_classes``.
        The "valid" pair is the second split returned by ``cifar10.load_data()``.
    """
    (X_train, Y_train), (X_valid, Y_valid) = cifar10.load_data()

    # cv2.resize expects dsize as (width, height), i.e. (cols, rows); passing
    # (rows, cols) would transpose any non-square target size.
    dsize = (img_cols, img_rows)
    X_train = np.array([cv2.resize(img, dsize) for img in X_train])
    X_valid = np.array([cv2.resize(img, dsize) for img in X_valid])

    Y_train = np_utils.to_categorical(Y_train, num_classes)
    Y_valid = np_utils.to_categorical(Y_valid, num_classes)

    X_train = X_train.astype('float32') / 255.0
    X_valid = X_valid.astype('float32') / 255.0

    return X_train, Y_train, X_valid, Y_valid

In [3]:
num_classes = 10

def load_cifar10_data(img_rows, img_cols):
    """Return resized, rescaled CIFAR-10 images with one-hot labels.

    Args:
        img_rows: Desired image height in pixels.
        img_cols: Desired image width in pixels.

    Returns:
        ``(X_train, Y_train, X_valid, Y_valid)`` where the image arrays are
        float32 values divided by 255 and the label arrays are one-hot encoded
        with ``num_classes`` columns.
    """
    # Load cifar10 training and validation sets
    (X_train, Y_train), (X_valid, Y_valid) = cifar10.load_data()

    # Resize images. cv2.resize takes its size as (width, height), so the
    # column count comes first; (rows, cols) would swap the two dimensions.
    target_size = (img_cols, img_rows)
    X_train = np.array([cv2.resize(img, target_size) for img in X_train])
    X_valid = np.array([cv2.resize(img, target_size) for img in X_valid])

    # Transform targets to keras compatible format
    Y_train = np_utils.to_categorical(Y_train, num_classes)
    Y_valid = np_utils.to_categorical(Y_valid, num_classes)

    # Preprocess data: cast to float32 and divide by 255
    X_train = X_train.astype('float32') / 255.0
    X_valid = X_valid.astype('float32') / 255.0

    return X_train, Y_train, X_valid, Y_valid

In [4]:
# Keep the images at 32x32; the second split returned by the loader is used as the test set.
X_train, y_train, X_test, y_test = load_cifar10_data(img_rows=32, img_cols=32)

In [5]:
def inception_module(x,
                    filters_1x1,
                    filters_3x3_reduce,
                    filters_3x3,
                    filters_5x5_reduce,
                    filters_5x5,
                    filters_pool_proj,
                    name=None):
    """Build one Inception block: four parallel branches concatenated on axis 3.

    Args:
        x: Input tensor fed to all four branches.
        filters_1x1: Filters of the plain 1x1 convolution branch.
        filters_3x3_reduce: Filters of the 1x1 convolution preceding the 3x3.
        filters_3x3: Filters of the 3x3 convolution.
        filters_5x5_reduce: Filters of the 1x1 convolution preceding the 5x5.
        filters_5x5: Filters of the 5x5 convolution.
        filters_pool_proj: Filters of the 1x1 convolution after the 3x3 max pool.
        name: Optional name given to the concatenation layer.

    Returns:
        The concatenation of the four branch outputs.
    """
    def relu_conv(filters, kernel_size):
        # Every convolution in the block shares padding, activation and the
        # module-level initializers; only filter count and kernel size vary.
        return Conv2D(filters, kernel_size, padding='same', activation='relu',
                      kernel_initializer=kernel_init, bias_initializer=bias_init)

    branch_1x1 = relu_conv(filters_1x1, (1, 1))(x)

    branch_3x3 = relu_conv(filters_3x3_reduce, (1, 1))(x)
    branch_3x3 = relu_conv(filters_3x3, (3, 3))(branch_3x3)

    branch_5x5 = relu_conv(filters_5x5_reduce, (1, 1))(x)
    branch_5x5 = relu_conv(filters_5x5, (5, 5))(branch_5x5)

    # Stride-1 pooling with 'same' padding keeps the spatial size so the
    # branch can be concatenated with the others.
    branch_pool = MaxPool2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = relu_conv(filters_pool_proj, (1, 1))(branch_pool)

    return concatenate([branch_1x1, branch_3x3, branch_5x5, branch_pool], axis=3, name=name)

In [6]:
# Initializers used by the Inception convolutions. Both are taken from
# tf.keras so they come from the same package as the layers they configure.
# NOTE(review): one unseeded GlorotUniform instance is shared by every layer;
# recent Keras releases warn about reusing unseeded initializers — confirm
# this is acceptable for the installed version.
kernel_init = tf.keras.initializers.GlorotUniform()
bias_init = tf.keras.initializers.Constant(value=0.2)

In [7]:
# Network input: 32x32 RGB images.
input_layer = Input(shape=(32, 32, 3))

# Three successive 2x upsamplings enlarge the input 8x, from 32x32 to 256x256,
# before it reaches the stem.
x = tf.keras.layers.UpSampling2D((2,2))(input_layer)
x = tf.keras.layers.UpSampling2D((2,2))(x)
x = tf.keras.layers.UpSampling2D((2,2))(x)

# Stem: strided 7x7 convolution and 3x3 convolution, each followed by a
# stride-2 max pool. Only the first convolution uses kernel_init/bias_init;
# conv_2b falls back to the Keras default initializers.
x = Conv2D(64, (7,7), padding='same', strides=(2,2), activation='relu', name='conv_1_7x7/2', kernel_initializer=kernel_init, bias_initializer=bias_init)(x)
x = MaxPool2D((3, 3), padding='same', strides=(2, 2), name='max_pool_1_3x3/2')(x)
# Disabled 1x1 reduction layer (kept for reference, not part of the model):
#x = Conv2D(64, (1, 1), padding='same', strides=(1, 1), activation='relu', name='conv_2a_3x3/1')(x)
x = Conv2D(192, (3,3), padding='same', strides=(1,1), activation='relu', name='conv_2b_3x3/1')(x)
x = MaxPool2D((3,3), padding='same', strides=(2,2), name='max_pool_2_3x3/2')(x)


# inception_3a: concatenated output has 64 + 128 + 32 + 32 = 256 channels.
x = inception_module(x,
                     filters_1x1=64,
                     filters_3x3_reduce=96,
                     filters_3x3=128,
                     filters_5x5_reduce=16,
                     filters_5x5=32,
                     filters_pool_proj=32,
                     name='inception_3a')

# inception_3b: 128 + 192 + 96 + 64 = 480 channels.
x = inception_module(x,
                     filters_1x1=128,
                     filters_3x3_reduce=128,
                     filters_3x3=192,
                     filters_5x5_reduce=32,
                     filters_5x5=96,
                     filters_pool_proj=64,
                     name='inception_3b')

# Stride-2 pool halves the spatial size before the 4x group of blocks.
x = MaxPool2D((3,3), padding='same', strides=(2,2), name='max_pool_3_3x3/2')(x)

# inception_4a: 192 + 208 + 48 + 64 = 512 channels; the first auxiliary
# classifier branches off this output.
x = inception_module(x,
                     filters_1x1=192,
                     filters_3x3_reduce=96,
                     filters_3x3=208,
                     filters_5x5_reduce=16,
                     filters_5x5=48,
                     filters_pool_proj=64,
                     name='inception_4a')

# Auxiliary classifier 1, branching off the inception_4a output:
# average pool -> 1x1 conv -> dense -> dropout -> softmax over num_classes.
classifier_1 = AveragePooling2D((5,5), strides=3)(x)
classifier_1 = Conv2D(128, (1,1), padding='same', activation='relu')(classifier_1)
classifier_1 = Flatten()(classifier_1)
classifier_1 = Dense(1024, activation='relu')(classifier_1)
classifier_1 = Dropout(0.7)(classifier_1)
# Name spelled to match 'auxilliary_output_2' (was 'auxilliary_ouput_1'), so
# per-output log and history keys follow one pattern.
classifier_1 = Dense(num_classes, activation='softmax', name='auxilliary_output_1')(classifier_1)


# inception_4b: 160 + 224 + 64 + 64 = 512 channels.
x = inception_module(x,
                     filters_1x1=160,
                     filters_3x3_reduce=112,
                     filters_3x3=224,
                     filters_5x5_reduce=24,
                     filters_5x5=64,
                     filters_pool_proj=64,
                     name='inception_4b')

# inception_4c: 128 + 256 + 64 + 64 = 512 channels.
x = inception_module(x,
                     filters_1x1=128,
                     filters_3x3_reduce=128,
                     filters_3x3=256,
                     filters_5x5_reduce=24,
                     filters_5x5=64,
                     filters_pool_proj=64,
                     name='inception_4c')

# inception_4d: 112 + 288 + 64 + 64 = 528 channels; the second auxiliary
# classifier branches off this output.
x = inception_module(x,
                     filters_1x1=112,
                     filters_3x3_reduce=144,
                     filters_3x3=288,
                     filters_5x5_reduce=32,
                     filters_5x5=64,
                     filters_pool_proj=64,
                     name='inception_4d')


# Auxiliary classifier 2, branching off the inception_4d output:
# average pool -> 1x1 conv -> dense -> dropout -> softmax over num_classes.
classifier_2 = AveragePooling2D((5, 5), strides=3)(x)
classifier_2 = Conv2D(128, (1, 1), padding='same', activation='relu')(classifier_2)
classifier_2 = Flatten()(classifier_2)
classifier_2 = Dense(1024, activation='relu')(classifier_2)
classifier_2 = Dropout(0.7)(classifier_2)
# Output width follows num_classes so it stays in step with the one-hot labels.
classifier_2 = Dense(num_classes, activation='softmax', name='auxilliary_output_2')(classifier_2)


# inception_4e: 256 + 320 + 128 + 128 = 832 channels.
x = inception_module(x,
                     filters_1x1=256,
                     filters_3x3_reduce=160,
                     filters_3x3=320,
                     filters_5x5_reduce=32,
                     filters_5x5=128,
                     filters_pool_proj=128,
                     name='inception_4e')

# Stride-2 pool halves the spatial size before the 5x group of blocks.
x = MaxPool2D((3, 3), padding='same', strides=(2, 2), name='max_pool_4_3x3/2')(x)

# inception_5a: same filter counts as inception_4e, 832 channels out.
x = inception_module(x,
                     filters_1x1=256,
                     filters_3x3_reduce=160,
                     filters_3x3=320,
                     filters_5x5_reduce=32,
                     filters_5x5=128,
                     filters_pool_proj=128,
                     name='inception_5a')

# inception_5b: 384 + 384 + 128 + 128 = 1024 channels.
x = inception_module(x,
                     filters_1x1=384,
                     filters_3x3_reduce=192,
                     filters_3x3=384,
                     filters_5x5_reduce=48,
                     filters_5x5=128,
                     filters_pool_proj=128,
                     name='inception_5b')

# Main classification head: global average pooling collapses the spatial
# dimensions, followed by dropout and a softmax over num_classes.
x = GlobalAveragePooling2D(name='avg_pool_5_3x3/1')(x)
x = Dropout(0.4)(x)
x = Dense(num_classes, activation='softmax', name='output')(x)

In [8]:
# Sanity check on the main output tensor's shape (batch dimension, class scores).
x.shape

TensorShape([None, 10])

In [9]:
# Single-output model: exposes only the main softmax head.
model = Model(inputs=input_layer, outputs=[x], name='googlenet')

In [10]:
# Print the layer-by-layer summary of the single-output model.
model.summary()

Model: "googlenet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 up_sampling2d (UpSampling2D)   (None, 64, 64, 3)    0           ['input_1[0][0]']                
                                                                                                  
 up_sampling2d_1 (UpSampling2D)  (None, 128, 128, 3)  0          ['up_sampling2d[0][0]']          
                                                                                                  
 up_sampling2d_2 (UpSampling2D)  (None, 256, 256, 3)  0          ['up_sampling2d_1[0][0]']        
                                                                                          

                                                                                                  
 conv2d_22 (Conv2D)             (None, 16, 16, 24)   12312       ['inception_4a[0][0]']           
                                                                                                  
 max_pooling2d_3 (MaxPooling2D)  (None, 16, 16, 512)  0          ['inception_4a[0][0]']           
                                                                                                  
 conv2d_19 (Conv2D)             (None, 16, 16, 160)  82080       ['inception_4a[0][0]']           
                                                                                                  
 conv2d_21 (Conv2D)             (None, 16, 16, 224)  226016      ['conv2d_20[0][0]']              
                                                                                                  
 conv2d_23 (Conv2D)             (None, 16, 16, 64)   38464       ['conv2d_22[0][0]']              
          

                                                                                                  
 conv2d_44 (Conv2D)             (None, 8, 8, 256)    213248      ['max_pool_4_3x3/2[0][0]']       
                                                                                                  
 conv2d_46 (Conv2D)             (None, 8, 8, 320)    461120      ['conv2d_45[0][0]']              
                                                                                                  
 conv2d_48 (Conv2D)             (None, 8, 8, 128)    102528      ['conv2d_47[0][0]']              
                                                                                                  
 conv2d_49 (Conv2D)             (None, 8, 8, 128)    106624      ['max_pooling2d_7[0][0]']        
                                                                                                  
 inception_5a (Concatenate)     (None, 8, 8, 832)    0           ['conv2d_44[0][0]',              
          

In [11]:
# Three-output model: the main head followed by both auxiliary classifiers.
model_with_classifiers = Model(
    inputs=input_layer,
    outputs=[x, classifier_1, classifier_2],
    name='googlenet_complete_architecture',
)
model_with_classifiers.summary()

Model: "googlenet_complete_architecture"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 up_sampling2d (UpSampling2D)   (None, 64, 64, 3)    0           ['input_1[0][0]']                
                                                                                                  
 up_sampling2d_1 (UpSampling2D)  (None, 128, 128, 3)  0          ['up_sampling2d[0][0]']          
                                                                                                  
 up_sampling2d_2 (UpSampling2D)  (None, 256, 256, 3)  0          ['up_sampling2d_1[0][0]']        
                                                                    

                                                                                                  
 conv2d_22 (Conv2D)             (None, 16, 16, 24)   12312       ['inception_4a[0][0]']           
                                                                                                  
 max_pooling2d_3 (MaxPooling2D)  (None, 16, 16, 512)  0          ['inception_4a[0][0]']           
                                                                                                  
 conv2d_19 (Conv2D)             (None, 16, 16, 160)  82080       ['inception_4a[0][0]']           
                                                                                                  
 conv2d_21 (Conv2D)             (None, 16, 16, 224)  226016      ['conv2d_20[0][0]']              
                                                                                                  
 conv2d_23 (Conv2D)             (None, 16, 16, 64)   38464       ['conv2d_22[0][0]']              
          

                                                                                                  
 conv2d_44 (Conv2D)             (None, 8, 8, 256)    213248      ['max_pool_4_3x3/2[0][0]']       
                                                                                                  
 conv2d_46 (Conv2D)             (None, 8, 8, 320)    461120      ['conv2d_45[0][0]']              
                                                                                                  
 conv2d_48 (Conv2D)             (None, 8, 8, 128)    102528      ['conv2d_47[0][0]']              
                                                                                                  
 conv2d_49 (Conv2D)             (None, 8, 8, 128)    106624      ['max_pooling2d_7[0][0]']        
                                                                                                  
 inception_5a (Concatenate)     (None, 8, 8, 832)    0           ['conv2d_44[0][0]',              
          

In [12]:
epochs = 1
initial_lrate = 0.01

def decay(epoch, steps=100):
    """Step-decay schedule: scale the base rate by 0.96 once every 8 epochs.

    Args:
        epoch: Epoch index passed in by the ``LearningRateScheduler`` callback.
        steps: Unused. Kept so the signature stays compatible; when the Keras
            callback calls the schedule with two arguments, the current
            learning rate arrives here and is ignored.

    Returns:
        The learning rate for ``epoch``, computed from the module-level
        ``initial_lrate`` so that value has a single source of truth.
    """
    drop = 0.96
    epochs_drop = 8
    # (1 + epoch) makes the first drop land on epoch index 7.
    return initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))

# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# triggers a warning when the optimizer is constructed.
sgd = SGD(learning_rate=initial_lrate, momentum=0.9, nesterov=False)

# Sets the learning rate from decay() at the start of each epoch.
lr_sc = LearningRateScheduler(decay, verbose=1)

# One cross-entropy loss per output, in model output order: the main head has
# weight 1 and each auxiliary classifier is down-weighted to 0.3.
model_with_classifiers.compile(loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
                             loss_weights=[1, 0.3, 0.3], optimizer=sgd, metrics=['accuracy'])

  super(SGD, self).__init__(name, **kwargs)


In [13]:
# All three output heads are trained and validated against the same one-hot labels.
history = model_with_classifiers.fit(
    X_train,
    [y_train] * 3,
    validation_data=(X_test, [y_test] * 3),
    epochs=epochs,
    batch_size=64,
    callbacks=[lr_sc],
)


Epoch 1: LearningRateScheduler setting learning rate to 0.01.


In [22]:
# Training setup for the single-output model. `epochs`, `initial_lrate` and
# `decay` are reused from the earlier training-configuration cell rather than
# redefined, so the schedule lives in one place.

# A fresh optimizer instance is created for this model; `learning_rate`
# replaces the deprecated `lr` keyword.
sgd = SGD(learning_rate=initial_lrate, momentum=0.9, nesterov=False)

lr_sc = LearningRateScheduler(decay, verbose=1)

# Single output, so one loss with the default weight of 1 is sufficient.
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [None]:
# Train the single-output model, validating on the held-out split each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=256,
    callbacks=[lr_sc],
)


Epoch 1: LearningRateScheduler setting learning rate to 0.01.
  5/196 [..............................] - ETA: 7:24:56 - loss: 2.6015 - accuracy: 0.0992