<a href="https://colab.research.google.com/github/aju22/GoogleNet/blob/main/GoogLeNet_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import tensorflow.keras.layers as layers

# Model Architecture


GoogLeNet is a type of convolutional neural network based on the Inception architecture. It utilises Inception modules, which allow the network to choose between multiple convolutional filter sizes in each block. An Inception network stacks these modules on top of each other, with occasional max-pooling layers with stride 2 to halve the resolution of the grid.

![](https://www.gabormelli.com/RKB/images/thumb/5/5e/GoogLeNet_slide58.png/650px-GoogLeNet_slide58.png)

In [2]:
class CNNBlock(layers.Layer):
  """Convolution followed by batch normalization and a ReLU activation.

  Args:
    out_channels: Number of filters of the convolution.
    **kwargs: Forwarded unchanged to `layers.Conv2D` (for example
      `kernel_size`, `strides`, `padding`).
  """

  def __init__(self, out_channels, **kwargs):
    super().__init__()
    # Sub-layers are created in the same order as before so that Keras
    # tracks them (and their weights) in an unchanged order.
    self.relu = layers.ReLU()
    self.conv = layers.Conv2D(out_channels, **kwargs)
    self.bn = layers.BatchNormalization()

  def call(self, x, training = False):
    """Apply conv -> batch norm -> ReLU to `x`.

    Args:
      x: Input tensor handed to the convolution.
      training: Forwarded to the convolution and to batch normalization.

    Returns:
      The activated feature map.
    """
    features = self.conv(x, training=training)
    normalized = self.bn(features, training=training)
    return self.relu(normalized)

## Inception Module

The Inception module differs from previous architectures such as AlexNet and ZF-Net, which use a fixed convolution size for each layer.


In the Inception module, 1×1, 3×3 and 5×5 convolutions and a 3×3 max pooling are applied in parallel to the same input, and their outputs are stacked together to generate the final output. The idea is that convolution filters of different sizes handle objects at multiple scales better.

![](https://miro.medium.com/max/751/1*AU4SaMgvVYFREakegt-R4A.png)

In [3]:
class InceptionBlock(layers.Layer):
  """Inception module: four parallel branches concatenated along channels.

  Every branch uses stride 1 and 'same' padding, so all branch outputs share
  the spatial size of the input and can be concatenated.

  Args:
    out_1x1: Filters of the 1x1 convolution branch.
    red_3x3: Filters of the 1x1 reduction placed before the 3x3 convolution.
    out_3x3: Filters of the 3x3 convolution.
    red_5x5: Filters of the 1x1 reduction placed before the 5x5 convolution.
    out_5x5: Filters of the 5x5 convolution.
    out_1x1pool: Filters of the 1x1 convolution applied after the 3x3 max-pool.
  """

  def __init__(self, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool):
    super().__init__()

    # Branch 1: plain 1x1 convolution.
    self.branch1 = CNNBlock(out_1x1, kernel_size=1, strides=1, padding='same')

    # Branch 2: 1x1 reduction followed by a 3x3 convolution.
    self.branch2 = tf.keras.Sequential([
        CNNBlock(red_3x3, kernel_size=1, strides=1, padding='same'),
        CNNBlock(out_3x3, kernel_size=3, strides=1, padding='same'),
    ])

    # Branch 3: 1x1 reduction followed by a 5x5 convolution. The reducer now
    # spells out strides/padding like its siblings; for a 1x1 kernel with
    # stride 1 this is identical to the Conv2D defaults used previously.
    self.branch3 = tf.keras.Sequential([
        CNNBlock(red_5x5, kernel_size=1, strides=1, padding='same'),
        CNNBlock(out_5x5, kernel_size=5, strides=1, padding='same'),
    ])

    # Branch 4: 3x3 max-pool (stride 1) followed by a 1x1 convolution.
    self.branch4 = tf.keras.Sequential([
        layers.MaxPooling2D(pool_size=(3, 3), strides=1, padding="same"),
        CNNBlock(out_1x1pool, kernel_size=1, strides=1, padding='same'),
    ])

  def call(self, x, training=None):
    """Run all four branches on `x` and concatenate their outputs.

    Args:
      x: Input feature map. Assumes Keras' default `channels_last` layout,
        i.e. (N, rows, cols, channels), since no `data_format` is passed to
        the convolutions.
      training: Optional training flag forwarded to every branch. `None`
        lets Keras resolve the mode from the calling context.

    Returns:
      Tensor whose last axis holds the concatenated branch outputs
      (`out_1x1 + out_3x3 + out_5x5 + out_1x1pool` channels).
    """
    branches = (self.branch1, self.branch2, self.branch3, self.branch4)

    # Concatenate on the last axis. This was already the effective behaviour
    # (the default of `layers.concatenate`); the earlier "Axis = 1" note
    # described a channels-first layout that this code does not use.
    return layers.concatenate(
        [branch(x, training=training) for branch in branches],
        axis=-1,
    )

In [4]:
class GoogLeNet(tf.keras.Model):
  """GoogLeNet (Inception v1) image classifier.

  The network is a convolutional stem, three stages of Inception blocks
  (each but the last followed by a stride-2 max-pool), global average
  pooling, dropout and a dense layer producing `num_classes` raw scores
  (no softmax is applied).

  Inputs are assumed to follow Keras' default `channels_last` layout,
  i.e. (batch, height, width, channels), since no `data_format` is set.

  Args:
    num_classes: Number of units in the final dense layer.
  """

  def __init__(self, num_classes = 1000):
    super().__init__()

    # Stem, part 1: 7x7/2 convolution then 3x3/2 max-pool.
    self.conv1 = CNNBlock(64, kernel_size=7, strides=2, padding='same')
    self.maxpool1 = layers.MaxPooling2D((3, 3), strides=2, padding='same')
    self.convblock1 = tf.keras.Sequential([self.conv1, self.maxpool1])

    # Stem, part 2: 3x3 convolution then 3x3/2 max-pool.
    self.conv2 = CNNBlock(192, kernel_size=3, strides=1, padding='same')
    self.maxpool2 = layers.MaxPooling2D((3, 3), strides=2, padding='same')
    self.convblock2 = tf.keras.Sequential([self.conv2, self.maxpool2])

    # Stage 3: two Inception blocks then a stride-2 max-pool.
    self.inception3a = InceptionBlock(64, 96, 128, 16, 32, 32)
    self.inception3b = InceptionBlock(128, 128, 192, 32, 96, 64)
    self.maxpool3 = layers.MaxPooling2D((3, 3), strides=2, padding='same')
    self.inception3 = tf.keras.Sequential([
        self.inception3a,
        self.inception3b,
        self.maxpool3,
    ])

    # Stage 4: five Inception blocks then a stride-2 max-pool.
    self.inception4a = InceptionBlock(192, 96, 208, 16, 48, 64)
    self.inception4b = InceptionBlock(160, 112, 224, 24, 64, 64)
    self.inception4c = InceptionBlock(128, 128, 256, 24, 64, 64)
    self.inception4d = InceptionBlock(112, 144, 288, 32, 64, 64)
    self.inception4e = InceptionBlock(256, 160, 320, 32, 128, 128)
    self.maxpool4 = layers.MaxPooling2D((3, 3), strides=2, padding='same')
    self.inception4 = tf.keras.Sequential([
        self.inception4a,
        self.inception4b,
        self.inception4c,
        self.inception4d,
        self.inception4e,
        self.maxpool4,
    ])

    # Stage 5: two Inception blocks then global average pooling.
    self.inception5a = InceptionBlock(256, 160, 320, 32, 128, 128)
    self.inception5b = InceptionBlock(384, 192, 384, 48, 128, 128)
    # The previous 7x7 average pool used stride 1 with 'same' padding, which
    # leaves the spatial size untouched, so the classifier received
    # H*W*1024 features. Global average pooling collapses the map to one
    # 1024-vector per sample for any input resolution (equal to a 'valid'
    # 7x7 pool on the 7x7 map produced by 224x224 inputs).
    self.avgpool = layers.GlobalAveragePooling2D()
    self.inception5 = tf.keras.Sequential([
        self.inception5a,
        self.inception5b,
        self.avgpool,
    ])

    self.dropout = layers.Dropout(0.4)
    self.fc1 = layers.Dense(num_classes)

  def call(self, x, training=None):
    """Compute class scores for a batch of images.

    Args:
      x: Batch of images, assumed (batch, height, width, channels).
      training: Optional training flag forwarded to the sub-layers (affects
        batch normalization and dropout). `None` lets Keras resolve the mode
        from the calling context.

    Returns:
      Tensor of shape (batch, num_classes) with unnormalized scores.
    """
    x = self.convblock1(x, training=training)
    x = self.convblock2(x, training=training)

    x = self.inception3(x, training=training)
    x = self.inception4(x, training=training)
    # `inception5` ends in global average pooling, so its output is already
    # (batch, channels). The former `tf.reshape(x, (x.shape[0], -1))` is no
    # longer needed; it failed whenever the static batch size was None
    # (e.g. inside `fit` / `tf.function`).
    x = self.inception5(x, training=training)

    x = self.dropout(x, training=training)
    return self.fc1(x)

In [5]:
# Dummy batch of 3 RGB images, 224x224, in channels-last (N, H, W, C) order.
# Keras convolutions default to channels-last, so the former (3, 3, 224, 224)
# shape was read as 3x224 images with 224 channels.
x = tf.random.normal((3, 224, 224, 3))

In [6]:
# Build the network with its default number of output classes (1000).
model = GoogLeNet()

In [7]:
# Run one forward pass on the dummy batch and print the output shape.
print(model(x).shape)

(3, 1000)
