In [1]:
import torch 
import torch.nn as nn
!pip install -q torch-summary
from torchsummary import summary 
import torch.nn.functional as F

In [13]:
class Inception(nn.Module):
    """Inception module: four parallel branches joined along the channel axis.

    Every branch receives the same input and keeps its spatial size (1x1
    convolutions, 'same'-padded 3x3/5x5 convolutions, and a stride-1 max-pool
    with padding 1), so the results can be concatenated on ``dim=1``.

    Args:
        fan_in: number of channels of the incoming feature map.
        n_1x1: output channels of the plain 1x1 branch (``block1``).
        n_1x1_b4_3x3: channels of the 1x1 reduction placed before the 3x3 conv.
        n_3x3: output channels of the 3x3 branch (``block2``).
        n_1x1_b4_5x5: channels of the 1x1 reduction placed before the 5x5 conv.
        n_5x5: output channels of the 5x5 branch (``block3``).
        n_1x1_pool: output channels of the 1x1 conv after the max-pool (``block4``).

    The output carries ``n_1x1 + n_3x3 + n_5x5 + n_1x1_pool`` channels.
    """

    def __init__(self, fan_in, n_1x1, n_1x1_b4_3x3, n_3x3, n_1x1_b4_5x5, n_5x5, n_1x1_pool):
        super().__init__()
        # The block factory is kept on the instance under its original name.
        self._conv_block = ConvBlock
        conv = self._conv_block

        # Branch 1: 1x1 convolution only.
        self.block1 = conv(fan_in, n_1x1, kernel_size=1)

        # Branch 2: 1x1 channel reduction, then 3x3 convolution.
        reduce_3x3 = conv(fan_in, n_1x1_b4_3x3, kernel_size=1)
        conv_3x3 = conv(n_1x1_b4_3x3, n_3x3, kernel_size=3, padding='same')
        self.block2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 channel reduction, then 5x5 convolution.
        reduce_5x5 = conv(fan_in, n_1x1_b4_5x5, kernel_size=1)
        conv_5x5 = conv(n_1x1_b4_5x5, n_5x5, kernel_size=5, padding='same')
        self.block3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: size-preserving 3x3 max-pool, then 1x1 projection.
        pool = nn.MaxPool2d(3, stride=1, ceil_mode=True, padding=1)
        pool_proj = conv(fan_in, n_1x1_pool, kernel_size=1)
        self.block4 = nn.Sequential(pool, pool_proj)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them channel-wise.

        Example from the original notes, for the first module of the network:
        the branches yield 64, 128, 32 and 32 channels at 28x28 resolution.
        """
        branches = (self.block1, self.block2, self.block3, self.block4)
        return torch.cat([branch(x) for branch in branches], dim=1)

        
        
#     def _conv_block(self, fan_in, fan_out, **kwargs):
#         block = nn.Sequential(
#             nn.Conv2d(fan_in, fan_out, bias = False, **kwargs),
#             nn.BatchNorm2d(fan_out),
#             nn.ReLU()
#         )
#         return block


    
class ConvBlock(nn.Module):
    """Conv2d (without bias) -> BatchNorm2d -> ReLU, packaged as one module.

    This lives at module level instead of being a method of ``Inception``
    because the ``GoogLeNet`` class needs to build these blocks as well.

    Args:
        fan_in: number of input channels of the convolution.
        fan_out: number of output channels (also the BatchNorm2d feature count).
        kernel_size: kernel size handed to ``nn.Conv2d``.
        **kwargs: any further keyword arguments are forwarded unchanged to
            ``nn.Conv2d`` (for example ``stride`` or ``padding``).
    """

    def __init__(self, fan_in, fan_out, kernel_size, **kwargs):
        super().__init__()
        # The convolution is created with bias=False; it is followed directly
        # by a BatchNorm2d layer and then the ReLU activation.
        layers = [
            nn.Conv2d(fan_in, fan_out, kernel_size, bias=False, **kwargs),
            nn.BatchNorm2d(fan_out),
            nn.ReLU(),
        ]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply convolution, batch normalisation and ReLU to ``x``."""
        return self.block(x)

In [14]:
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) image classifier with a 1000-way output.

    The network is a convolutional stem (conv / max-pool / local response
    norm), nine ``Inception`` modules interleaved with max-pooling, a 7x7
    average pool, dropout (p=0.4) and a final linear layer. The auxiliary
    branches described in the paper are not implemented here.

    The shape comments next to each layer assume an input of shape
    (1, 3, 224, 224). The 7x7 average pool and the 1024-feature linear layer
    are sized for that resolution.

    ``forward`` returns softmax probabilities, not raw logits.
    NOTE(review): losses such as ``nn.CrossEntropyLoss`` expect raw logits;
    confirm how this output is consumed before training with it.
    """

    def __init__(self):
        super().__init__()

        # Each stage gets its own pooling / normalisation instance instead of
        # registering one shared module under several names. These layers hold
        # no parameters, so the state_dict is unaffected.
        def max_pool():
            return nn.MaxPool2d(3, stride=2, ceil_mode=True)

        def lrn():
            return nn.LocalResponseNorm(5, k=2)

        # Stem
        self.conv1 = ConvBlock(3, 64, 7, stride=2, padding=3)  # (1, 64, 112, 112)
        self.maxpool1 = max_pool()  # (1, 64, 56, 56)
        self.lrn1 = lrn()  # (1, 64, 56, 56)
        self.conv2_a = ConvBlock(64, 64, 1)  # (1, 64, 56, 56)
        self.conv2_b = ConvBlock(64, 192, 3, padding='same')  # (1, 192, 56, 56)
        self.lrn2 = lrn()  # (1, 192, 56, 56)
        self.maxpool2 = max_pool()  # (1, 192, 28, 28)

        # Inception stages
        self.inception1 = Inception(192, 64, 96, 128, 16, 32, 32)  # (1, 256, 28, 28)
        self.inception2 = Inception(256, 128, 128, 192, 32, 96, 64)  # (1, 480, 28, 28)
        self.maxpool3 = max_pool()  # (1, 480, 14, 14)
        self.inception3 = Inception(480, 192, 96, 208, 16, 48, 64)  # (1, 512, 14, 14)
        self.inception4 = Inception(512, 160, 112, 224, 24, 64, 64)  # (1, 512, 14, 14)
        self.inception5 = Inception(512, 128, 128, 256, 24, 64, 64)  # (1, 512, 14, 14)
        self.inception6 = Inception(512, 112, 144, 288, 32, 64, 64)  # (1, 528, 14, 14)
        self.inception7 = Inception(528, 256, 160, 320, 32, 128, 128)  # (1, 832, 14, 14)
        self.maxpool4 = max_pool()  # (1, 832, 7, 7)
        self.inception8 = Inception(832, 256, 160, 320, 32, 128, 128)  # (1, 832, 7, 7)
        self.inception9 = Inception(832, 384, 192, 384, 48, 128, 128)  # (1, 1024, 7, 7)

        # Classifier head
        self.avgpool1 = nn.AvgPool2d(7, stride=1)  # (1, 1024, 1, 1)
        self.ln1 = nn.Linear(1024, 1000)

    def forward(self, x):
        """Classify a batch of images.

        Args:
            x: 4-D input tensor (batch, channels, height, width). The layer
                sizes are chosen for 3-channel 224x224 images.

        Returns:
            Tensor of shape (batch, 1000) holding softmax probabilities.

        Raises:
            AssertionError: if ``x`` is not 4-dimensional.
        """
        # The BatchNorm layers inside the conv blocks need a batch dimension.
        assert x.dim() == 4, f'Input tensor of dimension {x.shape}, requires to be 4.'

        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.lrn1(x)
        x = self.conv2_a(x)
        x = self.conv2_b(x)
        x = self.lrn2(x)
        x = self.maxpool2(x)
        x = self.inception1(x)
        x = self.inception2(x)
        x = self.maxpool3(x)
        x = self.inception3(x)
        x = self.inception4(x)
        x = self.inception5(x)
        x = self.inception6(x)
        x = self.inception7(x)
        x = self.maxpool4(x)
        x = self.inception8(x)
        x = self.inception9(x)
        x = self.avgpool1(x)
        # F.dropout defaults to training=True; tie it to the module's mode so
        # that model.eval() really disables dropout at inference time.
        x = F.dropout(x, 0.4, training=self.training)
        # Flatten everything except the batch dimension. Unlike view(-1, 1024),
        # this cannot silently fold leftover spatial positions into the batch
        # when the input resolution is not 224x224; the linear layer raises
        # a shape error instead.
        x = torch.flatten(x, 1)
        x = self.ln1(x)
        x = F.softmax(x, dim=-1)

        return x
        
        
        
        
        
        
        
   

In [16]:
# Smoke test: push one reproducibly-seeded random 224x224 RGB image through the
# network and display the shape of the result.
x = torch.rand(1, 3, 224, 224, generator=torch.Generator().manual_seed(2))
model = GoogLeNet()
model(x).size()

torch.Size([1, 1000])

The final output shape, `(1, 1000)`, matches the one given in the paper, which indicates that the layer dimensions have been wired up correctly. Note that the original paper also includes an `Auxiliary Inception` part, which has not been included in this version of the GoogLeNet implementation.