# GoogLeNet

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

How should the kernel size of a convolution be chosen?  
Both small and large kernels have their advantages...  
Google presented *GoogLeNet*, an architecture that combines ideas from *NiN* with convolutions of various kernel sizes run in parallel inside each block, called an **Inception** block.

<center>
    <img src='images/inception.svg' height=100% style="margin-left:auto; margin-right:auto"/>
    <p style="font-size:14px;">Source: <a href='http://d2l.ai/'>D2L</a></p>
</center>

Padding and stride are chosen so that every path's output has the same height and width as the input, which is what allows the four outputs to be concatenated along the channel dimension.

The 1x1 convolutions reduce the number of channels and thus the computational cost of the 3x3 and 5x5 convolutions that follow them.

In [2]:
class Inception(nn.Module):
    """Inception block: four parallel paths concatenated along dim 1.

    Every path uses stride 1 with padding matched to its kernel size, so each
    path's output keeps the input's height and width; only the channel count
    differs between paths.

    Args:
        in_channels: Number of channels of the input feature map.
        c1: Output channels of path 1 (a single 1x1 convolution).
        c2: Indexable pair for path 2; ``c2[0]`` is the output width of the
            1x1 convolution and ``c2[1]`` that of the following 3x3 convolution.
        c3: Indexable pair for path 3; ``c3[0]`` is the output width of the
            1x1 convolution and ``c3[1]`` that of the following 5x5 convolution.
        c4: Output channels of path 4 (3x3 max pooling followed by a 1x1
            convolution).
        **kwargs: Forwarded unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super().__init__(**kwargs)

        # Path 1: one 1x1 convolution.
        self.p1_1 = nn.Conv2d(in_channels=in_channels, out_channels=c1, kernel_size=1)

        # Path 2: 1x1 convolution feeding a 3x3 convolution (padding 1 keeps H and W).
        self.p2_1 = nn.Conv2d(in_channels=in_channels, out_channels=c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(in_channels=c2[0], out_channels=c2[1], kernel_size=3, padding=1)

        # Path 3: 1x1 convolution feeding a 5x5 convolution (padding 2 keeps H and W).
        self.p3_1 = nn.Conv2d(in_channels=in_channels, out_channels=c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(in_channels=c3[0], out_channels=c3[1], kernel_size=5, padding=2)

        # Path 4: 3x3 max pooling (stride 1, padding 1) feeding a 1x1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels=in_channels, out_channels=c4, kernel_size=1)

    def forward(self, x):
        """Run ``x`` through the four paths and concatenate the results on dim 1.

        For a batched ``(N, C, H, W)`` input, dim 0 is the batch dimension and
        dim 1 the channel dimension, so the result has
        ``c1 + c2[1] + c3[1] + c4`` channels.
        """
        branch_1x1 = F.relu(self.p1_1(x))

        reduced_3x3 = F.relu(self.p2_1(x))
        branch_3x3 = F.relu(self.p2_2(reduced_3x3))

        reduced_5x5 = F.relu(self.p3_1(x))
        branch_5x5 = F.relu(self.p3_2(reduced_5x5))

        # The pooling output goes straight into the 1x1 convolution; ReLU is
        # applied only after the convolution.
        pooled = self.p4_1(x)
        branch_pool = F.relu(self.p4_2(pooled))

        return torch.cat([branch_1x1, branch_3x3, branch_5x5, branch_pool], dim=1)

<center>
    <img src='inception-full.svg' height=100% style="margin-left:auto; margin-right:auto"/>
    <p style="font-size:14px;">Source: <a href='http://d2l.ai/'>D2L</a></p>
</center>

In [3]:
# Flat stack of layers: two convolutional stages followed by one Inception
# stage. No classifier head is part of this Sequential.
googleLeNet = nn.Sequential(
    # Stage 1: 7x7 stride-2 convolution on a 1-channel input, then 3x3
    # stride-2 max pooling.
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    # Stage 2: 1x1 convolution, 3x3 convolution widening to 192 channels,
    # then 3x3 stride-2 max pooling.
    nn.Conv2d(64, 64, kernel_size=1),
    nn.ReLU(),
    nn.Conv2d(64, 192, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    # Stage 3: two Inception blocks, then 3x3 stride-2 max pooling.
    # The first block outputs 64 + 128 + 32 + 32 = 256 channels, which is the
    # input width of the second; the second outputs 128 + 192 + 96 + 64 = 480.
    Inception(192, 64, (96, 128), (16, 32), 32),
    Inception(256, 128, (128, 192), (32, 96), 64),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

In [4]:
# torchinfo is a third-party package; it is used here only to display a
# per-layer table of output shapes and parameter counts.
from torchinfo import summary
# Summarize the network for an input of shape (32, 1, 224, 224): a batch of 32
# inputs with 1 channel (matching the first Conv2d) and 224x224 spatial size.
# The call is the cell's last expression, so the notebook displays its result.
summary(googleLeNet, input_size=(32, 1, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               --                        --
├─Conv2d: 1-1                            [32, 64, 112, 112]        3,200
├─ReLU: 1-2                              [32, 64, 112, 112]        --
├─MaxPool2d: 1-3                         [32, 64, 56, 56]          --
├─Conv2d: 1-4                            [32, 64, 56, 56]          4,160
├─ReLU: 1-5                              [32, 64, 56, 56]          --
├─Conv2d: 1-6                            [32, 192, 56, 56]         110,784
├─ReLU: 1-7                              [32, 192, 56, 56]         --
├─MaxPool2d: 1-8                         [32, 192, 28, 28]         --
├─Inception: 1-9                         [32, 256, 28, 28]         --
│    └─Conv2d: 2-1                       [32, 64, 28, 28]          12,352
│    └─Conv2d: 2-2                       [32, 96, 28, 28]          18,528
│    └─Conv2d: 2-3                       [32, 128, 28, 28]        