https://github.com/pytorch/vision/blob/297e2b8746d32b46d20b0e6a6f867fde65e50e8d/torchvision/models/googlenet.py#L265

In [2]:
from collections import namedtuple

import torch
import torch.nn as nn
import torch.nn.functional as F

<img src="../res/inception_v1/inception_v1.png" alt="inception_module"/>

In [None]:
# Result record produced by GoogLeNet.forward.
#   logits      - output of the main classifier
#   aux_logits2 - output of the second auxiliary classifier (may be None)
#   aux_logits1 - output of the first auxiliary classifier (may be None)
# The two auxiliary fields default to None so a result can be built from the
# main logits alone.
GoogLeNetOutputs = namedtuple(
    "GoogLeNetOutputs",
    ["logits", "aux_logits2", "aux_logits1"],
    defaults=(None, None),
)

In [None]:
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) classifier with two optional auxiliary heads.

    Args:
        num_classes: Output size of the main classifier; also forwarded to
            the auxiliary blocks.
        aux_logits: When true, the two auxiliary classifiers are built;
            otherwise ``aux1`` and ``aux2`` are ``None``.
        dropout: Dropout probability applied right before the final linear
            layer.
        dropout_aux: Dropout probability forwarded to the auxiliary blocks.
        blocks: Optional sequence of exactly three module factories,
            ``[conv_block, inception_block, inception_aux_block]``. When
            ``None`` it defaults to ``[BasicConv2d, Inception, InceptionAux]``.

    Raises:
        ValueError: If ``blocks`` is given but does not hold three entries.
    """

    def __init__(
        self,
        num_classes=1000,
        aux_logits=True,  # Use aux module or not.
        dropout=0.2,
        dropout_aux=0.7,
        blocks=None,
    ):
        super().__init__()
        if blocks is None:
            blocks = [BasicConv2d, Inception, InceptionAux]
        if len(blocks) != 3:
            raise ValueError(
                f"blocks length should be 3 instead of {len(blocks)}"
            )
        conv_block, inception_block, inception_aux_block = blocks

        self.aux_logits = aux_logits

        # Stem.
        self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.conv2 = conv_block(64, 64, kernel_size=1)
        self.conv3 = conv_block(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        # Stage 3.
        self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        # Stage 4.
        self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        # Stage 5.
        self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128)

        if aux_logits:
            # aux1 is fed the output of inception4a, aux2 that of inception4d.
            self.aux1 = inception_aux_block(512, num_classes, dropout=dropout_aux)
            self.aux2 = inception_aux_block(528, num_classes, dropout=dropout_aux)
        else:
            self.aux1 = None
            self.aux2 = None

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=dropout)
        self.fc = nn.Linear(1024, num_classes)

    def _transform_input(self, x: torch.Tensor) -> None:
        """Unimplemented placeholder; returns ``None`` and is not called by this class."""
        return None

    def _forward(self, x):
        """Run the network and return ``(logits, aux2, aux1)``.

        The auxiliary entries are ``None`` unless the corresponding head
        exists and the module is in training mode.
        """
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.maxpool2(x)

        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool3(x)
        x = self.inception4a(x)

        aux1 = None
        if self.aux1 is not None and self.training:
            aux1 = self.aux1(x)

        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)

        aux2 = None
        if self.aux2 is not None and self.training:
            aux2 = self.aux2(x)

        x = self.inception4e(x)
        x = self.maxpool4(x)
        x = self.inception5a(x)
        x = self.inception5b(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)

        return x, aux2, aux1

    def forward(self, x):
        """Return a ``GoogLeNetOutputs`` record for input ``x``.

        A ``GoogLeNetOutputs`` is returned in every mode; the auxiliary
        fields are ``None`` whenever the heads were not evaluated.
        """
        # Unpack in the order _forward returns (aux2 before aux1) and pass the
        # fields by name so each auxiliary output lands in its own slot.
        x, aux2, aux1 = self._forward(x)
        return GoogLeNetOutputs(logits=x, aux_logits2=aux2, aux_logits1=aux1)  # !

<img src="../res/inception_v1/inception_module.png" alt="inception_module" width="500"/>

In [None]:
class Inception(nn.Module):
    """Inception module: four parallel branches concatenated along dim 1.

    Args:
        in_channels: Channel count passed to the first layer of each branch.
        ch1x1: Output channels of the 1x1 branch.
        ch3x3red: Output channels of the 1x1 reduction in front of the 3x3 conv.
        ch3x3: Output channels of the 3x3 branch.
        ch5x5red: Output channels of the 1x1 reduction in front of the "5x5" conv.
        ch5x5: Output channels of the "5x5" branch.
        pool_proj: Output channels of the 1x1 projection after max pooling.
        conv_block: Factory called as ``conv_block(in, out, **conv_kwargs)``;
            defaults to ``BasicConv2d``.
    """

    def __init__(
        self,
        in_channels,
        ch1x1,
        ch3x3red,  # ch3x3 dimension reduction
        ch3x3,
        ch5x5red,  # ch5x5 dimension reduction
        ch5x5,
        pool_proj,
        conv_block=None,
    ):
        # nn.Module must be initialised before any submodule is assigned.
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d

        self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1)
        self.branch2 = nn.Sequential(
            conv_block(in_channels, ch3x3red, kernel_size=1),
            conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1),
        )
        # NOTE: this branch uses a 3x3 kernel even though its parameters are
        # named 5x5. This appears to mirror the torchvision reference
        # implementation and is left as-is on purpose.
        self.branch3 = nn.Sequential(
            conv_block(in_channels, ch5x5red, kernel_size=1),
            conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1),
        )
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            conv_block(in_channels, pool_proj, kernel_size=1),
        )

    def _forward(self, x):
        """Apply every branch to ``x`` and return the four results as a list."""
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)

        outputs = [branch1, branch2, branch3, branch4]
        return outputs

    def forward(self, x):
        """Return the branch outputs concatenated along dimension 1."""
        outputs = self._forward(x)
        # dim 1 is the channel axis of the (N, C, H, W) layout nn.Conv2d uses.
        return torch.cat(outputs, 1)

In [None]:
class InceptionAux(nn.Module):
    """Auxiliary classifier head.

    The input is adaptively average-pooled to 4x4, passed through a 1x1
    convolution block with 128 output channels, flattened (128 * 4 * 4 = 2048
    features) and classified by two linear layers with dropout in between.

    Args:
        in_channels: Channel count of the incoming feature map.
        num_classes: Output size of the last linear layer.
        conv_block: Factory called as ``conv_block(in, out, kernel_size=1)``;
            defaults to ``BasicConv2d``.
        dropout: Dropout probability applied between the two linear layers.
    """

    def __init__(self, in_channels: int, num_classes: int, conv_block=None, dropout: float = 0.7):
        super().__init__()
        make_conv = BasicConv2d if conv_block is None else conv_block
        self.conv = make_conv(in_channels, 128, kernel_size=1)
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Return the auxiliary logits for feature map ``x``."""
        # Per the original notes the inputs are (N, 512, 14, 14) for aux1 and
        # (N, 528, 14, 14) for aux2; pooling fixes the spatial size at 4x4.
        pooled = F.adaptive_avg_pool2d(x, (4, 4))  # ?
        features = torch.flatten(self.conv(pooled), 1)
        hidden = self.dropout(F.relu(self.fc1(features), inplace=True))
        return self.fc2(hidden)

In [None]:
class BasicConv2d(nn.Module):
    """Bias-free 2-D convolution followed by batch normalisation and ReLU.

    Args:
        in_channels: Input channel count of the convolution.
        out_channels: Output channel count of the convolution and the number
            of features of the batch-norm layer.
        **kwargs: Extra keyword arguments forwarded to ``nn.Conv2d``
            (e.g. ``kernel_size``, ``stride``, ``padding``).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        # The convolution has no bias term; batch norm follows it directly.
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        """Return ``relu(bn(conv(x)))``."""
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)