## MobileNet V1
### Reference
Andrew G Howard, et al., MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications, 2017. [link](https://arxiv.org/abs/1704.04861)
### Contents
* How to reduce model size (number of parameters)
* Architecture of baseline MobileNet
  * <img src='../etc/images/Mobilenet-2.png' height=300> <img src='../etc/images/Mobilenet-3.png' height=300>
### Keys
* Depthwise separable convolutions
  * <img src='../etc/images/Mobilenet-1.png' height=300>
  * Standard convolutions: (a) Dk * Dk * Df * Df * m * n computational cost
  * Depthwise separable convolutions: (b + c) Dk * Dk * Df * Df * m + Df * Df * m * n computational cost
  * (Dk: input dimension, Df: filter dimension, m: input channels, n: output channels)
  * => 1/N + 1/(Dk)^2 ~ 1/8 - 1/9 cost reduction
* Width multiplier($\alpha$) and Resolution multiplier($\rho$)
  * *Not implemented in this model*
  * <img src='../etc/images/Mobilenet-4.png' width=300>
  * <img src='../etc/images/Mobilenet-5.png' height=200> <img src='../etc/images/Mobilenet-6.png' height=200>

In [24]:
import torch
from torch import nn
from torchinfo import summary

In [4]:
class DW_conv(nn.Module):
    def __init__(self, reduction, channels, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.reduction = reduction
        self.DW_s1 = nn.Sequential(
            nn.Conv2d(
                in_channels=channels,
                out_channels=channels,
                kernel_size=3,
                stride=1,
                padding=1,
                groups=channels
            ),
            nn.BatchNorm2d(num_features=channels),
            nn.ReLU()
        )
        self.DW_s2 = nn.Sequential(
            nn.Conv2d(
                in_channels=channels,
                out_channels=channels,
                kernel_size=3,
                stride=2,
                padding=1,
                groups=channels
            ),
            nn.BatchNorm2d(num_features=channels),
            nn.ReLU()
        )

    def forward(self, X):
        if self.reduction:
            return self.DW_s2(X)
        return self.DW_s1(X)

In [11]:
model1 = DW_conv(reduction=False, channels=32)
s1 = model1(torch.randn((10, 32, 224, 224)))
model2 = DW_conv(reduction=True, channels=32)
s2 = model2(torch.randn((10, 32, 224, 224)))

s1.shape, s2.shape

(torch.Size([10, 32, 224, 224]), torch.Size([10, 32, 112, 112]))

In [13]:
class PW_conv(nn.Module):
    def __init__(self, in_channels, out_channels, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.PW = nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
            ),
            nn.BatchNorm2d(
                num_features=out_channels
            ),
            nn.ReLU()
        )

    def forward(self, X):
        return self.PW(X)

In [33]:
class MobileNetV1(nn.Module):
    def __init__(self, num_classes, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.sequence1 = nn.Sequential(
            nn.Conv2d(
                in_channels=3,
                out_channels=32,
                kernel_size=3,
                stride=2,
                padding=1
            ),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU()
        )
        self.sequence2 = nn.Sequential(
            DW_conv(reduction=False, channels=32),
            PW_conv(in_channels=32, out_channels=64),
            DW_conv(reduction=True, channels=64),
            PW_conv(in_channels=64, out_channels=128)
        )
        self.sequence3 = nn.Sequential(
            DW_conv(reduction=False, channels=128),
            PW_conv(in_channels=128, out_channels=128),
            DW_conv(reduction=True, channels=128),
            PW_conv(in_channels=128, out_channels=256)
        )
        self.sequence4 = nn.Sequential(
            DW_conv(reduction=False, channels=256),
            PW_conv(in_channels=256, out_channels=256),
            DW_conv(reduction=True, channels=256),
            PW_conv(in_channels=256, out_channels=512)
        )
        self.sequence5 = nn.Sequential(
            DW_conv(reduction=False, channels=512),
            PW_conv(in_channels=512, out_channels=512),
            DW_conv(reduction=False, channels=512),
            PW_conv(in_channels=512, out_channels=512),
            DW_conv(reduction=False, channels=512),
            PW_conv(in_channels=512, out_channels=512),
            DW_conv(reduction=False, channels=512),
            PW_conv(in_channels=512, out_channels=512),
            DW_conv(reduction=False, channels=512),
            PW_conv(in_channels=512, out_channels=512),
        )
        self.sequence6 = nn.Sequential(
            DW_conv(reduction=True, channels=512),
            PW_conv(in_channels=512, out_channels=1024),
            DW_conv(reduction=False, channels=1024),
            PW_conv(in_channels=1024, out_channels=1024),
        )
        self.avgpool = nn.AvgPool2d(kernel_size=7)
        self.flatten = nn.Flatten()
        self.classifier = nn.Linear(
            in_features=1024,
            out_features=num_classes
        )

    def forward(self, X):
        X = self.sequence1(X)
        X = self.sequence2(X)
        X = self.sequence3(X)
        X = self.sequence4(X)
        X = self.sequence5(X)
        X = self.sequence6(X)
        X = self.avgpool(X)
        X = self.flatten(X)
        X = self.classifier(X)
        return X

In [34]:
model = MobileNetV1(num_classes=101)
summary(model)

Layer (type:depth-idx)                   Param #
MobileNetV1                              --
├─Sequential: 1-1                        --
│    └─Conv2d: 2-1                       896
│    └─BatchNorm2d: 2-2                  64
│    └─ReLU: 2-3                         --
├─Sequential: 1-2                        --
│    └─DW_conv: 2-4                      --
│    │    └─Sequential: 3-1              384
│    │    └─Sequential: 3-2              384
│    └─PW_conv: 2-5                      --
│    │    └─Sequential: 3-3              2,240
│    └─DW_conv: 2-6                      --
│    │    └─Sequential: 3-4              768
│    │    └─Sequential: 3-5              768
│    └─PW_conv: 2-7                      --
│    │    └─Sequential: 3-6              8,576
├─Sequential: 1-3                        --
│    └─DW_conv: 2-8                      --
│    │    └─Sequential: 3-7              1,536
│    │    └─Sequential: 3-8              1,536
│    └─PW_conv: 2-9                      --
│    │    

In [35]:
model(torch.randn((10, 3, 224, 224))).shape

torch.Size([10, 101])