## MobileNet V1
### Reference
Andrew G Howard, et al., MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications, 2017. [link](https://arxiv.org/abs/1704.04861)
### Contents
* How to reduce model size (number of parameters)
* Architecture of baseline MobileNet
  * <img src='../etc/images/Mobilenet-2.png' height=300> <img src='../etc/images/Mobilenet-3.png' height=300>
### Keys
* Depthwise separable convolutions
  * <img src='../etc/images/Mobilenet-1.png' height=300>
  * Standard convolutions: (a) Dk * Dk * Df * Df * m * n computational cost
  * Depthwise separable convolutions: (b + c) Dk * Dk * Df * Df * m + Df * Df * m * n computational cost
  * (Dk: kernel (filter) size, Df: feature map size, m: input channels, n: output channels)
  * => cost ratio (b + c) / (a) = 1/n + 1/(Dk)^2, i.e. about 1/8 - 1/9 of the standard cost (8 to 9 times less computation) with 3x3 kernels
* Width multiplier($\alpha$) and Resolution multiplier($\rho$)
  * *Not implemented in this model*
  * <img src='../etc/images/Mobilenet-4.png' width=300>
  * <img src='../etc/images/Mobilenet-5.png' height=200> <img src='../etc/images/Mobilenet-6.png' height=200>

In [1]:
import torch
from torch import nn
from torchinfo import summary

In [2]:
class DW_conv(nn.Module):
    """Depthwise 3x3 convolution followed by batch normalization and ReLU6.

    Each input channel is convolved with its own 3x3 filter
    (``groups=channels``), so the channel count is unchanged.

    Args:
        reduction: When truthy the convolution uses stride 2, otherwise
            stride 1. Padding is 1 in both cases.
        channels: Number of input (and output) channels.
        *args, **kwargs: Forwarded unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, reduction, channels, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # The downsampling and non-downsampling variants differ only in stride.
        stride = 2 if reduction else 1
        depthwise = nn.Conv2d(
            channels,
            channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=channels,
            bias=False,
        )
        self.DW = nn.Sequential(
            depthwise,
            nn.BatchNorm2d(channels),
            nn.ReLU6(),
        )

    def forward(self, X):
        """Apply the depthwise conv -> batch norm -> ReLU6 stack to ``X``."""
        out = self.DW(X)
        return out

In [3]:
# Smoke test: push a random batch through both DW_conv variants and compare
# the resulting shapes (stride 1 vs. stride 2).
dummy_shape = (10, 32, 224, 224)

model1 = DW_conv(reduction=False, channels=32)
s1 = model1(torch.randn(dummy_shape))
model2 = DW_conv(reduction=True, channels=32)
s2 = model2(torch.randn(dummy_shape))

s1.shape, s2.shape

(torch.Size([10, 32, 224, 224]), torch.Size([10, 32, 112, 112]))

In [4]:
class PW_conv(nn.Module):
    """Pointwise (1x1) convolution followed by batch norm and an optional ReLU6.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        activation: ``'ReLU6'`` appends a ReLU6 after the batch norm;
            ``'linear'`` leaves the block without a non-linearity.
        *args, **kwargs: Forwarded unchanged to ``nn.Module.__init__``.

    Raises:
        ValueError: If ``activation`` is neither ``'ReLU6'`` nor ``'linear'``.
    """

    def __init__(self, in_channels, out_channels, activation='ReLU6', *args, **kwargs) -> None:
        # Fail fast: an unrecognized value (e.g. a typo such as 'relu6') would
        # otherwise silently build a block with no non-linearity.
        if activation not in ('ReLU6', 'linear'):
            raise ValueError(
                f"Unsupported activation {activation!r}; expected 'ReLU6' or 'linear'."
            )
        super().__init__(*args, **kwargs)
        self.PW = nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                bias=False
            ),
            nn.BatchNorm2d(
                num_features=out_channels
            )
        )
        if activation == 'ReLU6':
            # Registered under the name '2' so it follows the conv ('0') and
            # batch norm ('1') entries of the Sequential.
            self.PW.add_module(
                '2',
                nn.ReLU6()
            )

    def forward(self, X):
        """Apply the pointwise conv -> batch norm (-> ReLU6) stack to ``X``."""
        return self.PW(X)

In [9]:
class MobileNetV1(nn.Module):
    """Baseline MobileNet V1 classifier built from depthwise separable blocks.

    The network is a strided 3x3 stem convolution, thirteen DW_conv/PW_conv
    pairs, global average pooling, dropout and a linear classifier. Five
    stride-2 convolutions give an overall spatial downsampling factor of 32.

    Args:
        num_classes: Number of output features of the final linear layer.
        dropout: Dropout probability applied before the linear layer.
        *args, **kwargs: Forwarded unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, num_classes, dropout=0.2, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Stem: standard 3x3 convolution, stride 2, 3 -> 32 channels.
        self.sequence1 = nn.Sequential(
            nn.Conv2d(
                in_channels=3,
                out_channels=32,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False
            ),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU6()
        )
        self.sequence2 = nn.Sequential(
            DW_conv(reduction=False, channels=32),
            PW_conv(in_channels=32, out_channels=64),
            DW_conv(reduction=True, channels=64),
            PW_conv(in_channels=64, out_channels=128)
        )
        self.sequence3 = nn.Sequential(
            DW_conv(reduction=False, channels=128),
            PW_conv(in_channels=128, out_channels=128),
            DW_conv(reduction=True, channels=128),
            PW_conv(in_channels=128, out_channels=256)
        )
        self.sequence4 = nn.Sequential(
            DW_conv(reduction=False, channels=256),
            PW_conv(in_channels=256, out_channels=256),
            DW_conv(reduction=True, channels=256),
            PW_conv(in_channels=256, out_channels=512)
        )
        # Five identical 512-channel depthwise separable blocks. They are
        # created in DW, PW order so the Sequential indices stay 0..9.
        repeated_blocks = []
        for _ in range(5):
            repeated_blocks.append(DW_conv(reduction=False, channels=512))
            repeated_blocks.append(PW_conv(in_channels=512, out_channels=512))
        self.sequence5 = nn.Sequential(*repeated_blocks)
        self.sequence6 = nn.Sequential(
            DW_conv(reduction=True, channels=512),
            PW_conv(in_channels=512, out_channels=1024),
            DW_conv(reduction=False, channels=1024),
            PW_conv(in_channels=1024, out_channels=1024),
        )
        # Global average pooling down to 1x1. On a 224x224 input the final
        # feature map is 7x7, so this equals a fixed 7x7 average pool, but it
        # also keeps the classifier input at 1024 features for other input
        # resolutions.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)
        self.flatten = nn.Flatten()
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(
                in_features=1024,
                out_features=num_classes
            )
        )

    def forward(self, X):
        """Return class scores of shape ``(batch, num_classes)``.

        ``X`` must have 3 channels, as required by the stem convolution.
        """
        X = self.sequence1(X)
        X = self.sequence2(X)
        X = self.sequence3(X)
        X = self.sequence4(X)
        X = self.sequence5(X)
        X = self.sequence6(X)
        X = self.avgpool(X)
        X = self.flatten(X)
        X = self.classifier(X)
        return X

In [10]:
# Build the network with 101 output classes and print torchinfo's per-layer
# parameter counts.
model = MobileNetV1(num_classes=101)
summary(model)

Layer (type:depth-idx)                   Param #
MobileNetV1                              --
├─Sequential: 1-1                        --
│    └─Conv2d: 2-1                       864
│    └─BatchNorm2d: 2-2                  64
│    └─ReLU6: 2-3                        --
├─Sequential: 1-2                        --
│    └─DW_conv: 2-4                      --
│    │    └─Sequential: 3-1              352
│    └─PW_conv: 2-5                      --
│    │    └─Sequential: 3-2              2,176
│    └─DW_conv: 2-6                      --
│    │    └─Sequential: 3-3              704
│    └─PW_conv: 2-7                      --
│    │    └─Sequential: 3-4              8,448
├─Sequential: 1-3                        --
│    └─DW_conv: 2-8                      --
│    │    └─Sequential: 3-5              1,408
│    └─PW_conv: 2-9                      --
│    │    └─Sequential: 3-6              16,640
│    └─DW_conv: 2-10                     --
│    │    └─Sequential: 3-7              1,408
│    └─P

In [11]:
# Display the module tree (repr) to inspect each layer's hyperparameters.
model

MobileNetV1(
  (sequence1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6()
  )
  (sequence2): Sequential(
    (0): DW_conv(
      (DW): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6()
      )
    )
    (1): PW_conv(
      (PW): Sequential(
        (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6()
      )
    )
    (2): DW_conv(
      (DW): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_run

In [12]:
# Forward a random batch of ten 3x224x224 inputs; the output should have
# shape (10, num_classes).
model(torch.randn((10, 3, 224, 224))).shape

torch.Size([10, 101])