## MobileNet V2
### Reference
Mark Sandler, et al., MobileNetV2: Inverted Residuals and Linear Bottlenecks, CVPR, 2018. [link](https://arxiv.org/abs/1801.04381)
### Contents
* How to reduce model size (number of parameters)
* Architecture of baseline MobileNetV2
  * <img src='../etc/images/MobilenetV2-1.png' width=300>
  * <img src='../etc/images/MobilenetV2-2.png' width=300> 
  * <img src='../etc/images/MobilenetV2-3.png' width=300>
### Keys
* Inverted Residuals
* Linear Bottlenecks

In [106]:
import torch
import torchvision
from torch import nn
from torchinfo import summary

In [107]:
class DW_conv(nn.Module):
    """Depthwise 3x3 convolution followed by batch normalisation and ReLU6.

    Args:
        reduction: when truthy the convolution uses stride 2, otherwise stride 1.
        channels: number of input channels; the output has the same number
            because the convolution is grouped one filter per channel.
    """

    def __init__(self, reduction, channels, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # The two variants differ only in the stride of the depthwise convolution.
        stride = 2 if reduction else 1
        depthwise = nn.Conv2d(
            in_channels=channels,
            out_channels=channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=channels,  # groups == channels makes the convolution depthwise
            bias=False,
        )
        self.DW = nn.Sequential(
            depthwise,
            nn.BatchNorm2d(num_features=channels),
            nn.ReLU6(),
        )

    def forward(self, X):
        """Apply conv -> batch norm -> ReLU6 to ``X``."""
        return self.DW(X)

In [108]:
class PW_conv(nn.Module):
    """Pointwise (1x1) convolution followed by batch normalisation.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        activation: ``'ReLU6'`` appends a ReLU6 after the batch norm;
            ``'linear'`` leaves the block without an activation.

    Raises:
        ValueError: if ``activation`` is neither ``'ReLU6'`` nor ``'linear'``.
    """

    def __init__(self, in_channels, out_channels, activation='ReLU6', *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Reject unknown names explicitly: silently falling back to a linear
        # block would hide typos such as 'relu6' and drop the non-linearity.
        if activation not in ('ReLU6', 'linear'):
            raise ValueError(
                f"activation must be 'ReLU6' or 'linear', got {activation!r}"
            )

        layers = [
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                bias=False
            ),
            nn.BatchNorm2d(
                num_features=out_channels
            ),
        ]
        if activation == 'ReLU6':
            layers.append(nn.ReLU6())
        # Positional construction names the sub-modules '0', '1' (and '2').
        self.PW = nn.Sequential(*layers)

    def forward(self, X):
        """Apply conv -> batch norm (-> ReLU6) to ``X``."""
        return self.PW(X)

In [109]:
class bottleneck_s1(nn.Module):
    """Stride-1 inverted-residual block: 1x1 expand -> 3x3 depthwise -> 1x1 project.

    The expansion widens the input to ``in_channels * t`` channels and is
    always built, including when ``t == 1``. The projection is created with
    ``activation='linear'``. The input is added back to the output only when
    ``in_channels == out_channels``.

    Args:
        t: expansion factor applied to ``in_channels``.
        in_channels: number of input channels.
        out_channels: number of output channels.
    """

    def __init__(self, t, in_channels, out_channels, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        hidden = in_channels * t
        expand = PW_conv(in_channels=in_channels, out_channels=hidden, activation='ReLU6')
        depthwise = DW_conv(reduction=False, channels=hidden)
        project = PW_conv(in_channels=hidden, out_channels=out_channels, activation='linear')
        # ``bn`` stands for "bottleneck" here, not batch norm.
        self.bn = nn.Sequential(expand, depthwise, project)
        # A skip connection is only possible when the channel counts agree.
        self.residual_connection = in_channels == out_channels

    def forward(self, X):
        """Run the block, adding the input back when the skip connection is enabled."""
        out = self.bn(X)
        return (X + out) if self.residual_connection else out

In [110]:
class bottleneck_s2(nn.Module):
    """Stride-2 inverted-residual block: 1x1 expand -> 3x3 depthwise (stride 2) -> 1x1 project.

    No skip connection is used. The depthwise stage is built with
    ``reduction=True`` and the projection with ``activation='linear'``.

    Args:
        t: expansion factor applied to ``in_channels``.
        in_channels: number of input channels.
        out_channels: number of output channels.
    """

    def __init__(self, t, in_channels, out_channels, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        hidden = in_channels * t
        stages = [
            PW_conv(in_channels=in_channels, out_channels=hidden, activation='ReLU6'),
            DW_conv(reduction=True, channels=hidden),
            PW_conv(in_channels=hidden, out_channels=out_channels, activation='linear'),
        ]
        # ``bn`` stands for "bottleneck" here, not batch norm.
        self.bn = nn.Sequential(*stages)

    def forward(self, X):
        """Run expand -> depthwise -> project on ``X``."""
        return self.bn(X)

In [111]:
class bottlenecks(nn.Module):
    """A stage of ``n`` stacked inverted-residual blocks.

    The first block maps ``in_channels`` to ``out_channels`` and is a
    ``bottleneck_s2`` when ``reduction`` is truthy, otherwise a
    ``bottleneck_s1``. The remaining ``n - 1`` blocks are ``bottleneck_s1``
    blocks with ``out_channels`` on both sides.

    Args:
        t: expansion factor passed to every block.
        in_channels: channels entering the stage.
        out_channels: channels leaving every block of the stage.
        n: number of blocks in the stage.
        reduction: whether the first block uses the stride-2 variant.
    """

    def __init__(self, t, in_channels, out_channels, n, reduction, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        blocks = []
        if n > 0:
            # Only the first block may change the channel count / use stride 2.
            head_cls = bottleneck_s2 if reduction else bottleneck_s1
            blocks.append(
                head_cls(t=t, in_channels=in_channels, out_channels=out_channels)
            )
        blocks.extend(
            bottleneck_s1(t=t, in_channels=out_channels, out_channels=out_channels)
            for _ in range(1, n)
        )
        self.bottlenecks = nn.Sequential(*blocks)

    def forward(self, X):
        """Pass ``X`` through the blocks in order."""
        return self.bottlenecks(X)


In [112]:
# Sanity-check a single stage: 3 blocks, expansion 2, 32 -> 64 channels,
# with the stride-2 variant as the first block.
model1 = bottlenecks(t=2, in_channels=32, out_channels=64, n=3, reduction=True)
model1

bottlenecks(
  (bottlenecks): Sequential(
    (0): bottleneck_s2(
      (bn): Sequential(
        (0): PW_conv(
          (PW): Sequential(
            (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU6()
          )
        )
        (1): DW_conv(
          (DW): Sequential(
            (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
            (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU6()
          )
        )
        (2): PW_conv(
          (PW): Sequential(
            (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
      )
    )
    (1): bottleneck_s1(
      (bn): Sequential(
        (0): PW_conv(
     

In [113]:
# Forward a random batch through the stage and inspect the output shape.
model1(torch.randn(10, 32, 224, 224)).shape

torch.Size([10, 64, 112, 112])

In [114]:
class MobileNetV2(nn.Module):
    """MobileNetV2 classifier assembled from ``bottlenecks`` stages.

    Layout: a stride-2 3x3 stem convolution (3 -> 32 channels), seven
    bottleneck stages, a 1x1 convolution to 1280 channels, global average
    pooling, dropout and a linear classifier.

    Args:
        num_classes: number of output features of the final linear layer.
        dropout: dropout probability applied before the linear layer.
    """

    def __init__(self, num_classes, dropout=0.2, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Stem: full 3x3 convolution with stride 2.
        self.sequence1 = nn.Sequential(
            nn.Conv2d(
                in_channels=3,
                out_channels=32,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False
            ),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU6()
        )
        # Positional arguments below are (t, in_channels, out_channels, n, reduction).
        self.sequence2 = nn.Sequential(
            bottlenecks(
                t=1,
                in_channels=32,
                out_channels=16,
                n=1,
                reduction=False
            ),
            bottlenecks(6, 16, 24, 2, True),
            bottlenecks(6, 24, 32, 3, True),
            bottlenecks(6, 32, 64, 4, True),
            bottlenecks(6, 64, 96, 3, False),
            bottlenecks(6, 96, 160, 3, True),
            bottlenecks(6, 160, 320, 1, False),
        )
        # Head: 1x1 convolution widening 320 -> 1280 channels.
        self.sequence3 = nn.Sequential(
            nn.Conv2d(
                in_channels=320,
                out_channels=1280,
                kernel_size=1,
                bias=False
            ),
            nn.BatchNorm2d(num_features=1280),
            nn.ReLU6()
        )
        # Global average pooling to 1x1. A fixed 7x7 AvgPool2d is only correct
        # for 224x224 inputs (7x7 final feature map); larger inputs would leave
        # more than 1280 flattened features and break the linear layer. The
        # adaptive pool gives the same result for 224x224 and accepts other
        # sizes. It has no parameters, so the state dict is unchanged.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)
        self.flatten = nn.Flatten()
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(
                in_features=1280,
                out_features=num_classes
            )
        )

    def forward(self, X):
        """Return class scores for a batch of 3-channel images ``X``."""
        X = self.sequence1(X)
        X = self.sequence2(X)
        X = self.sequence3(X)
        X = self.avgpool(X)
        X = self.flatten(X)
        X = self.classifier(X)
        return X

In [115]:
# Build the full network with a 1000-output classifier and print its parameter summary.
model = MobileNetV2(num_classes=1000)
summary(model)

Layer (type:depth-idx)                                       Param #
MobileNetV2                                                  --
├─Sequential: 1-1                                            --
│    └─Conv2d: 2-1                                           864
│    └─BatchNorm2d: 2-2                                      64
│    └─ReLU6: 2-3                                            --
├─Sequential: 1-2                                            --
│    └─bottlenecks: 2-4                                      --
│    │    └─Sequential: 3-1                                  1,984
│    └─bottlenecks: 2-5                                      --
│    │    └─Sequential: 3-2                                  13,968
│    └─bottlenecks: 2-6                                      --
│    │    └─Sequential: 3-3                                  39,696
│    └─bottlenecks: 2-7                                      --
│    │    └─Sequential: 3-4                                  183,872
│    └─bottlenecks

In [116]:
# Forward a random batch of 224x224 RGB-shaped inputs and inspect the output shape.
model(torch.randn(10, 3, 224, 224)).shape

torch.Size([10, 1000])

In [45]:
# For comparison: parameter summary of torchvision's MobileNetV2.
# torchvision is imported together with the other dependencies in the first code cell.
model_temp = torchvision.models.mobilenet_v2()
summary(model_temp)

Layer (type:depth-idx)                             Param #
MobileNetV2                                        --
├─Sequential: 1-1                                  --
│    └─Conv2dNormActivation: 2-1                   --
│    │    └─Conv2d: 3-1                            864
│    │    └─BatchNorm2d: 3-2                       64
│    │    └─ReLU6: 3-3                             --
│    └─InvertedResidual: 2-2                       --
│    │    └─Sequential: 3-4                        896
│    └─InvertedResidual: 2-3                       --
│    │    └─Sequential: 3-5                        5,136
│    └─InvertedResidual: 2-4                       --
│    │    └─Sequential: 3-6                        8,832
│    └─InvertedResidual: 2-5                       --
│    │    └─Sequential: 3-7                        10,000
│    └─InvertedResidual: 2-6                       --
│    │    └─Sequential: 3-8                        14,848
│    └─InvertedResidual: 2-7                       --
│    │ 