----------------------------------------------------------------------------------------------------------------------------------------

## MobileNet V3

![1](https://d3i71xaburhd42.cloudfront.net/7e0884e27643c212f32e9ab5dacfb552922eda07/2-Figure1-1.png)
![3](https://3033184753-files.gitbook.io/~/files/v0/b/gitbook-legacy-files/o/assets%2Fml%2F-MMARWLU6xXHUlsfby29%2F-MMAYgULojAuyT4gAmIY%2FUntitled%208.png?generation=1605437934431052&alt=media)
![2](https://3033184753-files.gitbook.io/~/files/v0/b/gitbook-legacy-files/o/assets%2F-MM9anUwG1_XXIkWZ5fh%2F-MMARWLU6xXHUlsfby29%2F-MMAaBhxbGqux6nA_vwV%2Fmobilenetv3-large.png?alt=media&token=caf2b32e-c384-48d9-8d5f-fb0c70d79f9a)

In [9]:
def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    # 쉽게 말해, 이 함수는 가까운 8의 배수를 찾아줌
    
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) # divisor / 2 는 반올림을 위해 (너무 작아지지 않게)
    # case 1) v=10, divisor = 8 이면 10+4 // 8 * 8 = 8 근데 10 => 8 은 10% 이상 빠지는 거니까 8+8 = 16 으로 조정됨
    # case 2) v=39, divisor = 8 이면 39+4 // 8 * 8 = 40 => 10%보다 빠지지 않았기 때문에 40이 출력됨!

    if new_v < 0.9 * v: # 10% 보다 더 빠지지 않게 조정
        new_v += divisor
        
    return new_v

class SEBlock(nn.Module):
    """
    Squeeze-and-excitation gate: rescales each channel of the input by a
    learned factor computed from the globally average-pooled features.

    :param in_channels: number of channels of the input feature map.
    :param r: reduction ratio of the bottleneck (this implementation uses 4
        for MobileNet V3).
    """

    def __init__(self, in_channels, r=4):
        super().__init__()
        # Bottleneck width, rounded with _make_divisible(..., 8).
        squeezed_channels = _make_divisible(in_channels // r, 8)

        self.squeeze = nn.AdaptiveAvgPool2d((1, 1))
        self.excitation = nn.Sequential(
            nn.Linear(in_channels, squeezed_channels),
            nn.ReLU(inplace=True),
            nn.Linear(squeezed_channels, in_channels),
            nn.Hardsigmoid(inplace=True),  # hard sigmoid as the gating function
        )

    def forward(self, x):
        batch, channels = x.shape[0], x.shape[1]
        # (B, C, 1, 1) pooled map -> (B, C) so it can go through the Linear layers.
        gate = self.squeeze(x).reshape(batch, channels)
        gate = self.excitation(gate)
        # Back to (B, C, 1, 1) so the gate broadcasts over the spatial dims.
        return x * gate.reshape(batch, channels, 1, 1)

class DepSESep(nn.Module):
    """
    Depthwise conv -> optional squeeze-and-excitation -> linear pointwise conv.

    :param in_channels: channels entering (and leaving) the depthwise conv.
    :param out_channels: channels produced by the 1x1 pointwise projection.
    :param kernel_size: depthwise kernel size; padding is ``(kernel_size - 1) // 2``.
    :param use_se: insert an ``SEBlock`` between the depthwise and pointwise convs.
    :param use_hs: use ``Hardswish`` after the depthwise conv, otherwise plain
        ``ReLU`` (not ReLU6).
    :param stride: stride of the depthwise conv.
    """

    def __init__(self, in_channels, out_channels, kernel_size, use_se, use_hs, stride):
        super().__init__()

        # In PyTorch, BatchNorm `momentum` is the weight given to the NEW batch
        # statistic (running = (1 - momentum) * running + momentum * batch).
        # A running-average decay of 0.99 therefore corresponds to
        # momentum = 0.01; passing 0.99 would make the running statistics
        # track almost only the latest batch.
        bn_momentum = 0.01

        activation = nn.Hardswish(inplace=True) if use_hs else nn.ReLU(inplace=True)
        self.depthwise = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride,
                      padding=(kernel_size - 1) // 2, groups=in_channels, bias=False),
            nn.BatchNorm2d(in_channels, momentum=bn_momentum),
            activation,
        )

        self.seblock = SEBlock(in_channels) if use_se else None

        # Linear projection: deliberately no activation after the BatchNorm.
        self.pointwise = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels, momentum=bn_momentum),
        )

    def forward(self, x):
        x = self.depthwise(x)
        if self.seblock is not None:
            x = self.seblock(x)
        x = self.pointwise(x)
        return x

class InvertedBlock(nn.Module):
    """
    Inverted residual block: optional 1x1 expansion followed by ``DepSESep``.

    :param in_channels: channels of the block input.
    :param exp_channels: channels after the 1x1 expansion.
    :param out_channels: channels of the block output.
    :param kernel_size: kernel size of the depthwise conv.
    :param stride: stride of the depthwise conv.
    :param use_se: forwarded to ``DepSESep`` to enable squeeze-and-excitation.
    :param use_hs: use ``Hardswish`` instead of ``ReLU`` as the activation.
    """

    def __init__(self, in_channels, exp_channels, out_channels, kernel_size, stride, use_se, use_hs):
        super().__init__()

        # The identity shortcut is only valid when the output has the same
        # shape as the input.
        self.use_skip_connect = (stride == 1 and in_channels == out_channels)

        layers = []
        # The 1x1 expansion exists only when the channel count actually grows.
        if in_channels != exp_channels:
            layers += [nn.Sequential(
                nn.Conv2d(in_channels, exp_channels, 1, bias=False),
                # PyTorch `momentum` weights the new batch statistic, so a
                # running-average decay of 0.99 is momentum = 0.01 (not 0.99).
                nn.BatchNorm2d(exp_channels, momentum=0.01),
                nn.Hardswish(inplace=True) if use_hs else nn.ReLU(inplace=True),
            )]
        layers += [DepSESep(exp_channels, out_channels, kernel_size, use_se, use_hs, stride=stride)]

        self.residual = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_skip_connect:
            # No activation after the addition, so the block output stays linear.
            return x + self.residual(x)
        else:
            return self.residual(x)

class MobileNetV3(nn.Module):
    """
    MobileNet V3 classifier assembled from a list of block configurations.

    :param cfgs: non-empty iterable of ``[k, t, c, use_se, use_hs, s]`` rows:
        kernel size, expansion factor (relative to the block's input
        channels), output channels, SE flag, Hardswish flag, stride.
    :param last_channels: hidden width of the classifier (scaled by
        ``width_mult``).
    :param num_classes: number of output logits.
    :param width_mult: multiplier applied to the channel counts.
    :raises ValueError: if ``cfgs`` yields no rows.
    """

    def __init__(self, cfgs, last_channels, num_classes=1000, width_mult=1.):
        super().__init__()

        # PyTorch's BatchNorm `momentum` is the weight of the NEW batch
        # statistic, so a running-average decay of 0.99 is momentum = 0.01.
        bn_momentum = 0.01

        in_channels = _make_divisible(16 * width_mult, 8)

        # building first layer
        # The stem always uses Hardswish (original author's note: using HS is
        # what allowed the stem to be reduced to 16 filters).
        self.stem_conv = nn.Sequential(nn.Conv2d(3, in_channels, 3, padding=1, stride=2, bias=False),
                                       nn.BatchNorm2d(in_channels, momentum=bn_momentum),
                                       nn.Hardswish(inplace=True))

        # building inverted residual blocks
        layers = []
        exp_channels = None
        for k, t, c, use_se, use_hs, s in cfgs:
            exp_channels = _make_divisible(in_channels * t, 8)
            out_channels = _make_divisible(c * width_mult, 8)
            layers += [InvertedBlock(in_channels, exp_channels, out_channels, k, s, use_se, use_hs)]
            in_channels = out_channels
        if exp_channels is None:
            # The head below is sized from the last block's expansion width.
            raise ValueError("cfgs must contain at least one block configuration")
        self.layers = nn.Sequential(*layers)

        # building last several layers
        # The last 1x1 conv reuses the expansion width of the final block.
        self.last_conv = nn.Sequential(nn.Conv2d(in_channels, exp_channels, 1, bias=False),
                                       nn.BatchNorm2d(exp_channels, momentum=bn_momentum),
                                       nn.Hardswish(inplace=True))

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        last_channels = _make_divisible(last_channels * width_mult, 8)
        # Two-layer MLP head (a hidden layer before the final classifier).
        self.classifier = nn.Sequential(nn.Linear(exp_channels, last_channels),
                                        nn.Hardswish(inplace=True),
                                        nn.Dropout(p=0.2, inplace=True),
                                        nn.Linear(last_channels, num_classes))

        # Weight initialisation for every conv / batch-norm / linear layer.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)  # same effect as nn.init.constant_(m.weight, 1)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.stem_conv(x)
        x = self.layers(x)
        x = self.last_conv(x)
        x = self.avgpool(x)
        # (B, C, 1, 1) -> (B, C) for the linear classifier.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

def mobilenetv3_large(**kwargs):
    """
    Build the MobileNetV3-Large variant.

    :param kwargs: forwarded unchanged to ``MobileNetV3`` (e.g. ``num_classes``,
        ``width_mult``).
    :return: a ``MobileNetV3`` built from the table below with
        ``last_channels=1280``.
    """
    # Columns: kernel size, expansion factor, output channels, use SE,
    # use Hardswish, stride.
    # The expansion width is the previous block's output channels times t;
    # a factor is used instead of an absolute number so the table still works
    # when width_mult is not 1.
    block_settings = [
        # k,   t,   c,   SE,    HS,   s
        [3,   1,  16, False, False, 1],
        [3,   4,  24, False, False, 2],
        [3,   3,  24, False, False, 1],
        [5,   3,  40, True,  False, 2],
        [5,   3,  40, True,  False, 1],
        [5,   3,  40, True,  False, 1],
        [3,   6,  80, False, True,  2],
        [3, 2.5,  80, False, True,  1],
        [3, 2.3,  80, False, True,  1],
        [3, 2.3,  80, False, True,  1],
        [3,   6, 112, True,  True,  1],
        [3,   6, 112, True,  True,  1],
        [5,   6, 160, True,  True,  2],
        [5,   6, 160, True,  True,  1],
        [5,   6, 160, True,  True,  1],
    ]

    return MobileNetV3(block_settings, last_channels=1280, **kwargs)

def mobilenetv3_small(**kwargs):
    """
    Build the MobileNetV3-Small variant.

    :param kwargs: forwarded unchanged to ``MobileNetV3`` (e.g. ``num_classes``,
        ``width_mult``).
    :return: a ``MobileNetV3`` built from the table below with
        ``last_channels=1024``.
    """
    # Columns: kernel size, expansion factor, output channels, use SE,
    # use Hardswish, stride.
    block_settings = [
        # k,    t,   c,  SE,    HS,   s
        [3,    1,  16, True,  False, 2],
        [3,  4.5,  24, False, False, 2],
        [3, 3.67,  24, False, False, 1],
        [5,    4,  40, True,  True,  2],
        [5,    6,  40, True,  True,  1],
        [5,    6,  40, True,  True,  1],
        [5,    3,  48, True,  True,  1],
        [5,    3,  48, True,  True,  1],
        [5,    6,  96, True,  True,  2],
        [5,    6,  96, True,  True,  1],
        [5,    6,  96, True,  True,  1],
    ]

    return MobileNetV3(block_settings, last_channels=1024, **kwargs)

In [10]:
# Build the MobileNetV3-Large variant with its default arguments.
model = mobilenetv3_large()

from torchinfo import summary
# Print a per-layer summary for a batch of two 3x224x224 inputs, run on the CPU.
summary(model, input_size=(2, 3, 224, 224), device='cpu')

Layer (type:depth-idx)                                  Output Shape              Param #
MobileNetV3                                             [2, 1000]                 --
├─Sequential: 1-1                                       [2, 16, 112, 112]         --
│    └─Conv2d: 2-1                                      [2, 16, 112, 112]         432
│    └─BatchNorm2d: 2-2                                 [2, 16, 112, 112]         32
│    └─Hardswish: 2-3                                   [2, 16, 112, 112]         --
├─Sequential: 1-2                                       [2, 160, 7, 7]            --
│    └─InvertedBlock: 2-4                               [2, 16, 112, 112]         --
│    │    └─Sequential: 3-1                             [2, 16, 112, 112]         464
│    └─InvertedBlock: 2-5                               [2, 24, 56, 56]           --
│    │    └─Sequential: 3-2                             [2, 24, 56, 56]           3,440
│    └─InvertedBlock: 2-6                              

In [11]:
# Smoke test: push a random batch of two 3x224x224 inputs through the model
# and print the shape of the result.
x = torch.randn(2, 3, 224, 224)
print(model(x).shape)

torch.Size([2, 1000])


----------------------------------------------------------------------------------------------------------------------------------------

## Using pretrained model: MobileNet V3

In [12]:
# Load torchvision's MobileNetV3-Large with pretrained weights and display its
# module tree as the cell output.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision releases in
# favour of the `weights=` argument - confirm the installed version before
# switching.
model_mobilenetv3 = torchvision.models.mobilenet_v3_large(pretrained=True)
model_mobilenetv3

Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to C:\Users\57187/.cache\torch\hub\checkpoints\mobilenet_v3_large-8738ca79.pth


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=22139423.0), HTML(value='')))




MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bi