# 파이토치

- Width Multiplier α는 각 채널에 α를 곱하고 int 형으로 바꿔주면 될 것 같은데 그러면 뭔가 안 맞게됨.


- Resolution Multiplier ρ는 feature map 사이즈 자체를 건드리는건데, 내가 아는건 단순히 stride와 padding만 조절해서 feature map 사이즈를 건드리는 것뿐. 텐서 연산 수준에서 뭔가가 필요하지 않을까 생각.


- 따라서 구현한건 가장 기본적인 MobileNet.

In [1]:
import torch
import torch.nn as nn


class ConvBlock(nn.Module):
    def __init__(self, in_ch, out_ch, kernel_size, stride, padding):
        super(ConvBlock, self).__init__()
        
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, stride, padding, bias=False)
        self.bn = nn.BatchNorm2d(out_ch)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        
        return x
    
    
class Conv_dw(nn.Module):
    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1):
        super(Conv_dw, self).__init__()
        
        self.depthwise = nn.Sequential(
                nn.Conv2d(in_ch, in_ch, kernel_size, stride, padding, groups=in_ch, bias=False),
                nn.BatchNorm2d(in_ch),
                nn.ReLU())
        
        self.pointwise = nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU())
        
    def forward(self, x):
        
        x = self.depthwise(x)
        x = self.pointwise(x)
        
        return x
    
    
class MobileNet(nn.Module):
    def __init__(self, a, num_classes=1000):
        super(MobileNet, self).__init__()
        
        self.conv1 = ConvBlock(3, 32, kernel_size=3, stride=2, padding=1)
        
        self.sepconv1 = nn.Sequential(
                Conv_dw(32, 64),
                Conv_dw(64, 128, stride=2),
                Conv_dw(128, 128),
                Conv_dw(128, 256, stride=2),
                Conv_dw(256, 256),
                Conv_dw(256, 512, stride=2))
        
        self.sepconv2 = Conv_dw(512, 512)
        
        self.sepconv3 = nn.Sequential(
                Conv_dw(512, 1024, stride=2),
                Conv_dw(1024, 1024, stride=2))
        
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.linear = nn.Linear(1024, num_classes)
        
    def forward(self, x):
        
        x = self.conv1(x)
        x = self.sepconv1(x)
        
        for _ in range(5):
            x = self.sepconv2(x)
            
        x = self.sepconv3(x)
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x = self.linear(x)
        
        return x

In [2]:
if __name__ == '__main__':

    from torchsummary import summary
    model = MobileNet(a=0.5)
    summary(model, (3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              ReLU-3         [-1, 32, 112, 112]               0
         ConvBlock-4         [-1, 32, 112, 112]               0
            Conv2d-5         [-1, 32, 112, 112]             288
       BatchNorm2d-6         [-1, 32, 112, 112]              64
              ReLU-7         [-1, 32, 112, 112]               0
            Conv2d-8         [-1, 64, 112, 112]           2,048
       BatchNorm2d-9         [-1, 64, 112, 112]             128
             ReLU-10         [-1, 64, 112, 112]               0
          Conv_dw-11         [-1, 64, 112, 112]               0
           Conv2d-12           [-1, 64, 56, 56]             576
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,