# VGG 모델 만들기
- 모델의 구조를 보면 MaxPool를 기준으로 block단위로 나눠져있음
- block형태를 활용해서 모델 설계

![VGG](vgg.png)

In [9]:
import torch
import torch.nn as nn
import torchvision

from torchsummary import summary

# 단순한 방법

### 1 block 메소드

In [10]:
def build_feature_block(
    in_channel=3,
    out_channel=64,
    num_cnn=3
):
    layers = []
    for n in range(num_cnn):
        layers.append(nn.Conv2d(in_channel, out_channel, 3, padding=1))
        in_channel = out_channel
    
    layers.append(nn.MaxPool2d(2))
    
    return layers

In [11]:
features = build_feature_block()
features

[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)]

In [12]:
img = torch.rand(1,3,32,32)
model = nn.Sequential(*features)
output = model(img)
output.shape

torch.Size([1, 64, 16, 16])

### VGG-11

In [13]:
def vgg11(
    in_channel=3,
    num_cnn_list=[1,1,2,2,2],
    channel_list=[64,128,256,512,512],
    num_classes=10
):
    features = []
    for num_cnn, channel in zip(num_cnn_list, channel_list):
        
        features += build_feature_block(
            in_channel=in_channel,
            out_channel=channel,
            num_cnn=num_cnn)
        
        in_channel = channel
    
    flatten = [nn.Flatten()]
    
    classifier = []
    classifier += [nn.Linear(512*7*7, 4096)]
    classifier += [nn.Linear(4096, 4096)]
    classifier += [nn.Linear(4096, 1000)]
    
    layers = features + flatten + classifier
    
    model = nn.Sequential(*layers)
    
    return model

In [14]:
model = vgg11()
model

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation

In [15]:
summary(model.cuda(), (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
         MaxPool2d-2         [-1, 64, 112, 112]               0
            Conv2d-3        [-1, 128, 112, 112]          73,856
         MaxPool2d-4          [-1, 128, 56, 56]               0
            Conv2d-5          [-1, 256, 56, 56]         295,168
            Conv2d-6          [-1, 256, 56, 56]         590,080
         MaxPool2d-7          [-1, 256, 28, 28]               0
            Conv2d-8          [-1, 512, 28, 28]       1,180,160
            Conv2d-9          [-1, 512, 28, 28]       2,359,808
        MaxPool2d-10          [-1, 512, 14, 14]               0
           Conv2d-11          [-1, 512, 14, 14]       2,359,808
           Conv2d-12          [-1, 512, 14, 14]       2,359,808
        MaxPool2d-13            [-1, 512, 7, 7]               0
          Flatten-14                [-1

In [18]:
img = torch.rand(1,3,224,224)
output = model(img.cuda())
output.shape

torch.Size([1, 1000])

In [26]:
def build_feature_block(
    in_channel=3,
    out_channel=64,
    num_cnn=3
):
    layers = []
    for n in range(num_cnn):
        layers.append(nn.Conv2d(in_channel, out_channel, 3, padding=1))
        in_channel = out_channel
    
    layers.append(nn.MaxPool2d(2))
    
    return layers

### VGG-16

In [27]:
num_cnn_list = [2,2,3,3,3]
num_channel_list = [64, 128, 256, 512, 512]

def vgg16(
    in_channel=3,
    num_cnn_list=[2,2,3,3,3],
    num_channel_list=[64,128,256,512,512],
    classes=10
):
    features = []
    for num_cnn, channel in zip(num_cnn_list, num_channel_list):
        
        features += build_feature_block(
            in_channel=in_channel,
            out_channel=channel,
            num_cnn=num_cnn
        )
        
        in_channel = channel
        
    flatten = [nn.Flatten()]
    
    classifier = []
    classifier += [nn.Linear(512*7*7, 4096)]
    classifier += [nn.Linear(4096, 4096)]
    classifier += [nn.Linear(4096, classes)]
    
    layers = features + flatten + classifier
    
    return nn.Sequential(*layers)

In [28]:
vgg_16 = vgg16(classes=1000)
vgg_16

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1,

In [30]:
summary(vgg_16.cuda(), (3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
            Conv2d-2         [-1, 64, 224, 224]          36,928
         MaxPool2d-3         [-1, 64, 112, 112]               0
            Conv2d-4        [-1, 128, 112, 112]          73,856
            Conv2d-5        [-1, 128, 112, 112]         147,584
         MaxPool2d-6          [-1, 128, 56, 56]               0
            Conv2d-7          [-1, 256, 56, 56]         295,168
            Conv2d-8          [-1, 256, 56, 56]         590,080
            Conv2d-9          [-1, 256, 56, 56]         590,080
        MaxPool2d-10          [-1, 256, 28, 28]               0
           Conv2d-11          [-1, 512, 28, 28]       1,180,160
           Conv2d-12          [-1, 512, 28, 28]       2,359,808
           Conv2d-13          [-1, 512, 28, 28]       2,359,808
        MaxPool2d-14          [-1, 512,

![VGG](vgg.png)

# 파이써닉 방법

In [1]:
import torch
import torch.nn as nn

In [31]:
VGG16 = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']

class VGG(nn.Module):
    
    def __init__(self, in_channels, num_classes, batch_norm=False):
        super(VGG, self).__init__()
        self.in_channels = in_channels
        self.batch_norm = batch_norm
        
        self.features = self.create_conv_layers(VGG16)
        self.classifier = nn.Sequential(
            nn.Linear(512*7*7, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = nn.Flatten()(x)
        x = self.classifier(x)
        
        return x
    
    def create_conv_layers(self, config):
        layers = []
        in_channels = self.in_channels
        batch_norm = self.batch_norm
        
        for out_channels in config:
            # convolution
            if type(out_channels) == int:
                conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
                
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(out_channels), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                
                in_channels = out_channels
                
            # maxpooling
            else:
                layers += [nn.MaxPool2d(2)]
    
        return nn.Sequential(*layers)

In [32]:
vgg16 = VGG(in_channels=3, num_classes=1000)
vgg16

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [33]:
from torchsummary import summary

summary(vgg16.cuda(), (3, 244, 244))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 244, 244]           1,792
              ReLU-2         [-1, 64, 244, 244]               0
            Conv2d-3         [-1, 64, 244, 244]          36,928
              ReLU-4         [-1, 64, 244, 244]               0
         MaxPool2d-5         [-1, 64, 122, 122]               0
            Conv2d-6        [-1, 128, 122, 122]          73,856
              ReLU-7        [-1, 128, 122, 122]               0
            Conv2d-8        [-1, 128, 122, 122]         147,584
              ReLU-9        [-1, 128, 122, 122]               0
        MaxPool2d-10          [-1, 128, 61, 61]               0
           Conv2d-11          [-1, 256, 61, 61]         295,168
             ReLU-12          [-1, 256, 61, 61]               0
           Conv2d-13          [-1, 256, 61, 61]         590,080
             ReLU-14          [-1, 256,

In [34]:
img = torch.rand(1,3,224,224)
output = vgg16(img.cuda())
output.shape

torch.Size([1, 1000])

# 정리
- 함수를 이용한 방법과 Class를 이용한 방법
- 파이써닉하게 설계하기 위해서는 Class 방법을 선호
- Adaptive pooling layer를 추가해서 모든 input size에 대해 