# VGG 모델 만들기
- 모델의 구조를 보면 MaxPool를 기준으로 block단위로 나눠져있음
- block형태를 활용해서 모델 설계

![VGG](vgg.png)

In [9]:
import torch
import torch.nn as nn
import torchvision

from torchsummary import summary

# 단순한 방법

### 1 block 메소드

In [10]:
def build_feature_block(
    in_channel=3,
    out_channel=64,
    num_cnn=3
):
    layers = []
    for n in range(num_cnn):
        layers.append(nn.Conv2d(in_channel, out_channel, 3, padding=1))
        in_channel = out_channel
    
    layers.append(nn.MaxPool2d(2))
    
    return layers

In [11]:
features = build_feature_block()
features

[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)]

In [12]:
img = torch.rand(1,3,32,32)
model = nn.Sequential(*features)
output = model(img)
output.shape

torch.Size([1, 64, 16, 16])

### VGG-11

In [13]:
def vgg11(
    in_channel=3,
    num_cnn_list=[1,1,2,2,2],
    channel_list=[64,128,256,512,512],
    num_classes=10
):
    features = []
    for num_cnn, channel in zip(num_cnn_list, channel_list):
        
        features += build_feature_block(
            in_channel=in_channel,
            out_channel=channel,
            num_cnn=num_cnn)
        
        in_channel = channel
    
    flatten = [nn.Flatten()]
    
    classifier = []
    classifier += [nn.Linear(512*7*7, 4096)]
    classifier += [nn.Linear(4096, 4096)]
    classifier += [nn.Linear(4096, 1000)]
    
    layers = features + flatten + classifier
    
    model = nn.Sequential(*layers)
    
    return model

In [14]:
model = vgg11()
model

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation

In [15]:
summary(model.cuda(), (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
         MaxPool2d-2         [-1, 64, 112, 112]               0
            Conv2d-3        [-1, 128, 112, 112]          73,856
         MaxPool2d-4          [-1, 128, 56, 56]               0
            Conv2d-5          [-1, 256, 56, 56]         295,168
            Conv2d-6          [-1, 256, 56, 56]         590,080
         MaxPool2d-7          [-1, 256, 28, 28]               0
            Conv2d-8          [-1, 512, 28, 28]       1,180,160
            Conv2d-9          [-1, 512, 28, 28]       2,359,808
        MaxPool2d-10          [-1, 512, 14, 14]               0
           Conv2d-11          [-1, 512, 14, 14]       2,359,808
           Conv2d-12          [-1, 512, 14, 14]       2,359,808
        MaxPool2d-13            [-1, 512, 7, 7]               0
          Flatten-14                [-1

In [18]:
img = torch.rand(1,3,224,224)
output = model(img.cuda())
output.shape

torch.Size([1, 1000])

In [26]:
def build_feature_block(
    in_channel=3,
    out_channel=64,
    num_cnn=3
):
    layers = []
    for n in range(num_cnn):
        layers.append(nn.Conv2d(in_channel, out_channel, 3, padding=1))
        in_channel = out_channel
    
    layers.append(nn.MaxPool2d(2))
    
    return layers

### VGG-16

In [27]:
num_cnn_list = [2,2,3,3,3]
num_channel_list = [64, 128, 256, 512, 512]

def vgg16(
    in_channel=3,
    num_cnn_list=[2,2,3,3,3],
    num_channel_list=[64,128,256,512,512],
    classes=10
):
    features = []
    for num_cnn, channel in zip(num_cnn_list, num_channel_list):
        
        features += build_feature_block(
            in_channel=in_channel,
            out_channel=channel,
            num_cnn=num_cnn
        )
        
        in_channel = channel
        
    flatten = [nn.Flatten()]
    
    classifier = []
    classifier += [nn.Linear(512*7*7, 4096)]
    classifier += [nn.Linear(4096, 4096)]
    classifier += [nn.Linear(4096, classes)]
    
    layers = features + flatten + classifier
    
    return nn.Sequential(*layers)

In [28]:
vgg_16 = vgg16(classes=1000)
vgg_16

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1,

In [30]:
summary(vgg_16.cuda(), (3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
            Conv2d-2         [-1, 64, 224, 224]          36,928
         MaxPool2d-3         [-1, 64, 112, 112]               0
            Conv2d-4        [-1, 128, 112, 112]          73,856
            Conv2d-5        [-1, 128, 112, 112]         147,584
         MaxPool2d-6          [-1, 128, 56, 56]               0
            Conv2d-7          [-1, 256, 56, 56]         295,168
            Conv2d-8          [-1, 256, 56, 56]         590,080
            Conv2d-9          [-1, 256, 56, 56]         590,080
        MaxPool2d-10          [-1, 256, 28, 28]               0
           Conv2d-11          [-1, 512, 28, 28]       1,180,160
           Conv2d-12          [-1, 512, 28, 28]       2,359,808
           Conv2d-13          [-1, 512, 28, 28]       2,359,808
        MaxPool2d-14          [-1, 512,

# 파이써닉 방법

In [1]:
import torch
import torch.nn as nn

In [2]:
VGG16 = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']

class VGG(nn.Module):
    
    def __init__(self, in_channels, num_classes, batch_norm=False):
        super(VGG, self).__init__()
        self.in_channels = in_channels
        self.batch_norm = batch_norm
        
        self.features = self.create_conv_layers(VGG16)
        self.classifier = nn.Sequential(
            nn.Linear(512*7*7, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = nn.Flatten()(x)
        x = self.classifier(x)
        
        return x
    
    def create_conv_layers(self, config):
        layers = []
        in_channels = self.in_channels
        batch_norm = self.batch_norm
        
        for out_channels in config:
            # convolution
            if type(out_channels) == int:
                conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
                
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(out_channels), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                
                in_channels = out_channels
                
            # maxpooling
            else:
                layers += [nn.MaxPool2d(2)]
    
        return nn.Sequential(*layers)

In [3]:
vgg16 = VGG(in_channels=3, num_classes=1000)
vgg16

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [33]:
from torchsummary import summary

summary(vgg16.cuda(), (3, 244, 244))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 244, 244]           1,792
              ReLU-2         [-1, 64, 244, 244]               0
            Conv2d-3         [-1, 64, 244, 244]          36,928
              ReLU-4         [-1, 64, 244, 244]               0
         MaxPool2d-5         [-1, 64, 122, 122]               0
            Conv2d-6        [-1, 128, 122, 122]          73,856
              ReLU-7        [-1, 128, 122, 122]               0
            Conv2d-8        [-1, 128, 122, 122]         147,584
              ReLU-9        [-1, 128, 122, 122]               0
        MaxPool2d-10          [-1, 128, 61, 61]               0
           Conv2d-11          [-1, 256, 61, 61]         295,168
             ReLU-12          [-1, 256, 61, 61]               0
           Conv2d-13          [-1, 256, 61, 61]         590,080
             ReLU-14          [-1, 256,

In [34]:
img = torch.rand(1,3,224,224)
output = vgg16(img.cuda())
output.shape

torch.Size([1, 1000])

# 정리
- 함수를 이용한 방법과 Class를 이용한 방법
- 파이써닉하게 설계하기 위해서는 Class 방법을 선호
- Adaptive pooling layer를 추가해서 모든 input size에 대해 사용 가능함
- python파일로 작성 시도

In [1]:
import argparse

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10, MNIST

from model import VGG

In [18]:
from torchvision.models import vgg16
model = vgg16(pretrained=True)
model.classifier[6] = nn.Linear(4096, 10)

datasets = {
    'mnist': r'C:\Users\gjust\Documents\Github\data',
    'cifar10': r'C:\Users\gjust\Documents\Github\data',
    'cifar100': r'C:\Users\gjust\Documents\Github\data'
}
data_path = datasets['cifar10']

# Dataset
print('Preparing Dataset....')


transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])


train_set = CIFAR10(root=data_path, transform=transform, train=True, download=True)
test_set = CIFAR10(root=data_path, transform=transform, train=False, download=True)  

train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

Preparing Dataset....
Files already downloaded and verified
Files already downloaded and verified


In [19]:
# GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# optmizer
optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_func = nn.CrossEntropyLoss()

In [21]:
labels

tensor([6, 3, 8, 9, 4, 0, 3, 0, 7, 4, 9, 0, 9, 5, 5, 6], device='cuda:0')

In [22]:
outputs.argmax(axis=1)

tensor([7, 8, 5, 4, 4, 8, 8, 7, 4, 3, 9, 4, 9, 9, 1, 2], device='cuda:0')

In [25]:
sum(outputs.argmax(axis=1) == labels).item()

3

In [20]:
model.train()
model.cuda()
iter_loss = []
corrects = 0
data_size = 0

for i, (images, labels) in enumerate(train_loader):
    images, labels = images.cuda(), labels.cuda()

    optimizer.zero_grad()
    outputs = model(images)
    loss = loss_func(outputs, labels)
    loss.backward()
    optimizer.step()

    data_size += images.shape[0]
    iter_loss.append(loss.item())

    corrects += sum(outputs.argmax(axis=1) == labels).item()

    if i % 40 == 0:
        print(f'Iter[{i+1}/{len(train_loader)}] --- Loss: {sum(iter_loss)/data_size:0.4} --- Accuracy: {corrects/data_size:0.2}')

Iter[1/3125] --- Loss: 0.1648 --- Accuracy: 0.12
Iter[41/3125] --- Loss: 1.755e+05 --- Accuracy: 0.12
Iter[81/3125] --- Loss: 1.078e+05 --- Accuracy: 0.11
Iter[121/3125] --- Loss: 7.817e+04 --- Accuracy: 0.11
Iter[161/3125] --- Loss: 5.879e+04 --- Accuracy: 0.1
Iter[201/3125] --- Loss: 4.709e+04 --- Accuracy: 0.11
Iter[241/3125] --- Loss: 3.928e+04 --- Accuracy: 0.11
Iter[281/3125] --- Loss: 3.368e+04 --- Accuracy: 0.11
Iter[321/3125] --- Loss: 2.949e+04 --- Accuracy: 0.11


KeyboardInterrupt: 

In [6]:
def train(model, dataloader, optimizer, loss_func, device):
    model.train()
    iter_loss = []
    corrects = 0
    data_size = 0
    
    for i, (images, labels) in enumerate(dataloader):
        images, labels = images.cuda(), labels.cuda()
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        
        data_size += images.shape[0]
        iter_loss.append(loss.item())
        print(data_size)
        corrects += sum(outputs.argmax(axis=1) == labels).item()

        if (i+1) % 40 == 0:
            print(f'Iter[{i+1}/{len(dataloader)}] --- Loss: {sum(iter_loss)/data_size:0.4} --- Accuracy: {corrects/data_size:0.2}')
    return sum(iter_loss)/data_size, corrects/data_size

In [7]:
def test(model, dataloader, loss_func, device):
    model.eval()
    iter_loss = []
    corrects = 0
    
    with torch.no_grad():
        data_size = 0
        for i, (images, labels) in enumerate(dataloader):
            images, labels = images.cuda(), labels.cuda()
            
            outputs = model(images)
            loss = loss_func(outputs, labels)
            
            data_size += images.shape[0]
            iter_loss.append(loss.item())

            corrects += sum(outputs.argmax(axis=1) == labels).item()
    
    print(f'Iter[{i+1}/{len(dataloader)}] --- Loss: {sum(iter_loss)/data_size:0.4} --- Accuracy: {corrects/data_size:0.2}')
    return sum(iter_loss)/data_size, corrects/data_size

In [8]:
datasets = {
    'mnist': r'C:\Users\gjust\Documents\Github\data',
    'cifar10': r'C:\Users\gjust\Documents\Github\data',
    'cifar100': r'C:\Users\gjust\Documents\Github\data'
}
data_path = datasets['cifar10']

# Dataset
print('Preparing Dataset....')


transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])


train_set = CIFAR10(root=data_path, transform=transform, train=True, download=True)
test_set = CIFAR10(root=data_path, transform=transform, train=False, download=True)  

train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# model
print('Preparing Model....')
model = VGG('vgg16', 3, 10, False)
model.to(device)

# optmizer
optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_func = nn.CrossEntropyLoss()

Preparing Dataset....
Files already downloaded and verified
Files already downloaded and verified
Preparing Model....


In [9]:
# train

train_epoch_list, test_epoch_list = [], []
train_acc_list, test_acc_list = [], []
for e in range(20):
    print('Training....')
    train_loss, train_acc = train(model, train_loader, optimizer, loss_func, device)
    print('Teseting....')
    test_loss, test_acc = test(model, test_loader, loss_func, device)

    train_epoch_list.append(train_loss)
    test_epoch_list.append(test_loss)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)

import matplotlib.pyplot as plt

plt.figure(figsize=(10,10))
plt.plot(train_epoch_list)
plt.plot(test_epoch_list)
plt.plot(train_acc_list)
plt.plot(test_acc_list)
plt.legend(['Train_loss', 'Test_loss', 'Train_acc', 'Test_acc'])
plt.savefig('result.jpg')
# validation

# save model

print('successed!')

Training....
16
32
48
64
80
96
112
128
144
160
176
192
208
224
240
256
272
288
304
320
336
352
368
384
400
416
432
448
464
480
496
512
528
544
560
576
592
608
624
640
Iter[40/3125] --- Loss: 6.322e+07 --- Accuracy: 0.1
656
672
688
704
720
736
752
768
784
800
816
832
848
864
880
896
912
928
944
960
976
992
1008
1024
1040
1056
1072
1088
1104
1120


KeyboardInterrupt: 