# VGGNet

## 실습 목표
- VGGNet을 사용하여 이미지를 학습하고 10개의 카테고리를 갖는 이미지를 분류하는 이미지 분류기를 생성한다. (데이터셋: [CIFAR](https://pytorch.org/vision/0.9/datasets.html#cifar))
- Pre-training 모델의 사용방법을 이해한다.

## 문제 정의
- VGGNet 구조 살펴보기



![VGG](https://miro.medium.com/max/1100/0*6VP81rFoLWp10FcG)

## 주요 코드



### 1. VGGNet

```
# Model
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG(nn.Module):
    def __init__(self, vgg_name):   #vgg_name 추가 변수
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Sequential(
            nn.Linear(512 * 1 * 1, 360),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(360, 100),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(100, 10),
        )

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    # 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)

```

### 2. Pretrained model

```
from torchvision import models

vgg16 = models.vgg16(pretrained=True)
vgg16.to(DEVICE)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(vgg16.classifier.parameters(), lr = LEARNING_RATE, momentum=0.9)
```

## CIFAR Classifier(VGGNet)

- CIFAR 데이터셋을 사용하여 이미지에 포함된 object가 무엇인지 분류하는 이미지 분류기를 생성해봅니다.


### [Step1] Load libraries & Datasets

In [91]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader
from torch import nn

from torchvision import datasets
from torchvision.transforms import transforms
from torchvision.transforms.functional import to_pil_image

### [Step2] Data preprocessing

불러온 이미지의 증강을 통해 학습 정확도를 향상시키도록 합니다.

* RandomCrop
* RandomHorizontalFlip
* Normalize

In [92]:
transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_img = datasets.CIFAR10(
    root='data',
    train=True,
    download=True,
    transform=transforms
)

test_img = datasets.CIFAR10(
    root='data',
    train=False,
    download=True,
    transform=transforms
)

Files already downloaded and verified
Files already downloaded and verified


### [Step3] Set hyperparameters

In [93]:
EPOCH = 10
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
# DEVICE = 'cpu'
DEVICE = torch.device('mps:0' if torch.backends.mps.is_available() else 'cpu')
print('Using Device:', DEVICE)

Using Device: mps:0


### [Step4] Create DataLoader

In [94]:
train_loader = DataLoader(train_img, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_img, batch_size=BATCH_SIZE, shuffle=False)

### [Step5] Set Network Structure

In [95]:
# Model
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Sequential(
            nn.Linear(25088 * 1 * 1, 360),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(360, 100),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(100, 10),
        )

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)

### [Step6] Create Model instance

In [96]:
model = VGG('VGG16').to(DEVICE)
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

### [Step7] Model compile

In [97]:
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

### [Step8] Set train loop

In [98]:
def train(train_loader, model, loss_fn, optimizer):
    model.train()

    size = len(train_loader.dataset)

    for batch, (X, y) in enumerate(train_loader):
        X, y = X.to(DEVICE), y.to(DEVICE)
        pred = model(X)

        # Compute loss
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f'loss: {loss:>7f} [{current:>5d}/{size:>5d}]')

### [Step9] Set test loop

In [99]:
def test(test_loader, model, loss_fn):
    model.eval()

    size = len(test_loader.dataset)
    num_batches = len(test_loader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(DEVICE), y.to(DEVICE)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    
    test_loss /= num_batches
    correct /= size
    print(f'Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n')

### [Step10] Run model

In [100]:
for i in range(EPOCH):
    print(f'Epoch {i + 1}\n-------------------------------')
    train(train_loader, model, loss, optimizer)
    test(test_loader, model, loss)
print('Done!')

Epoch 1
-------------------------------


loss: 2.353894 [    0/50000]
loss: 2.299620 [ 3200/50000]
loss: 2.251302 [ 6400/50000]
loss: 2.157037 [ 9600/50000]
loss: 1.962983 [12800/50000]
loss: 2.033034 [16000/50000]
loss: 1.984756 [19200/50000]
loss: 1.889907 [22400/50000]
loss: 2.155015 [25600/50000]
loss: 1.890409 [28800/50000]
loss: 1.609838 [32000/50000]
loss: 2.117489 [35200/50000]
loss: 1.688444 [38400/50000]
loss: 1.529558 [41600/50000]
loss: 1.566147 [44800/50000]
loss: 2.040133 [48000/50000]
Test Error: 
 Accuracy: 45.7%, Avg loss: 1.531206 

Epoch 2
-------------------------------
loss: 1.970236 [    0/50000]
loss: 1.724916 [ 3200/50000]
loss: 1.671883 [ 6400/50000]
loss: 1.508912 [ 9600/50000]
loss: 1.738776 [12800/50000]
loss: 1.502121 [16000/50000]
loss: 1.553996 [19200/50000]
loss: 1.604620 [22400/50000]
loss: 1.828905 [25600/50000]
loss: 1.680809 [28800/50000]
loss: 1.480579 [32000/50000]
loss: 1.717993 [35200/50000]
loss: 1.555670 [38400/50000]
loss: 1.279361 [41600/50000]
loss: 1.547433 [44800/50000]
loss: 1.4

## CIFAR Classifier(Pretrained VGGNet)

- ImageNet 데이터로 학습한 VGGNet을 사용하여 주어진 데이터 셋에서 사용할 수 있도록 Fine tuning 해봅니다.


In [101]:
from torchvision import models

vgg16 = models.vgg16(pretrained=True)
vgg16.to(DEVICE)
print(vgg16)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /Users/woojin/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100.0%


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [102]:
vgg16.classifier[6].out_features = 10       # output 1000 -> 10

for param in vgg16.features.parameters():
    param.requires_grad = False             # gradient 계산 X

In [103]:
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(vgg16.classifier.parameters(), lr=LEARNING_RATE)

In [104]:
for i in range(EPOCH):
    print(f'Epoch {i + 1}\n-------------------------------')
    train(train_loader, vgg16, loss, optimizer)
    test(test_loader, vgg16, loss)
print('Done!')

Epoch 1
-------------------------------
loss: 12.551889 [    0/50000]
loss: 3.251927 [ 3200/50000]
loss: 2.015601 [ 6400/50000]
loss: 1.694982 [ 9600/50000]
loss: 1.599399 [12800/50000]
loss: 1.391162 [16000/50000]
loss: 1.288280 [19200/50000]
loss: 1.563966 [22400/50000]
loss: 1.057321 [25600/50000]
loss: 1.063684 [28800/50000]
loss: 0.966865 [32000/50000]
loss: 0.902289 [35200/50000]
loss: 1.260174 [38400/50000]
loss: 0.836121 [41600/50000]
loss: 0.915865 [44800/50000]
loss: 1.167854 [48000/50000]
Test Error: 
 Accuracy: 73.7%, Avg loss: 0.767432 

Epoch 2
-------------------------------
loss: 0.862775 [    0/50000]
loss: 0.809094 [ 3200/50000]
loss: 0.946023 [ 6400/50000]
loss: 0.912410 [ 9600/50000]
loss: 0.963073 [12800/50000]
loss: 0.619301 [16000/50000]
loss: 0.768700 [19200/50000]
loss: 0.891546 [22400/50000]
loss: 0.648050 [25600/50000]
loss: 1.146434 [28800/50000]
loss: 0.665186 [32000/50000]
loss: 0.562400 [35200/50000]
loss: 0.843945 [38400/50000]
loss: 1.079673 [41600/5000