- VGG 구현 및 정리: https://blogofth-lee.tistory.com/264
- VGGNet으로 ImageNet학습하기: https://minjoos.tistory.com/6
- VGGnet(2014) 구현하기: https://deep-learning-study.tistory.com/521
- VGGNet 논문 리뷰와 구현: https://wolfy.tistory.com/240
- VGG16 Transfer Learning - Kaggle: https://www.kaggle.com/code/carloalbertobarbano/vgg16-transfer-learning-pytorch/notebook

In [1]:
# import package
import torch
import torch.nn as nn 
import torch.nn.functional as F 
from torchsummary import summary 
from torch import optim 
from torch.optim.lr_scheduler import StepLR 

from torchvision import datasets
import torchvision.transforms as transforms 
from torch.utils.data import DataLoader

from torchvision import utils
import matplotlib.pyplot as plt 
plt.ion()   # 대화형 모드

import numpy as np
import os, time, copy

In [2]:
# specify a data path
path2data = '../data/STL10'

# Create the data directory together with any missing parent directories.
# exist_ok=True makes this a no-op when the directory already exists, so no
# separate existence check (and its check-then-create race) is needed.
os.makedirs(path2data, exist_ok=True)

# load dataset: the STL10 'train' split is used for training and the 'test'
# split for validation; ToTensor turns each image into a tensor
train_ds = datasets.STL10(path2data, split='train', download=True, transform=transforms.ToTensor())
val_ds = datasets.STL10(path2data, split='test', download=True, transform=transforms.ToTensor())

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Inspect one training sample, then report how many items each split holds
img, _ = train_ds[1]
print(img.shape)

print(len(train_ds), len(val_ds), sep='\n')

torch.Size([3, 96, 96])
5000
8000


### 모델 구축
> VGGNet에는 4가지 종류(VGG11, VGG13, VGG16, VGG19)가 있습니다. 각 종류의 구성을 딕셔너리로 정의합니다. 숫자는 해당 conv layer의 출력 채널 수를 의미하고, 'M'은 max pooling layer를 의미합니다.

In [2]:
# Layer configurations for the VGG variants.
# An int is the number of output channels of a conv layer; 'M' is a
# max-pooling layer. Every variant has five conv stages, each closed by 'M'.
VGG_types = {
  'VGG11': [
    64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'
  ],
  # The trailing 'M' is required: without the fifth pooling stage VGG13 would
  # produce a feature map twice as large as the other variants.
  'VGG13': [
    64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'
  ],
  'VGG16': [
    64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'
  ],
  'VGG19': [
    64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'
  ]
}

In [None]:
# define VGGNet class
class VGGnet(nn.Module):
  # define a function to create conv layer taken the key of VGG type dict
  def create_conv_laters(self, architecture):
    layers = []
    in_channels = self.in_channels
    for x in architecture:
      if isinstance(x, int): # int means conv layer
        out_channels = x
        layers += [
          nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(3,3), stride=(1,1), padding=(1,1)),
          nn.BatchNorm2d(x),
          nn.ReLU()
        ]
        in_channels = x
      elif x == 'M':
        layers += [
          nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        ]
    
    return nn.Sequential(*layers)

  def _initialize_weights(self):
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)
  
  def __init__(self, model, in_channels=3, num_classes=10, init_weights=True):
    super().__init__()
    self.in_channels = in_channels

    # create conv layers corresponding to VGG type 
    