# (ImageNet 데이터로 사전학습된) VGG 모델의 전이학습
- 클래스가 10개인 CIFAR10 데이터에 사전학습된 모델을 적용
- pretrained model의 conv층은 그대로 사용하고 fc층만 수정

In [None]:
# PyTorch's package name is `torch`.
import torch

In [None]:
from torchvision.transforms.transforms import Normalize
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.optim.adam import Adam
from torchvision.models.vgg import vgg16
from torchvision.datasets.cifar import CIFAR10
from torchvision.transforms import RandomHorizontalFlip, RandomResizedCrop, Normalize, Compose, Resize, ToTensor
from torch.utils.data.dataloader import DataLoader
from torchvision.datasets.utils import Iterator

import tqdm

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

'cuda'

In [None]:
# Load torchvision's VGG16 with pretrained=True (pretrained weights).
# NOTE(review): the original note read Dropout layers in the printed model as a
# sign of overfitting; Dropout is a regularization layer meant to reduce it.
model = vgg16(pretrained = True)



In [None]:
# Display the model architecture as the cell output.
# NOTE(review): the original note says the stock head has 1000 output classes;
# the captured output is truncated before the classifier, so confirm by re-running.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
# Replacement classifier head for 10 classes.
# Layout: Linear -> ReLU -> Dropout, twice, followed by the 10-way output layer.
# The input width 512*7*7 is the flattened feature size this head expects.
# A single Dropout follows each hidden layer; stacking two Dropout layers
# back-to-back would compound the drop rate.
fc = nn.Sequential(nn.Linear(512*7*7, 4096),
                   nn.ReLU(),
                   nn.Dropout(),
                   nn.Linear(4096, 4096),
                   nn.ReLU(),
                   nn.Dropout(),
                   nn.Linear(4096, 10))


In [None]:
# Swap the model's classifier head for the new 10-class `fc` stack.
model.classifier = fc

In [None]:
# Move the model to the selected device (the returned module is shown as cell output).
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
# List the state_dict keys; parameter names are prefixed with `features.` or `classifier.`.
model.state_dict().keys()

odict_keys(['features.0.weight', 'features.0.bias', 'features.2.weight', 'features.2.bias', 'features.5.weight', 'features.5.bias', 'features.7.weight', 'features.7.bias', 'features.10.weight', 'features.10.bias', 'features.12.weight', 'features.12.bias', 'features.14.weight', 'features.14.bias', 'features.17.weight', 'features.17.bias', 'features.19.weight', 'features.19.bias', 'features.21.weight', 'features.21.bias', 'features.24.weight', 'features.24.bias', 'features.26.weight', 'features.26.bias', 'features.28.weight', 'features.28.bias', 'classifier.0.weight', 'classifier.0.bias', 'classifier.4.weight', 'classifier.4.bias', 'classifier.7.weight', 'classifier.7.bias'])

In [None]:
# Freeze every parameter outside the classifier head so only the new head is
# trained; fewer trainable parameters means less gradient work per step.
for param_name, param in model.named_parameters():
  top_level, _, _ = param_name.partition('.')
  if top_level != 'classifier':
    # Not part of the classifier: stop gradient computation for it.
    param.requires_grad = False
    continue
  # Classifier parameters stay trainable; print their names for reference.
  print(param_name)

classifier.0.weight
classifier.0.bias
classifier.4.weight
classifier.4.bias
classifier.7.weight
classifier.7.bias


In [None]:
# Training-time preprocessing and augmentation.
# Images are resized and randomly cropped to 224x224, randomly flipped, then
# converted to tensors and normalized.
# The normalization uses the ImageNet channel statistics: the pretrained
# convolutional layers are kept frozen, so their inputs should be scaled the
# same way as the data those weights were trained on.
transforms = Compose([Resize(224),
                      RandomResizedCrop((224,224)),
                      RandomHorizontalFlip(p = 0.5),
                      ToTensor(),
                      Normalize(mean = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225))])

In [None]:
from IPython.testing import test
# Data preparation.
# Evaluation reuses the training pipeline minus its random augmentation steps,
# so test-time preprocessing is deterministic but otherwise identical.
eval_transforms = Compose([step for step in transforms.transforms
                           if not isinstance(step, (RandomResizedCrop, RandomHorizontalFlip))])

training_data = CIFAR10(root = './', train = True, download = True, transform= transforms)
# train=False selects the held-out test split; evaluating on the training
# split would overstate accuracy.
test_data = CIFAR10(root = './', train = False, download = True, transform= eval_transforms)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Data loaders: mini-batches of 32; only the training loader shuffles.
train_loader = DataLoader(
    training_data,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    test_data,
    batch_size=32,
    shuffle=False,
)

In [None]:
from torchvision.datasets.utils import Iterator
# Model training.
lr = 1e-4
# Hand the optimizer only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optim = Adam(trainable_params, lr = lr)
# Build the loss module once instead of once per batch.
criterion = nn.CrossEntropyLoss()

model.train()  # make sure Dropout is active during training

for epoch in range(3):  # one pass over the training set per epoch
  iterator = tqdm.tqdm(train_loader)
  for data, label in iterator:  # one mini-batch per step
    data = data.to(device)
    label = label.to(device)
    # print(data.shape)  # quick debugging aid

    preds = model(data)
    loss = criterion(preds, label)

    optim.zero_grad()
    loss.backward()
    optim.step()

    iterator.set_description(f"epoch: {epoch+1} loss:{loss.item()}")

# Save the trained weights.
# NOTE(review): the checkpoint is named "ResNet.pth" although the model is a
# VGG16 - confirm the intended file name before renaming.
torch.save(model.state_dict(), "ResNet.pth")

epoch: 1 loss:1.2138859033584595: 100%|██████████| 1563/1563 [07:24<00:00,  3.52it/s]
epoch: 2 loss:0.9904044270515442: 100%|██████████| 1563/1563 [07:12<00:00,  3.62it/s]
epoch: 3 loss:1.547538161277771: 100%|██████████| 1563/1563 [07:11<00:00,  3.62it/s]


In [None]:
# Model evaluation.
#model.load_state_dict(torch.load("CIFAR_pretrained.pth", map_location= device))

model.eval()  # switch Dropout off so predictions are deterministic

num_corr = 0
num_samples = 0
with torch.no_grad():  # no gradients are needed for evaluation
  for data, label in test_loader:
    data = data.to(device)
    label = label.to(device)

    output = model(data)
    preds = output.argmax(dim=1)  # predicted class = index of the largest logit

    num_corr += (preds == label).sum().item()  # .item() extracts the Python number
    num_samples += label.size(0)

  # Accuracy is correct predictions over evaluated samples (not over batches).
  print(f"Acc:{num_corr / num_samples}")

Acc:20.555982085732566
