# **전이학습**

- torchvision.models as models를 이용해서 ImageNet으로 사전학습된 모델 사용

- 코드 출처: https://yeong-jin-data-blog.tistory.com/entry/%ED%8C%8C%EC%9D%B4%ED%86%A0%EC%B9%98-%EC%8A%A4%ED%84%B0%EB%94%94-%EC%A0%84%EC%9D%B4%ED%95%99%EC%8A%B5-%EB%AA%A8%EB%8D%B8-%ED%94%84%EB%A6%AC%EC%A7%95
(조금 수정)

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
 
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

# GPU 사용여부 확인
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


# **Model Freezing**
- 기존 모델의 변수를 그대로 사용하기 위해서 업데이트가 이루어지지 않도록 지정하는 방법. 
- 사전 학습 모델의 변수를 그대로 유지. 
- 학습 속도와 정확도를 향상시키거나 다른 모델과 붙여서 다른 구조 만들기 가능. (이미지 인식: 피쳐 추출은 기존 모델 사용, 분류기만 다른 방식 대체 가능)

In [2]:
# AlexNet에 맞추어 데이터 크기 조정
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
 
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True) 
 
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=16,shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
# AlexNet 구조와 사전학습된 파라미터 가져오기
model_frz = torchvision.models.alexnet(pretrained=True)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth


  0%|          | 0.00/233M [00:00<?, ?B/s]

In [4]:
# 가져온 모델의 분류기 부분 확인
model_frz.classifier

Sequential(
  (0): Dropout(p=0.5, inplace=False)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace=True)
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace=True)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [5]:
#model.features[0] = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
 
num_ftrs = model_frz.classifier[6].in_features # fc의 입력 노드 수를 산출한다. 
model_frz.classifier[6] = nn.Linear(num_ftrs, 10) # fc를 nn.Linear(num_ftrs, 10)로 대체한다.
model_frz = model_frz.to(device)

model_frz.classifier

Sequential(
  (0): Dropout(p=0.5, inplace=False)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace=True)
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace=True)
  (6): Linear(in_features=4096, out_features=10, bias=True)
)

In [6]:
# 모델 파라미터(weight, bias 목록 출력)
for i, (name, param) in enumerate(model_frz.named_parameters()):  
    print(i,name)

0 features.0.weight
1 features.0.bias
2 features.3.weight
3 features.3.bias
4 features.6.weight
5 features.6.bias
6 features.8.weight
7 features.8.bias
8 features.10.weight
9 features.10.bias
10 classifier.1.weight
11 classifier.1.bias
12 classifier.4.weight
13 classifier.4.bias
14 classifier.6.weight
15 classifier.6.bias


In [7]:
# 모델 프리징
# 합성곱 층은 0~9까지이다. 따라서 9번째 변수까지 역추적을 비활성화 한 후 for문을 종료한다.
 
for i, (name, param) in enumerate(model_frz.named_parameters()):
    param.requires_grad = False
    if i == 9:
        print('end')
        break

end


In [8]:
# requires_grad 확인
f_list = [0, 3, 6, 8, 10] #피쳐맵 파라미터 
c_list = [1, 4, 6] #분류기 파라미터 
 
for i in f_list:
    print(model_frz.features[i].weight.requires_grad)
    print(model_frz.features[i].bias.requires_grad)
    
for j in c_list:
    print(model_frz.classifier[j].weight.requires_grad)
    print(model_frz.classifier[j].bias.requires_grad)

False
False
False
False
False
False
False
False
False
False
True
True
True
True
True
True


In [9]:
# 모델 학습
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_frz.classifier.parameters(), lr=1e-4, weight_decay=1e-2)
# 5 에폭마다 0.1씩 학습률 감소
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

for epoch in range(10):
 
    running_loss = 0.0
    for data in trainloader:
        
        inputs, labels = data[0].to(device), data[1].to(device)
          
        optimizer.zero_grad()
        outputs = model_frz(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        running_loss += loss.item()
 
    cost = running_loss / len(trainloader)        
    print('[%d] loss: %.3f' %(epoch + 1, cost))  
 
print('Finished Training')

[1] loss: 0.734
Finished Training


In [10]:
# 예측
correct = 0
total = 0
with torch.no_grad():
    model_frz.eval()
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model_frz(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
 
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 78 %
