In [1]:
import pandas as pd
from tqdm import tqdm

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import tensorflow.keras as keras
import os
import torchvision.models as models
from tensorflow.keras.applications.resnet50 import ResNet50
import time
from torchsummary import summary as summary_

# Fix every RNG seed so that re-running the notebook reproduces the same results.
import random
import numpy as np

# Single source of truth for the seed used by all libraries below.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# Seeding alone does not make GPU runs repeatable: cuDNN may auto-tune and
# select non-deterministic convolution kernels, so pin it down explicitly.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# ResNet

기존의 baseline 모델에서 정확도를 높이기 위해 찾아본 결과 resnet을 적용해보기로 하였다.
  
ResNet은 Residual neural network의 약자로, 이름처럼 잔차(residual)와 관련이 있는 모델이다.
ResNet 논문에서는 기존의 방식으로 신경망 층을 깊게 쌓는다고 해서 성능이 좋아지는 것은 아님을 실험으로 확인하였다. 따라서 Residual Block을 도입하였는데, 기존의 망과 다른 점은 입력값을 출력값에 더해주는 하나의 지름길(shortcut)을 만들어주었다는 것이다. 

In [2]:
# Preprocessing for each split: convert the image to a tensor, then apply
# (x - 0.5) / 0.5 to each of the three channels. No augmentation is used.
train_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-100 is stored in (and, if missing, downloaded to) the current directory.
train = torchvision.datasets.CIFAR100(
    root="./", train=True, download=True, transform=train_transform
)
test = torchvision.datasets.CIFAR100(
    root="./", train=False, download=True, transform=test_transform
)

# Only the training split is shuffled; the test split keeps dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train, batch_size=256, shuffle=True, num_workers=2
)
test_loader = torch.utils.data.DataLoader(
    dataset=test, batch_size=256, shuffle=False, num_workers=2
)

Files already downloaded and verified
Files already downloaded and verified


In [13]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    """ResNet with a 3x3 stride-1 stem and four residual stages.

    Args:
        block: residual block class. It must accept
            ``(in_planes, planes, stride)`` and expose an ``expansion``
            class attribute (output channels = ``expansion * planes``).
        num_blocks: sequence of four ints, the number of blocks per stage.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, num_blocks, num_classes=100):
        super(ResNet, self).__init__()
        # Channel count fed to the next block; updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        # Distinct loop name so the `stride` argument is not shadowed.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of 3-channel images to ``num_classes`` logits."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. For 32x32 inputs the final map is 4x4, so
        # this equals the previous fixed `avg_pool2d(out, 4)`, but it also
        # yields one value per channel for other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.flatten(1)
        out = self.linear(out)
        return out


def ResNet18(num_classes=100):
    """Build the BasicBlock network with [2, 2, 2, 2] blocks per stage.

    Args:
        num_classes: number of output logits; defaults to 100, the value
            previously fixed by ResNet's own default.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)

# NOTE: this definition shadows the `ResNet50` imported from
# tensorflow.keras.applications at the top of the notebook.
def ResNet50(num_classes=100):
    """Build the Bottleneck network with [3, 4, 6, 3] blocks per stage.

    Args:
        num_classes: number of output logits; defaults to 100, the value
            previously fixed by ResNet's own default.
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)

def test():
    """Smoke test: push one random 3x32x32 image through ResNet18 and print the output size."""
    # The network is built before the input is sampled, so RNG use is unchanged.
    network = ResNet18()
    dummy_image = torch.randn(1, 3, 32, 32)
    logits = network(dummy_image)
    print(logits.size())


test()

torch.Size([1, 100])


ResNet을 18층, 50층짜리 2가지 종류로 만들어 직접 모델을 돌려보았다.

### ResNet18

In [4]:
# ResNet18 on the selected device, with cross-entropy loss and Adam (lr=0.01).
model = ResNet18()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [5]:
# Per-layer output shapes and parameter counts for a batch of 256 3x32x32 inputs.
summary_(model, (3, 32, 32), batch_size=256)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [256, 64, 32, 32]           1,728
       BatchNorm2d-2          [256, 64, 32, 32]             128
            Conv2d-3          [256, 64, 32, 32]          36,864
       BatchNorm2d-4          [256, 64, 32, 32]             128
            Conv2d-5          [256, 64, 32, 32]          36,864
       BatchNorm2d-6          [256, 64, 32, 32]             128
        BasicBlock-7          [256, 64, 32, 32]               0
            Conv2d-8          [256, 64, 32, 32]          36,864
       BatchNorm2d-9          [256, 64, 32, 32]             128
           Conv2d-10          [256, 64, 32, 32]          36,864
      BatchNorm2d-11          [256, 64, 32, 32]             128
       BasicBlock-12          [256, 64, 32, 32]               0
           Conv2d-13         [256, 128, 16, 16]          73,728
      BatchNorm2d-14         [256, 128,

In [6]:
def train(model, train_loader, loss_fn=None, opt=None, run_device=None):
    """Run one training epoch over ``train_loader``.

    Args:
        model: network to optimise; it is switched to training mode.
        train_loader: iterable of ``(image, label)`` batches that supports
            ``len()`` and exposes a sized ``.dataset``.
        loss_fn: loss callable; defaults to the notebook-level ``criterion``.
        opt: optimizer; defaults to the notebook-level ``optimizer``.
        run_device: device the batches are moved to; defaults to the
            notebook-level ``device``.

    Returns:
        ``(train_loss, train_accuracy)``: the mean of the per-batch losses and
        the percentage of correctly classified samples.
    """
    # Fall back to the notebook globals so existing two-argument calls behave
    # exactly as before; passing the objects explicitly avoids hidden state.
    loss_fn = criterion if loss_fn is None else loss_fn
    opt = optimizer if opt is None else opt
    run_device = device if run_device is None else run_device

    model.train()
    train_loss = 0
    correct = 0

    for image, label in train_loader:
        image = image.to(run_device)
        label = label.to(run_device)
        opt.zero_grad()
        output = model(image)
        loss = loss_fn(output, label)
        loss.backward()
        opt.step()

        train_loss += loss.item()
        # Predicted class = index of the largest logit.
        correct += (output.argmax(dim=1) == label).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    train_loss /= len(train_loader)
    train_accuracy = 100. * correct / len(train_loader.dataset)
    return train_loss, train_accuracy

def evaluate(model, test_loader, loss_fn=None, run_device=None):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: network to evaluate; it is switched to eval mode.
        test_loader: iterable of ``(image, label)`` batches that supports
            ``len()`` and exposes a sized ``.dataset``.
        loss_fn: loss callable; defaults to the notebook-level ``criterion``.
        run_device: device the batches are moved to; defaults to the
            notebook-level ``device``.

    Returns:
        ``(test_loss, test_accuracy)``: the mean of the per-batch losses and
        the percentage of correctly classified samples.
    """
    # Fall back to the notebook globals so existing two-argument calls behave
    # exactly as before; passing the objects explicitly avoids hidden state.
    loss_fn = criterion if loss_fn is None else loss_fn
    run_device = device if run_device is None else run_device

    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(run_device)
            label = label.to(run_device)
            output = model(image)
            test_loss += loss_fn(output, label).item()
            # Predicted class = index of the largest logit.
            correct += (output.argmax(dim=1) == label).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    test_loss /= len(test_loader)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

In [8]:
result_list = []
EPOCHS = 100

# NOTE: the "Val" metrics below are computed on test_loader.
try:
    for epoch in range(1, EPOCHS + 1):

        train_loss, train_accuracy = train(model, train_loader)
        val_loss, val_accuracy = evaluate(model, test_loader)

        # Log only every fifth epoch to keep the cell output short.
        if epoch % 5 == 0:
            print(f"[EPOCH: {epoch}], \tTrain Loss: {train_loss:.4f}, \tTrain Accuracy: {train_accuracy:.2f} %, \tVal Loss: {val_loss:.4f}, \tVal Accuracy: {val_accuracy:.2f} % \n")

        result = {
            'EPOCH': epoch,
            'Train Loss': train_loss,
            'Train Accuracy': train_accuracy,
            'Val Loss':val_loss,
            'Val Accuracy': val_accuracy}

        result_list.append(result)
finally:
    # Build the frame even when training is interrupted or fails part-way,
    # so completed epochs are kept; the exception itself still propagates.
    result_df = pd.DataFrame(result_list)

[EPOCH: 5], 	Train Loss: 1.9320, 	Train Accuracy: 47.54 %, 	Val Loss: 2.2521, 	Val Accuracy: 41.99 % 

[EPOCH: 10], 	Train Loss: 0.2887, 	Train Accuracy: 91.03 %, 	Val Loss: 2.9028, 	Val Accuracy: 46.54 % 

[EPOCH: 15], 	Train Loss: 0.0845, 	Train Accuracy: 97.41 %, 	Val Loss: 3.4352, 	Val Accuracy: 47.59 % 

[EPOCH: 20], 	Train Loss: 0.0610, 	Train Accuracy: 98.09 %, 	Val Loss: 3.6661, 	Val Accuracy: 47.41 % 

[EPOCH: 25], 	Train Loss: 0.0697, 	Train Accuracy: 97.85 %, 	Val Loss: 3.6625, 	Val Accuracy: 48.40 % 

[EPOCH: 30], 	Train Loss: 0.1236, 	Train Accuracy: 95.97 %, 	Val Loss: 3.9380, 	Val Accuracy: 45.92 % 

[EPOCH: 35], 	Train Loss: 0.0367, 	Train Accuracy: 98.86 %, 	Val Loss: 4.2864, 	Val Accuracy: 47.27 % 

[EPOCH: 40], 	Train Loss: 0.0327, 	Train Accuracy: 98.98 %, 	Val Loss: 4.2173, 	Val Accuracy: 48.28 % 

[EPOCH: 45], 	Train Loss: 0.0425, 	Train Accuracy: 98.64 %, 	Val Loss: 4.2272, 	Val Accuracy: 47.01 % 

[EPOCH: 50], 	Train Loss: 0.0638, 	Train Accuracy: 98.03 %, 	Val 

KeyboardInterrupt: 

기존의 baseline 모델은 100 epoch를 돌렸을 때 약 20%의 성능이 나왔던 반면, resnet18 모델은 약 48%로 2배 이상 증가함을 확인할 수 있다.

(노트북에서 모델을 돌렸을 때 5 epoch마다 2시간씩 걸려서 중간에 멈췄습니다.. 더군다나 이번 과제의 경우 60퍼센트 이상의 val acc을 나타냈어야 했기 때문에 55 epoch에서 중단한 후 더 깊은 층을 가진 resnet50을 돌려보았습니다.)

### ResNet50

In [14]:
# ResNet50 on the selected device, with cross-entropy loss and Adam (lr=0.01).
# This rebinds `model`, `criterion` and `optimizer` from the ResNet18 run.
model = ResNet50()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [15]:
# Per-layer output shapes and parameter counts for a batch of 256 3x32x32 inputs.
summary_(model, (3, 32, 32), batch_size=256)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [256, 64, 32, 32]           1,728
       BatchNorm2d-2          [256, 64, 32, 32]             128
            Conv2d-3          [256, 64, 32, 32]           4,096
       BatchNorm2d-4          [256, 64, 32, 32]             128
            Conv2d-5          [256, 64, 32, 32]          36,864
       BatchNorm2d-6          [256, 64, 32, 32]             128
            Conv2d-7         [256, 256, 32, 32]          16,384
       BatchNorm2d-8         [256, 256, 32, 32]             512
            Conv2d-9         [256, 256, 32, 32]          16,384
      BatchNorm2d-10         [256, 256, 32, 32]             512
       Bottleneck-11         [256, 256, 32, 32]               0
           Conv2d-12          [256, 64, 32, 32]          16,384
      BatchNorm2d-13          [256, 64, 32, 32]             128
           Conv2d-14          [256, 64,

  total_output += np.prod(summary[layer]["output_shape"])


In [17]:
result_list = []
EPOCHS = 100

# NOTE: the "Val" metrics below are computed on test_loader.
try:
    for epoch in range(1, EPOCHS + 1):

        train_loss, train_accuracy = train(model, train_loader)
        val_loss, val_accuracy = evaluate(model, test_loader)
        print(f"[EPOCH: {epoch}], \tTrain Loss: {train_loss:.4f}, \tTrain Accuracy: {train_accuracy:.2f} %, \tVal Loss: {val_loss:.4f}, \tVal Accuracy: {val_accuracy:.2f} % \n")

        result = {
            'EPOCH': epoch,
            'Train Loss': train_loss,
            'Train Accuracy': train_accuracy,
            'Val Loss':val_loss,
            'Val Accuracy': val_accuracy}

        result_list.append(result)
finally:
    # Build the frame even when training is interrupted or fails part-way,
    # so completed epochs are kept; the exception itself still propagates.
    result_df = pd.DataFrame(result_list)

[EPOCH: 1], 	Train Loss: 4.4882, 	Train Accuracy: 3.37 %, 	Val Loss: 4.1415, 	Val Accuracy: 5.46 % 

[EPOCH: 2], 	Train Loss: 3.9648, 	Train Accuracy: 7.91 %, 	Val Loss: 3.8317, 	Val Accuracy: 10.86 % 

[EPOCH: 3], 	Train Loss: 3.6366, 	Train Accuracy: 12.94 %, 	Val Loss: 3.4984, 	Val Accuracy: 15.66 % 

[EPOCH: 4], 	Train Loss: 3.3053, 	Train Accuracy: 19.14 %, 	Val Loss: 3.2328, 	Val Accuracy: 20.33 % 

[EPOCH: 5], 	Train Loss: 2.9965, 	Train Accuracy: 24.71 %, 	Val Loss: 2.9559, 	Val Accuracy: 26.01 % 

[EPOCH: 6], 	Train Loss: 2.6611, 	Train Accuracy: 31.43 %, 	Val Loss: 2.6054, 	Val Accuracy: 32.84 % 

[EPOCH: 7], 	Train Loss: 2.3653, 	Train Accuracy: 37.40 %, 	Val Loss: 2.3864, 	Val Accuracy: 37.52 % 

[EPOCH: 8], 	Train Loss: 2.1254, 	Train Accuracy: 42.82 %, 	Val Loss: 2.3393, 	Val Accuracy: 39.79 % 

[EPOCH: 9], 	Train Loss: 1.9075, 	Train Accuracy: 47.73 %, 	Val Loss: 2.1813, 	Val Accuracy: 42.83 % 

[EPOCH: 10], 	Train Loss: 1.7139, 	Train Accuracy: 52.44 %, 	Val Loss: 2.044

KeyboardInterrupt: 

ResNet50에서는 모델을 돌렸을 때 1 epoch 결과를 출력할 때까지의 시간이 2시간정도 걸렸다. 때문에 이 역시도 100 epoch까지 돌려볼 시간이 부족했지만 20 epoch까지의 결과만 보더라도 앞서 돌렸던 resnet18보다 더 높은 성능(val acc : 50%)을 보여주고 있다.

### 결론

cifar-100 데이터셋에 대해서 60% 이상의 성능을 보여주기 위해서 기존의 baseline model보다 더 좋은 모델들을 찾아보고 실행해보았다. 특히 resnet으로 모델의 성능을 직접 출력하도록 해보았고 최종적으로 resnet50에 대해서는 20 epoch까지만 하더라도 50%까지 성능을 끌어올릴 수 있었다.  
따라서 이보다 더 깊은 층의 resnet101 혹은 resnet152에 대해서는 분명 60% 이상의 성능을 끌어올리기 충분할 것이라고 판단한다. 또한 resnet50에 대해서도 적절하게 파라미터를 수정해준다면 충분히 높은 성능을 보일 것이라고 생각한다.