# Prob1: Your Best Image Classification Model

### 기본 모형 적합
먼저 사전훈련된 vgg16모델을 기본 모델로 사용하였다.\
vgg16모델을 사용한 이유는 강의자료에 예시로 쓰였기 때문에 가장 먼저 사용하였다. \
설정한 파라미터는 다음과 같다.\
batch_size =32, learning_rate = 0.0002, num_epoch = 10 \
위와 같이 설정한 이유는 강의자료에서 많이 쓰이는 숫자를 이용하였고, 에폭의 수를 키울수록 학습시간이 방대하게 늘어나기 때문에 에폭은 비교적 작은 수인 10으로 설정하였다.

In [None]:
# import module
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as tr
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets, models
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# set parameters
batch_size =32
learning_rate = 0.0002
num_epoch = 10

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.CIFAR10(root='./Data', train=True, download=True, transform=transf)
testset = torchvision.datasets.CIFAR10(root='./Data', train=False, download=True, transform=transf)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./Data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 12965555.11it/s]


Extracting ./Data/cifar-10-python.tar.gz to ./Data
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size)

In [None]:
# VGG-16
import torchvision.models as models

vgg16_ver1 = models.vgg16(weights="IMAGENET1K_V1")

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 260MB/s]


In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg16_ver1.parameters(), lr=learning_rate)

cuda:0


In [None]:
# CIFAR10 데이터에 맞춰 모델구조 변경
vgg16_ver1.classifier[6] = nn.Linear(in_features=4096, out_features = 10)
vgg16_ver1.to("cuda:0")

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = vgg16_ver1.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 1==0:
    print(loss)

tensor(0.8403, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.5333, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1118, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1550, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0292, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1013, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.3801, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1078, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0257, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0356, device='cuda:0', grad_fn=<NllLossBackward0>)


In [None]:
# Model Evaluation
correct = 0
total = 0
with torch.no_grad():
  vgg16_ver1.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = vgg16_ver1(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 87.59 %


기본 모형에서의 Accuracy는 87.59 %를 얻었다.

### 시도1: 학습률 0.0002 -> 0.0001로 낮추기
일반적으로 학습률을 낮추면 모델 파라미터가 크게 업데이트 되지 않아 학습이 안정적이게 된다. \
또한 오버슈팅의 가능성이 낮아지므로 모델의 성능이 높아질 것이라 기대하여 학습률을 낮춰 다시 학습하였다.

In [None]:
# set parameters
batch_size = 32
learning_rate = 0.0001  # 학습률 조정
num_epoch = 10

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.CIFAR10(root='./Data', train=True, download=True, transform=transf)
testset = torchvision.datasets.CIFAR10(root='./Data', train=False, download=True, transform=transf)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size)

In [None]:
# vgg-16
vgg16_ver3 = models.vgg16(weights="IMAGENET1K_V1")

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg16_ver3.parameters(), lr=learning_rate)

cuda:0


In [None]:
# CIFAR10 데이터에 맞춰 모델구조 변경
vgg16_ver3.classifier[6] = nn.Linear(in_features=4096, out_features = 10)
vgg16_ver3.to("cuda:0")

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = vgg16_ver3.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 1==0:
    print(loss)

tensor(0.4876, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0897, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0551, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0195, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.3531, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0923, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0089, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0177, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0009, device='cuda:0', grad_fn=<NllLossBackward0>)


In [None]:
# Model Evaluation
correct = 0
total = 0
with torch.no_grad():
  vgg16_ver3.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = vgg16_ver3(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 90.57 %


기대에 맞게 Accuracy가 소폭 상승하여 90.57%를 얻었다.

### 시도2: 에폭 수 10 -> 20로 늘리기
일반적으로 에폭 수를 늘리면 모델의 일반화 능력이 향상되고, 최적해를 찾는데 더 많은 기회를 갖게 되어 모델의 성능을 향상시킬 수 있다. \
따라서 에폭 수를 늘려 다시 학습하였다.

In [None]:
# set parameters
batch_size = 32
learning_rate = 0.0001
num_epoch = 20

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.CIFAR10(root='./Data', train=True, download=True, transform=transf)
testset = torchvision.datasets.CIFAR10(root='./Data', train=False, download=True, transform=transf)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./Data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 12681599.61it/s]


Extracting ./Data/cifar-10-python.tar.gz to ./Data
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size)

In [None]:
import torchvision.models as models

In [None]:
# vgg-16
vgg16_ver4 = models.vgg16(weights="IMAGENET1K_V1")

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:06<00:00, 87.9MB/s]


In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg16.parameters(), lr=learning_rate)

cuda:0


In [None]:
# CIFAR10 데이터에 맞춰 모델구조 변경
vgg16_ver4.classifier[6] = nn.Linear(in_features=4096, out_features = 10)
vgg16_ver4.to("cuda:0")

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = vgg16_ver4.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 1==0:
    print(loss)

tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1195, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1610, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1517, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0554, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0100, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1153, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0080, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0078, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0121, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1159, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0003, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0339, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1401, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0159, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0009, device='cuda:0', grad_fn=

In [None]:
# Model Evaluation
correct = 0
total = 0
with torch.no_grad():
  vgg16_ver4.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = vgg16_ver4(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 89.61 %


기대와 달리 모델의 Accuracy가 하락하여 89.61%를 얻었다.

### 시도3: 옵티마이저 변경 Adam -> SGD
옵티마이저도 모델의 성능에 영향을 끼치기 때문에, Adam 옵티마이저가 아닌 SGD 옵티마이저를 사용하여 모델의 성능을 비교해보고자 하였다.

In [None]:
# set parameters
batch_size = 32
learning_rate = 0.0001
num_epoch = 10  # 에폭 수 다시 원래대로

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.CIFAR10(root='./Data', train=True, download=True, transform=transf)
testset = torchvision.datasets.CIFAR10(root='./Data', train=False, download=True, transform=transf)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./Data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29352798.84it/s]


Extracting ./Data/cifar-10-python.tar.gz to ./Data
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size)

In [None]:
import torchvision.models as models

In [None]:
# vgg-16
vgg16_ver5 = models.vgg16(weights="IMAGENET1K_V1")

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 235MB/s]


In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

loss_func = nn.CrossEntropyLoss()
# 옵티마이저: Adam -> SGD
optimizer = torch.optim.SGD(vgg16_ver5.parameters(), lr=learning_rate)

cuda:0


In [None]:
# CIFAR10 데이터에 맞춰 모델구조 변경
vgg16_ver5.classifier[6] = nn.Linear(in_features=4096, out_features = 10)
vgg16_ver5.to("cuda:0")

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = vgg16_ver5.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 1==0:
    print(loss)

tensor(2.1334, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.8532, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3862, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.5295, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.7066, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.9519, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.8094, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.7969, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.6328, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.5418, device='cuda:0', grad_fn=<NllLossBackward0>)


In [None]:
# Model Evaluation
correct = 0
total = 0
with torch.no_grad():
  vgg16_ver5.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = vgg16_ver5(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 83.44 %


Adam 옵티마이저를 썼을 때와 비교하여 SGD 옵티마이저를 썼을 때 Accuracy가 83.44%로 하락하였다.

### 시도4: GoogLeNet 사용하기
다른 모델이 vgg16 모델보다 더 높은 성능을 내는지 비교해보고자 GoogLeNet 모델을 사용하였다.\
추가로 위에서 epoch를 20으로 할 때 10보다 성능이 낮았지만, 별 차이가 나지 않아 epoch를 15로 설정하였다. \
옵티마이저는 성능이 좋았던 Adam 옵티마이저를 사용하였다.

In [None]:
# set parameters
batch_size = 32
learning_rate = 0.0001
num_epoch = 15

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.CIFAR10(root='./Data', train=True, download=True, transform=transf)
testset = torchvision.datasets.CIFAR10(root='./Data', train=False, download=True, transform=transf)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./Data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 44353581.96it/s]


Extracting ./Data/cifar-10-python.tar.gz to ./Data
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size)

In [None]:
import torchvision.models as models

In [None]:
# GoogleNet
GoolgleNet_ver6 = models.googlenet(weights="IMAGENET1K_V1")

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:00<00:00, 214MB/s]


In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(GoolgleNet_ver6.parameters(), lr=learning_rate)

cuda:0


In [None]:
GoolgleNet_ver6.to("cuda:0")

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = GoolgleNet_ver6.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 1==0:
    print(loss)

tensor(0.0152, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0015, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0163, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0011, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0020, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0140, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0022, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0097, device='cuda:0', grad_fn=<NllLossBackward0>)


In [None]:
# Model Evaluation
correct = 0
total = 0
with torch.no_grad():
  GoolgleNet_ver6.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = GoolgleNet_ver6(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 93.87 %


GoogLeNet모델의 Accuracy는 93.87%로 vgg16모델보다 상승하였다.

### 시도 5: 배치사이즈 32 -> 64로 늘리기
배치사이즈는 학습 과정에서 한번에 처리하는 데이터의 수로, 배치사이즈가 크면 그래디언트 추정이 안정적이다.\
따라서 모델의 성능이 높아질 것이라 예상하여 배치사이즈를 늘려 모델을 다시 학습하였다.

In [None]:
# set parameters
batch_size = 64
learning_rate = 0.0001
num_epoch = 15

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.CIFAR10(root='./Data', train=True, download=True, transform=transf)
testset = torchvision.datasets.CIFAR10(root='./Data', train=False, download=True, transform=transf)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size)

In [None]:
import torchvision.models as models

In [None]:
# GoogleNet
GoolgleNet_ver7 = models.googlenet(weights="IMAGENET1K_V1")

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(GoolgleNet_ver7.parameters(), lr=learning_rate)

cuda:0


In [None]:
GoolgleNet_ver7.to("cuda:0")

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = GoolgleNet_ver7.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 1==0:
    print(loss)

tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0074, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0134, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0074, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0060, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0436, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0014, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0068, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0005, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0722, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0324, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0004, device='cuda:0', grad_fn=<NllLossBackward0>)


In [None]:
# Model Evaluation
correct = 0
total = 0
with torch.no_grad():
  GoolgleNet_ver7.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = GoolgleNet_ver7(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 94.16 %


모델의 Accuracy가 94.16%로 소폭 상승하였다. \
따라서 최종 모형으로 성능이 94.16%인 시도5(GoogLeNet) 모형을 택하였다.

In [None]:
# 시도5(GoogLeNet) 모형 다운로드 받기
torch.save(GoolgleNet_ver7, './2016400_정다운.pt')

In [None]:
# 모형 다운로드 받기
torch.save(model, './정다운.pt')

# Prob2: Image Classification with the New Dataset

### 기본 모형 적합
먼저 Resnet을 기본 모델로 사용하였다.\
Resnet을 기본 모델로 사용한 이유는 CNN 모델 중 성능이 잘 나오는 편이라고 알고 있기 때문이다. \
설정한 파라미터는 다음과 같다.\
batch_size =64, learning_rate = 0.001, num_epoch = 100 \
위와 같이 설정한 이유는 강의자료에서 많이 쓰이는 숫자를 이용하였고, 사전학습된 모델보다 학습 속도가 빠른 만큼 에폭 수도 100으로 설정하였다. \
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25])은 정규화를 하는 것이 성능에 좋을 것이라 판단하여 추가하였다. \
BasicBlock과 Resnet 구현 과정에서 채널 수나 블록 수 등은 모두 GPU 사용량을 고려하여 시도와 실패를 반복해가며 적절한 수로 설정하였다. \
활성화 함수로 ReLu함수를 사용한 이유는 활성화 함수 중 성능이 좋다고 알려져 있기 때문이다.

In [None]:
# set parameters
batch_size = 64
learning_rate = 0.001
num_epoch = 100

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.STL10(root='./Data', split='train', download=True, transform=transf)
testset = torchvision.datasets.STL10(root='./Data', split='test', download=True, transform=transf)

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to ./Data/stl10_binary.tar.gz


100%|██████████| 2640397119/2640397119 [02:44<00:00, 16066587.92it/s]


Extracting ./Data/stl10_binary.tar.gz to ./Data
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=True)

In [None]:
# ResNet 모델 만들기
class BasicBlock(nn.Module):
  def __init__(self, in_planes, planes, stride=1):
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                           stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.shortcut = nn.Sequential()
    if stride != 1 or in_planes != planes:
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(planes)
          )
  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out

In [None]:
class ResNet(nn.Module):
  def __init__(self, num_classes=10, num_blocks=2):
    super(ResNet, self).__init__()
    self.in_planes = 16

    self.conv1 = nn.Conv2d(3, 16, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(16)
    self.layer1 = self._make_layer(16, num_blocks, stride=1)
    self.layer2 = self._make_layer(32, num_blocks, stride=2)
    self.layer3 = self._make_layer(64, num_blocks, stride=2)
    self.linear = nn.Linear(64, num_classes)

  def _make_layer(self, planes, num_blocks, stride):
    strides = [stride] + [1]* (num_blocks-1)
    layers = []
    for stride in strides:
      layers.append(BasicBlock(self.in_planes, planes, stride))
      self.in_planes = planes
    return nn.Sequential(*layers)

  def forward(self,x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = F.avg_pool2d(out,8)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model_ver1 = ResNet().to(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_ver1.parameters(), lr=learning_rate)

cuda:0


In [None]:
# STL10 데이터에 맞춰 모델구조 변경
model_ver1.linear = nn.Linear(3136, 10)
model_ver1.to("cuda:0")

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = model_ver1.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 5==0:
    print(loss)

tensor(2.3820, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.9176, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.5703, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2785, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2183, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1190, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1157, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0700, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0462, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0476, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0268, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0250, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0253, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0130, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0093, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0047, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0042, device='cuda:0', grad_fn=

In [None]:
# Model Evaluation
correct = 0
total = 0
with torch.no_grad():
  model_ver1.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = model_ver1(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 63.34 %


기본 모형의 Accuracy는 63.34%로 사전학습된 모델에 비해 성능이 좋지 않다.

### 시도1: transforms.Resize의 크기를 256 -> 160로 변경
STL10 데이터셋은 96x96으로 작은 크기를 가지므로, 256x256으로 확대하는 것보다 160x160으로 좀 더 작게 확대하는 것이 정보의 손실을 줄일 것이라 예측하여 Resize의 크기를 변경하였다.


In [None]:
# set parameters
batch_size = 64
learning_rate = 0.001
num_epoch = 100

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(160),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.STL10(root='./Data', split='train', download=True, transform=transf)
testset = torchvision.datasets.STL10(root='./Data', split='test', download=True, transform=transf)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=True)

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model_ver2 = ResNet().to(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_ver2.parameters(), lr=learning_rate)

cuda:0


In [None]:
# STL10 데이터에 맞춰 모델구조 변경
model_ver2.linear = nn.Linear(3136, 10)
model_ver2.to("cuda:0")

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = model_ver2.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 5==0:
    print(loss)

tensor(2.1016, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.0139, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.6620, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.3820, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2069, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2156, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1098, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1096, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0428, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0359, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0277, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0364, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0235, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0154, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0107, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0076, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0034, device='cuda:0', grad_fn=

In [None]:
# Model Evaluation
correct = 0
total = 0
with torch.no_grad():
  model_ver2.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = model_ver2(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 63.91 %


변경 전과 큰 차이는 없지만, Accuracy가 63.91%로 소폭 상승한 결과를 보인다.

### 시도2: 합성곱 층 1개 추가 (총 2개의 합성곱 층)
합성곱 층을 추가하여 모델이 깊어지면 성능이 높아질 것으로 예상하여 합성곱 층을 추가하였다. \
합성곱 층의 채널 수와 커널 사이즈 등은 모두 경험적으로 선택하였다.\
활성화함수로는 위와 같은 이유로 ReLu함수를 사용하였다.

In [None]:
# set parameters
batch_size = 64
learning_rate = 0.001
num_epoch = 100

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(160),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.STL10(root='./Data', split='train', download=True, transform=transf)
testset = torchvision.datasets.STL10(root='./Data', split='test', download=True, transform=transf)

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to ./Data/stl10_binary.tar.gz


100%|██████████| 2640397119/2640397119 [02:47<00:00, 15761752.15it/s]


Extracting ./Data/stl10_binary.tar.gz to ./Data
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=True)

In [None]:
# ResNet 모델 만들기
class BasicBlock(nn.Module):
  def __init__(self, in_planes, planes, stride=1):
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                           stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.shortcut = nn.Sequential()
    if stride != 1 or in_planes != planes:
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(planes)
          )
  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out

In [None]:
class ResNet(nn.Module):
  def __init__(self, num_classes=10, num_blocks=2):
    super(ResNet, self).__init__()
    self.in_planes = 16

    self.conv1 = nn.Conv2d(3, 16, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(16)
    self.layer1 = self._make_layer(16, num_blocks, stride=1)
    self.layer2 = self._make_layer(32, num_blocks, stride=2)
    self.layer3 = self._make_layer(64, num_blocks, stride=2)
    self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False)  # 새로운 합성곱 층 추가
    self.bn2 = nn.BatchNorm2d(128)  # 배치 정규화 추가
    self.linear = nn.Linear(128, num_classes)

  def _make_layer(self, planes, num_blocks, stride):
    strides = [stride] + [1]* (num_blocks-1)
    layers = []
    for stride in strides:
      layers.append(BasicBlock(self.in_planes, planes, stride))
      self.in_planes = planes
    return nn.Sequential(*layers)

  def forward(self,x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = F.relu(self.bn2(self.conv2(out)))  # 활성화함수 추가
    out = F.avg_pool2d(out,8)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model_ver3 = ResNet().to(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_ver3.parameters(), lr=learning_rate)

cuda:0


In [None]:
# STL10 데이터에 맞춰 모델구조 변경
model_ver3.linear = nn.Linear(6272, 10)
model_ver3.to("cuda:0")

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = model_ver3.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 5==0:
    print(loss)

tensor(2.1285, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.2314, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.5112, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2270, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1467, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0917, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0701, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0430, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0295, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0247, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0161, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0112, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0024, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0016, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0011, device='cuda:0', grad_fn=

In [None]:
# Model Evaluation
correct = 0
total = 0
with torch.no_grad():
  model_ver3.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = model_ver3(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 72.79 %


합성곱 층을 추가하여, Accuracy가 72.79%로 크게 상승하였다.

### 시도3: 합성곱 2개 추가 (총 3개의 합성곱 층)
합성곱 층의 개수를 추가한 것이 효과가 좋아 합성곱 층을 한 개 더 추가하여 총 3개의 합성곱 층의 적합을 시도하였다. \
합성곱 층의 채널 수나 커널 사이즈, 활성화 함수의 선택은 시도2와 같은 이유로 선택하였다.

In [None]:
# set parameters
batch_size = 64
learning_rate = 0.001
num_epoch = 100

In [None]:
# Data preparation
# 이미지 크기 변환 후 텐서로 변환하는 전처리
transf = transforms.Compose([
    transforms.Resize(160),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25]),
])

In [None]:
# download dataset
trainset = torchvision.datasets.STL10(root='./Data', split='train', download=True, transform=transf)
testset = torchvision.datasets.STL10(root='./Data', split='test', download=True, transform=transf)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
trainloader = DataLoader(trainset, batch_size=batch_size)
testloader = DataLoader(testset, batch_size=batch_size)

In [None]:
# ResNet 모델 만들기
class BasicBlock(nn.Module):
  def __init__(self, in_planes, planes, stride=1):
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                           stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.shortcut = nn.Sequential()
    if stride != 1 or in_planes != planes:
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(planes)
          )
  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out

In [None]:
class ResNet(nn.Module):
  def __init__(self, num_classes=10, num_blocks=2):
    super(ResNet, self).__init__()
    self.in_planes = 16

    self.conv1 = nn.Conv2d(3, 16, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(16)
    self.layer1 = self._make_layer(16, num_blocks, stride=1)
    self.layer2 = self._make_layer(32, num_blocks, stride=2)
    self.layer3 = self._make_layer(64, num_blocks, stride=2)
    self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False)  # 새로운 합성곱 층 추가
    self.bn2 = nn.BatchNorm2d(128)  # 배치정규화 추가
    self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=False)  # 새로운 합성곱 층 추가
    self.bn3 = nn.BatchNorm2d(256)  # 배치정규화 추가
    self.linear = nn.Linear(256, num_classes)

  def _make_layer(self, planes, num_blocks, stride):
    strides = [stride] + [1]* (num_blocks-1)
    layers = []
    for stride in strides:
      layers.append(BasicBlock(self.in_planes, planes, stride))
      self.in_planes = planes
    return nn.Sequential(*layers)

  def forward(self,x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = F.relu(self.bn2(self.conv2(out)))  # 활성화함수 추가
    out = F.relu(self.bn3(self.conv3(out)))  # 활성화함수 추가
    out = F.avg_pool2d(out,8)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model_ver4 = ResNet().to(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_ver4.parameters(), lr=learning_rate)

cuda:0


In [None]:
# STL10 데이터에 맞춰 모델구조 변경
model_ver4.linear = nn.Linear(12544, 10)
model_ver4.to("cuda:0")

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(trainloader):
    x = image.to(device)
    y_ = label.to(device)

    optimizer.zero_grad()
    output = model_ver4.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

  if i % 5==0:
    print(loss)

tensor(1.8377, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2532, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1353, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0657, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0710, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0273, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0100, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0082, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0020, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0013, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0010, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0008, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0006, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0004, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0003, device='cuda:0', grad_fn=

In [None]:
correct = 0
total = 0
with torch.no_grad():
  model_ver4.eval()
  for data in testloader:
    images, labels = data[0].to(device), data[1].to(device)
    outputs = model_ver4(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print('Test accuracy: %.2f %%'%(100*correct/total))

Test accuracy: 74.64 %


합성곱 층을 1개 더 추가(총 3개의 합성곱)하여 74.64%의 Accuracy를 얻었다.
이를 최종 모형으로 선택한다.

# 최종 모델

### Prob1의 최종 모델 성능
최종성능 94.16%로 시도5의 GoolgleNet_ver7(GoogLeNet모델)에서 가장 높은 성능을 보였다. \
(GoogLeNet모델 이름 지정 과정에서 오타가 있어 GoolgleNet_ver7로 저장되었습니다. 유의해주시면 감사하겠습니다.)

### Prob2의 최종 모델 성능
최종 성능 74.64%로 시도3의 model_ver4(총 3개의 합성곱 층)에서 가장 높은 성능을 보였다.