In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn # torch.nn 클래스와 함수
import torch.nn.functional as F
import torch.optim as optim # 다양한 optimization
import argparse
import numpy as np
import time
from copy import deepcopy
import seaborn as sns
import matplotlib.pyplot as plt
import urllib.request # 파일 다운로드
import tarfile # 파일 추출
from sklearn.model_selection import train_test_split # 무작위로 데이터 나누기
import os
import pickle

In [4]:
# Fetch the CIFAR-100 (python version) archive and unpack it into the working directory.
url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
path = "./cifar-100.tar.gz"  # where the downloaded archive is stored

# Only hit the network when the archive is missing, so re-running the cell is cheap.
# The download goes to a temporary name first and is renamed on success; an
# interrupted transfer therefore never leaves a truncated archive at `path`.
if not os.path.exists(path):
    partial_path = path + ".part"
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, path)

with tarfile.open(path, 'r:gz') as tar:  # open the gzip-compressed tarball
    # Use the 'data' extraction filter when this Python provides it: it refuses
    # members that would escape the destination directory. Older interpreters
    # without the filter API fall back to the plain extraction.
    if hasattr(tarfile, 'data_filter'):
        tar.extractall(filter='data')
    else:
        tar.extractall()

In [5]:
# Per-sample preprocessing applied by the datasets built from this split.
transform = transforms.Compose([
    transforms.ToTensor(),  # convert the image array to a tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # normalise each channel with mean 0.5 / std 0.5
])

# NOTE(security): pickle.load can execute arbitrary code contained in the file.
# Only load archives obtained from a trusted source.

# Meta data (class information), read in binary mode with byte-string keys.
with open('cifar-100-python/meta', 'rb') as fo:
    meta = pickle.load(fo, encoding="bytes")

# Training batch (image data and labels).
# Stored as `train_raw` rather than `train`: a function named `train` is defined
# later in this notebook, and re-running this cell must not overwrite it.
with open('cifar-100-python/train', 'rb') as fo:
    train_raw = pickle.load(fo, encoding="bytes")

# Decode the fine-grained label names from bytes into a list of str.
fine_label_names = [label.decode('utf8') for label in meta[b'fine_label_names']]

# Hold out 20% of the training batch for validation (fixed random_state for a repeatable split).
train_img, val_img, train_label, val_label = train_test_split(
    train_raw[b'data'], train_raw[b'fine_labels'], test_size=0.2, random_state=30)

# Dataset definition
class MakeDataSet(torch.utils.data.Dataset):
    """Index-based dataset over flat image rows and their labels.

    Each entry of ``data`` is reshaped to ``(3, 32, 32)`` and then reordered to
    ``(32, 32, 3)`` before the optional ``transform`` is applied.
    """

    def __init__(self, data, labels, transform=None):
        self.data, self.labels, self.transform = data, labels, transform

    def __len__(self):
        """Number of samples, taken from ``data``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``."""
        channels_first = self.data[idx].reshape(3, 32, 32)
        image = channels_first.transpose(1, 2, 0)  # move the channel axis last
        label = self.labels[idx]

        # Apply the transform only when one was supplied (truthiness check).
        if self.transform:
            image = self.transform(image)
        return image, label

# Build the train / validation datasets and expose them by split name.
partition = {
    'train': MakeDataSet(train_img, train_label, transform=transform),
    'val': MakeDataSet(val_img, val_label, transform=transform),
}
trainset = partition['train']
valset = partition['val']

In [6]:
# VGG network architecture definitions.
# An integer entry is a conv layer with that many filters (64, 128, ...);
# 'M' is a max-pooling layer. Each row below is one stage ending in a pool.

cfg = {
    'VGG11': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'VGG13': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'VGG16': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'VGG19': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}

In [7]:
class CNN(nn.Module):
    """VGG-style convolutional network followed by a small fully connected head.

    Args:
        model_code: key into the module-level ``cfg`` table (e.g. ``'VGG11'``),
            or an explicit layer specification list in the same format
            (integers are conv output channels, ``'M'`` is a max-pool).
        in_channels: number of channels of the input images.
        out_dim: number of output classes.
        act: activation code, one of ``'relu'``, ``'sigmoid'`` or ``'tanh'``.
        use_bn: when true, a ``BatchNorm2d`` follows every conv layer.

    Raises:
        ValueError: if ``act`` is not one of the supported codes.
    """

    def __init__(self, model_code, in_channels, out_dim, act, use_bn):
        super(CNN, self).__init__()

        # Activation function. The original code referenced ``nn.TanH``, which
        # does not exist in torch.nn; the class is spelled ``nn.Tanh``.
        activations = {'relu': nn.ReLU, 'sigmoid': nn.Sigmoid, 'tanh': nn.Tanh}
        if not isinstance(act, str) or act not in activations:
            raise ValueError("Not a valid activation function code")
        self.act = activations[act]()

        # Convolutional feature extractor.
        self.layers = self._make_layers(model_code, in_channels, use_bn)
        # Fully connected classifier. It expects 512 flattened features.
        # NOTE: the attribute name `classifer` (sic) is kept on purpose, because
        # it determines the parameter names stored in saved state_dicts.
        self.classifer = nn.Sequential(nn.Linear(512, 256),
                                       self.act,
                                       nn.Linear(256, out_dim))

    def forward(self, x):
        """Run the conv stack, flatten per sample, and apply the classifier."""
        x = self.layers(x)
        x = x.view(x.size(0), -1)
        x = self.classifer(x)
        return x

    def _make_layers(self, model_code, in_channels, use_bn):
        """Translate a layer specification into an ``nn.Sequential``."""
        # A string selects a predefined architecture; anything else is taken
        # as the layer specification itself.
        layer_spec = cfg[model_code] if isinstance(model_code, str) else model_code

        layers = []
        for x in layer_spec:
            if x == 'M':
                # 'M' -> 2x2 max pooling with stride 2
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                # number -> 3x3 conv (stride 1, padding 1) with `x` output channels
                layers += [nn.Conv2d(in_channels=in_channels,
                                     out_channels=x,
                                     kernel_size=3,
                                     stride=1,
                                     padding=1)]
                if use_bn:
                    layers += [nn.BatchNorm2d(x)]
                layers += [self.act]
                in_channels = x  # the next conv consumes this layer's output

        return nn.Sequential(*layers)

In [8]:
def train(net, partition, optimizer, criterion, args):
    """Run one training epoch over ``partition['train']``.

    Args:
        net: model to optimise. Batches are moved to the device that holds its
            parameters, so the model must already be on its target device.
        partition: mapping that contains the training dataset under ``'train'``.
        optimizer: optimizer that updates ``net``'s parameters.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        args: namespace providing ``train_batch_size`` and, optionally,
            ``num_workers`` (defaults to 2 when absent).

    Returns:
        ``(net, train_loss, train_acc)`` where ``train_loss`` is the mean of the
        per-batch losses and ``train_acc`` is the accuracy in percent.
    """
    # Mini-batches, shuffled every epoch, loaded by worker processes.
    trainloader = torch.utils.data.DataLoader(partition['train'],
                                              batch_size=args.train_batch_size,
                                              shuffle=True,
                                              num_workers=getattr(args, 'num_workers', 2))

    # Follow the model's device instead of hard-coding CUDA.
    first_param = next(net.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    net.train()  # training mode

    correct = 0
    total = 0
    train_loss = 0.0

    for inputs, labels in trainloader:
        optimizer.zero_grad()

        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = net(inputs)

        # Loss, gradients and parameter update.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Running statistics.
        train_loss += loss.item()
        predicted = outputs.detach().argmax(dim=1)  # predicted class per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = train_loss / len(trainloader)  # mean over mini-batches
    train_acc = 100 * correct / total
    return net, train_loss, train_acc


In [9]:
def validate(net, partition, criterion, args):
    """Evaluate ``net`` on ``partition['val']`` without updating it.

    Args:
        net: model to evaluate. Batches are moved to the device that holds its
            parameters, so the model must already be on its target device.
        partition: mapping that contains the validation dataset under ``'val'``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        args: namespace providing ``test_batch_size`` and, optionally,
            ``num_workers`` (defaults to 2 when absent).

    Returns:
        ``(val_loss, val_acc)``: mean of the per-batch losses and accuracy in percent.
    """
    valloader = torch.utils.data.DataLoader(partition['val'],
                                            batch_size=args.test_batch_size,
                                            shuffle=False,
                                            num_workers=getattr(args, 'num_workers', 2))

    # Follow the model's device instead of hard-coding CUDA.
    first_param = next(net.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    net.eval()  # evaluation mode

    correct = 0
    total = 0
    val_loss = 0
    with torch.no_grad():  # no gradient tracking during evaluation
        for images, labels in valloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)

            loss = criterion(outputs, labels)

            val_loss += loss.item()
            predicted = outputs.argmax(dim=1)  # predicted class per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        val_loss = val_loss / len(valloader)  # mean over mini-batches
        val_acc = 100 * correct / total
    return val_loss, val_acc


In [10]:
# Run training and validation for one hyper-parameter setting.
def experiment(partition, args):
    """Train a ``CNN`` for ``args.epoch`` epochs and collect per-epoch metrics.

    Args:
        partition: mapping with the ``'train'`` and ``'val'`` datasets.
        args: namespace providing ``model_code``, ``in_channels``, ``out_dim``,
            ``act``, ``use_bn``, ``optim``, ``lr``, ``l2``, ``epoch`` and the
            batch sizes read by ``train`` / ``validate``.

    Returns:
        ``(vars(args), result)`` where ``result`` holds the per-epoch lists
        ``train_losses``, ``val_losses``, ``train_accs``, ``val_accs`` and the
        last-epoch ``train_acc`` / ``val_acc`` (``None`` when no epoch ran).

    Raises:
        ValueError: if ``args.optim`` is not ``'SGD'``, ``'RMSprop'`` or ``'Adam'``.

    Side effects:
        Writes the final weights to ``model_weights.pth`` and prints one line
        per epoch.
    """
    # Build the model and move it to the GPU.
    net = CNN(model_code = args.model_code,
              in_channels = args.in_channels,
              out_dim = args.out_dim,
              act = args.act,
              use_bn = args.use_bn)
    net.cuda()

    # Cross-entropy loss.
    criterion = nn.CrossEntropyLoss()

    # Optimizer selection; every choice takes the learning rate and the
    # L2 weight-decay coefficient.
    optimizer_classes = {'SGD': optim.SGD, 'RMSprop': optim.RMSprop, 'Adam': optim.Adam}
    if args.optim not in optimizer_classes:
        raise ValueError('In-valid optimizer choice')
    optimizer = optimizer_classes[args.optim](net.parameters(), lr=args.lr, weight_decay=args.l2)

    # Per-epoch history.
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    # Last-epoch accuracies. Initialised so that a run with zero epochs returns
    # None instead of failing on unbound names.
    train_acc = None
    val_acc = None

    for epoch in range(args.epoch):
        ts = time.time()  # time one train + validate pass
        net, train_loss, train_acc = train(net, partition, optimizer, criterion, args)
        val_loss, val_acc = validate(net, partition, criterion, args)
        te = time.time()

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        print('Epoch {}, Acc(train/val): {:2.2f}/{:2.2f}, Loss(train/val) {:2.2f}/{:2.2f}. Took {:2.2f} sec'.format(epoch, train_acc, val_acc, train_loss, val_loss, te-ts))

    # Persist the trained weights.
    torch.save(net.state_dict(), 'model_weights.pth')

    # Collect the results in a dict.
    result = {
        'train_losses': train_losses,
        'val_losses': val_losses,
        'train_accs': train_accs,
        'val_accs': val_accs,
        'train_acc': train_acc,
        'val_acc': val_acc,
    }

    return vars(args), result


In [12]:
# ====== Random Seed Initialization ====== #
seed = 123  # fixed seed for reproducibility
np.random.seed(seed)
torch.manual_seed(seed)

# Start from an empty namespace (no command-line arguments are parsed) and
# fill in the hyper-parameters below.
parser = argparse.ArgumentParser()
args = parser.parse_args([])
# args.exp_name = "exp1_lr_model_code"

vars(args).update(
    # ====== Model ====== #
    model_code='VGG11',      # architecture key
    in_channels=3,           # colour images
    out_dim=100,             # 100 classes
    act='relu',

    # ====== Regularization ======= #
    l2=0.0001,               # L2 regularisation (weight decay)
    use_bn=True,             # batch normalisation

    # ====== Optimizer & Training ====== #
    optim='Adam',            # one of 'SGD', 'RMSprop', 'Adam'
    lr=0.0015,               # learning rate
    epoch=15,                # number of epochs

    train_batch_size=256,    # batch sizes
    test_batch_size=1024,
)

# ====== Experiment Variable ====== #
# Disabled grid-search scaffold, kept for reference:
# name_var1 = 'lr'
# name_var2 = 'model_code'
# list_var1 = [0.0001, 0.00001]
# list_var2 = ['VGG11', 'VGG13']
#
# for var1 in list_var1:
#     for var2 in list_var2:
#         setattr(args, name_var1, var1)
#         setattr(args, name_var2, var2)
#         print(args)

# Run the experiment on a copy of the arguments.
setting, result = experiment(partition, deepcopy(args))

Epoch 0, Acc(train/val): 3.02/5.24, Loss(train/val) 4.32/4.09. Took 10.09 sec
Epoch 1, Acc(train/val): 8.47/9.69, Loss(train/val) 3.77/3.77. Took 10.49 sec
Epoch 2, Acc(train/val): 15.36/17.83, Loss(train/val) 3.36/3.24. Took 10.45 sec
Epoch 3, Acc(train/val): 22.69/15.89, Loss(train/val) 2.97/3.59. Took 10.32 sec
Epoch 4, Acc(train/val): 29.22/28.25, Loss(train/val) 2.65/2.74. Took 10.30 sec
Epoch 5, Acc(train/val): 35.27/32.10, Loss(train/val) 2.36/2.66. Took 10.71 sec
Epoch 6, Acc(train/val): 41.07/36.41, Loss(train/val) 2.11/2.41. Took 10.17 sec
Epoch 7, Acc(train/val): 46.60/36.92, Loss(train/val) 1.88/2.47. Took 10.57 sec
Epoch 8, Acc(train/val): 51.29/38.85, Loss(train/val) 1.68/2.37. Took 10.61 sec
Epoch 9, Acc(train/val): 56.65/35.97, Loss(train/val) 1.46/2.83. Took 10.54 sec
Epoch 10, Acc(train/val): 61.74/41.69, Loss(train/val) 1.26/2.39. Took 10.59 sec
Epoch 11, Acc(train/val): 67.03/42.98, Loss(train/val) 1.06/2.49. Took 10.85 sec
Epoch 12, Acc(train/val): 72.54/43.01, Los