In [1]:
import os
import urllib.request
import pickle
from sklearn.model_selection import train_test_split

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, ConcatDataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import argparse
import numpy as np
import time
from copy import deepcopy # Add Deepcopy for args
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
filename = "cifar-100-python.tar.gz"

# Only download and unpack when the extracted files read by the following
# statements are missing, so re-running this cell does not fetch and extract
# the archive again every time.
_cifar_files = [os.path.join("cifar-100-python", name) for name in ("meta", "train", "test")]
if not all(os.path.isfile(path) for path in _cifar_files):
    urllib.request.urlretrieve(url, filename)

    # tar flags: x = extract, v = verbose listing, z = gzip, f = archive file.
    # The command string is handed to the operating system shell.
    exit_status = os.system("tar xvzf " + filename)
    if exit_status != 0:
        # os.system only reports a status code; fail here instead of letting a
        # later cell crash on missing dataset files.
        raise RuntimeError("tar extraction failed with status {}".format(exit_status))


def unpickle(file):
    """Load and return the pickled object stored at path ``file``.

    The file is opened in binary read mode inside a ``with`` block, so it is
    closed automatically once loading finishes. ``encoding='bytes'`` is
    forwarded to ``pickle.load``.

    NOTE(review): ``pickle.load`` can execute arbitrary code while loading;
    only use this on files from a trusted source.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Metadata: fine label names, decoded from bytes to str
meta = unpickle('cifar-100-python/meta')
fine_label_names = [t.decode('utf8') for t in meta[b'fine_label_names']]

# Datasets: the pickled train and test files
data = unpickle('cifar-100-python/train')
test = unpickle('cifar-100-python/test')

# Split the training file into train / validation parts: test_size=0.25 holds
# out 25% of it for validation, and random_state=41 fixes the shuffle.
x_train, x_val, y_train, y_val = train_test_split(data[b'data'], data[b'fine_labels'], test_size=0.25, random_state=41)
# Test set (taken from the separate test file)
x_test = test[b'data']
y_test = test[b'fine_labels']

# # Height, width, channel order
# def reshape_image(image):
#     return np.transpose(np.reshape(image,(3, 32,32)), (1,2,0))

In [3]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms

def cifar100_dataset(data, labels, transform=None):
    """Pair every image in ``data`` with its label and return them as a list.

    Each element of ``data`` is reshaped to ``(3, 32, 32)`` and transposed to
    ``(32, 32, 3)`` (height, width, channels). When ``transform`` is truthy it
    is called on that array and its result is stored instead.

    Note that ``transform`` runs exactly once per image, here, while the list
    is being built; any randomness inside it is therefore sampled a single
    time and not again when the returned list is indexed later.

    Returns:
        list of ``(image, label)`` tuples, in the order of ``zip(data, labels)``.
    """
    def _prepare(flat_pixels):
        # Channel-first flat layout -> height x width x channel array.
        hwc = flat_pixels.reshape(3, 32, 32).transpose((1, 2, 0))
        return transform(hwc) if transform else hwc

    return [(_prepare(flat_pixels), target) for flat_pixels, target in zip(data, labels)]

# Deterministic pipeline used for evaluation data (validation and test):
# convert the image to a PyTorch tensor, then normalize every channel with
# mean 0.5 and std 0.5.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Training pipeline 1: tensor conversion + random horizontal flip + normalize.
transform_train1 = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),           # random left-right flip
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Training pipeline 2: adds colour jitter and Gaussian blur before the flip.
transform_train2 = transforms.Compose([
    transforms.ToTensor(),                       # convert the image to a PyTorch tensor
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # colour jittering
    transforms.GaussianBlur(kernel_size=3),      # Gaussian blur
    transforms.RandomHorizontalFlip(),           # random left-right flip
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # normalization
])

# Training data: the same images pushed through both augmentation pipelines
# and concatenated, which doubles the number of training samples.
trainset1 = cifar100_dataset(np.array(x_train), y_train, transform_train1)
trainset2 = cifar100_dataset(np.array(x_train), y_train, transform_train2)
combined_trainset = ConcatDataset([trainset1, trainset2])

# Validation data goes through the deterministic evaluation pipeline, exactly
# like the test set. It used to be built with the random training
# augmentations (and duplicated), which made validation metrics measure a
# different, randomly perturbed pipeline than the one used for testing.
valset = cifar100_dataset(np.array(x_val), y_val, transform_test)
combined_valset = valset  # old name kept as an alias for any cell still using it

testset = cifar100_dataset(np.array(x_test),y_test, transform_test)

# Data partitions consumed by the training session
partition = {'train': combined_trainset, 'val': valset, 'test':testset}

In [49]:
def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 ``nn.Conv2d`` from ``in_planes`` to ``out_planes`` channels."""
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )

def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 from ``in_planes`` to ``out_planes`` channels."""
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )

class BasicBlock(nn.Module):
    """Two-convolution residual block: conv-BN-ReLU, conv-BN, add shortcut, ReLU.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both 3x3 convolutions.
        stride: stride of the first convolution.
        downsample: optional callable applied to the block input to produce
            the shortcut; when ``None`` the input itself is the shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()

        self.inplanes, self.planes = inplanes, planes

        # Main branch; batch norm always precedes the ReLU.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        # Shortcut branch adapter and bookkeeping.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        # conv -> BN -> ReLU
        residual = self.conv1(x)
        residual = self.bn1(residual)
        residual = self.relu(residual)

        # conv -> BN (the second ReLU comes after the addition)
        residual = self.conv2(residual)
        residual = self.bn2(residual)

        # When a downsample callable was given, it produces the shortcut from
        # the block input (used to match the main branch's output size).
        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)


In [50]:
class ResNet(nn.Module):
    """ResNet-style image classifier.

    Pipeline: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2
    max-pool, three residual stages (128, 128 and 256 planes), global average
    pooling, dropout (p=0.5) and a final linear layer.

    Args:
        block: residual block class. It must expose an ``expansion`` class
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        layers: sequence with the number of blocks per stage. Only the first
            three entries are read; any further entries are ignored.
        num_classes: size of the output layer.
        zero_init_residual: when True, the weight of the last batch norm in
            every residual block is set to zero so each block starts out as
            an identity mapping.
    """

    def __init__(self, block, layers, num_classes=100, zero_init_residual=False):
        super(ResNet, self).__init__()
        # Channel count entering the next stage; updated by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.dropout = nn.Dropout(p=0.5)

        self.layer1 = self._make_layer(block, 128, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch, so that the
        # residual branch starts with zeros and each block behaves like an
        # identity. The last BN is found by attribute (bn3 for three-conv
        # blocks, otherwise bn2) on instances of the supplied ``block`` class.
        # The previous isinstance check against ``Bottleneck`` raised
        # NameError, because no such class is defined in this notebook.
        if zero_init_residual:
            for m in self.modules():
                if not isinstance(m, block):
                    continue
                last_bn = getattr(m, 'bn3', None)
                if last_bn is None:
                    last_bn = getattr(m, 'bn2', None)
                if last_bn is not None:
                    nn.init.constant_(last_bn.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks as an ``nn.Sequential``.

        Only the first block receives ``stride`` and, when the stride is not 1
        or the channel count changes, a 1x1-conv + batch-norm shortcut adapter.
        ``self.inplanes`` is updated to the stage's output channel count.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension before the classifier.
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)
        return x

In [51]:
def train(net, partition, optimizer, criterion, args):
    """Run one training epoch over ``partition['train']``.

    Args:
        net: model to optimise. Batches are moved to the device of its first
            parameter (CPU when it has no parameters), so the function works
            with or without a GPU.
        partition: mapping whose ``'train'`` entry is a dataset of
            ``(input, label)`` pairs.
        optimizer: optimizer stepping ``net``'s parameters.
        criterion: loss called as ``criterion(outputs, labels)``.
        args: namespace providing ``train_batch_size``; an optional
            ``num_workers`` attribute overrides the default of 2 loader workers.

    Returns:
        ``(net, train_loss, train_acc)`` where ``train_loss`` is the mean of
        the per-batch losses and ``train_acc`` is the accuracy in percent.
    """
    trainloader = torch.utils.data.DataLoader(
        partition['train'],
        batch_size=args.train_batch_size,
        shuffle=True,
        num_workers=getattr(args, 'num_workers', 2),
    )

    # Follow the model's own device instead of hard-coding CUDA.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    net.train()

    correct = 0
    total = 0
    train_loss = 0.0

    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # argmax carries no gradient, so the legacy ``.data`` access is not needed.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = train_loss / len(trainloader)
    train_acc = 100 * correct / total

    return net, train_loss, train_acc


In [52]:
def validate(net, partition, criterion, args):
    """Evaluate ``net`` on ``partition['val']`` without tracking gradients.

    Args:
        net: model to evaluate; it is switched to eval mode. Batches are moved
            to the device of its first parameter (CPU when it has none).
        partition: mapping whose ``'val'`` entry is a dataset of
            ``(input, label)`` pairs.
        criterion: loss called as ``criterion(outputs, labels)``.
        args: namespace providing ``test_batch_size``; an optional
            ``num_workers`` attribute overrides the default of 2 loader workers.

    Returns:
        ``(val_loss, val_acc)``: mean of the per-batch losses and accuracy in
        percent.
    """
    valloader = torch.utils.data.DataLoader(
        partition['val'],
        batch_size=args.test_batch_size,
        shuffle=False,
        num_workers=getattr(args, 'num_workers', 2),
    )

    # Follow the model's own device instead of hard-coding CUDA.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    net.eval()

    correct = 0
    total = 0
    val_loss = 0

    with torch.no_grad():
        for images, labels in valloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)

            loss = criterion(outputs, labels)

            val_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = val_loss / len(valloader)
    val_acc = 100 * correct / total

    return val_loss, val_acc


In [53]:
def test(net, partition, args):
  testloader = torch.utils.data.DataLoader(partition['test'],
                                            batch_size=args.test_batch_size, shuffle=False, num_workers=2)
  net.eval()
  correct = 0
  total = 0

  with torch.no_grad():
    for data in testloader:
      images, labels = data
      images = images.cuda()
      labels = labels.cuda()
      outputs = net(images)
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

    test_acc = 100 * correct / total
  return test_acc

In [54]:
def experiment(partition, args):
    """Train a ``ResNet(BasicBlock, [2,2,2,2])`` and evaluate it on the test split.

    Args:
        partition: mapping with ``'train'``, ``'val'`` and ``'test'`` datasets.
        args: namespace providing ``optim`` ('SGD', 'RMSprop' or 'Adam'),
            ``lr``, ``l2`` (used as weight decay), ``epoch`` and the batch
            sizes read by ``train`` / ``validate`` / ``test``.

    Returns:
        ``(vars(args), result, args.l2, args.lr, args.epoch, test_acc)`` where
        ``result`` holds the per-epoch loss/accuracy lists, the last epoch's
        train/val accuracy (``None`` when no epoch was run) and the test
        accuracy.

    Raises:
        ValueError: if ``args.optim`` is not one of the supported names.
    """
    net = ResNet(BasicBlock, [2,2,2,2])
    # Use the GPU when one is available; otherwise stay on the CPU instead of
    # crashing in an unconditional ``net.cuda()``.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    criterion = nn.CrossEntropyLoss()

    # All supported optimizers take the same (params, lr, weight_decay) arguments.
    optimizer_classes = {
        'SGD': optim.SGD,
        'RMSprop': optim.RMSprop,
        'Adam': optim.Adam,
    }
    if args.optim not in optimizer_classes:
        raise ValueError('In-valid optimizer choice')
    optimizer = optimizer_classes[args.optim](net.parameters(), lr=args.lr, weight_decay=args.l2)

    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    for epoch in range(args.epoch):  # loop over the dataset multiple times
        ts = time.time()
        net, train_loss, train_acc = train(net, partition, optimizer, criterion, args)
        val_loss, val_acc = validate(net, partition, criterion, args)
        te = time.time()

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        print('Epoch {}, Acc(train/val): {:2.2f}/{:2.2f}, Loss(train/val) {:2.2f}/{:2.2f}. Took {:2.2f} sec'.format(epoch, train_acc, val_acc, train_loss, val_loss, te-ts))

    test_acc = test(net, partition, args)
    print(test_acc, '!!!!!!!')

    result = {
        'train_losses': train_losses,
        'val_losses': val_losses,
        'train_accs': train_accs,
        'val_accs': val_accs,
        # Final-epoch values are read from the history lists so that
        # args.epoch == 0 no longer fails on unbound loop variables.
        'train_acc': train_accs[-1] if train_accs else None,
        'val_acc': val_accs[-1] if val_accs else None,
        'test_acc': test_acc,
    }

    return vars(args), result, args.l2, args.lr, args.epoch, test_acc


In [None]:
import hashlib
import json
from os import listdir
from os.path import isfile, join
import pandas as pd

def save_exp_result(setting, result, l2, lr, epoch, res=None):
    """Write one experiment's metrics and settings to a JSON file in ``./results``.

    The file is named ``{exp_name}-l2:{l2}-lr:{lr}-epoch:{epoch}-{hash}.json``
    (prefixed with ``{res}-`` when ``res`` is given), where ``hash`` is the
    first six hex digits of the SHA-1 of ``str(setting)``. Writing into
    ``./results`` with the experiment name in the filename is what
    ``load_exp_result`` searches for; files used to be written to the current
    directory without the name and so were never found.

    Args:
        setting: dict of experiment settings; must contain ``'exp_name'``.
        result: dict of metrics. It is not modified; the merged content is
            built in a new dict, with ``setting`` entries overriding equal keys.
        l2, lr, epoch: values embedded in the filename.
        res: optional value (e.g. the test accuracy) prefixed to the filename.
    """
    exp_name = setting['exp_name']
    hash_key = hashlib.sha1(str(setting).encode()).hexdigest()[:6]

    stem = '{}-l2:{}-lr:{}-epoch:{}-{}'.format(exp_name, l2, lr, epoch, hash_key)
    if res is not None:
        stem = '{}-{}'.format(res, stem)

    out_dir = './results'
    os.makedirs(out_dir, exist_ok=True)
    filename = os.path.join(out_dir, stem + '.json')

    # Merge into a copy so the caller's ``result`` dict is left untouched.
    payload = dict(result)
    payload.update(setting)

    with open(filename, 'w') as f:
        json.dump(payload, f)

def load_exp_result(exp_name):
    """Load every saved result of experiment ``exp_name`` into a DataFrame.

    Reads the files in ``./results`` whose name ends with ``.json`` and
    contains ``exp_name``, in sorted filename order, one row per file.

    Args:
        exp_name: substring that a result filename must contain.

    Returns:
        ``pandas.DataFrame`` with one row per matching file; an empty frame
        when ``./results`` does not exist or nothing matches.
    """
    dir_path = './results'
    # Nothing saved yet: report "no results" instead of raising.
    if not os.path.isdir(dir_path):
        return pd.DataFrame()

    # endswith (rather than a substring test) keeps files such as
    # "x.json.bak" out; sorting makes the row order reproducible.
    filenames = sorted(
        f for f in listdir(dir_path)
        if f.endswith('.json') and exp_name in f and isfile(join(dir_path, f))
    )

    list_result = []
    for filename in filenames:
        with open(join(dir_path, filename), 'r') as infile:
            list_result.append(json.load(infile))

    df = pd.DataFrame(list_result)
    return df

parser = argparse.ArgumentParser()
args = parser.parse_args("")

# Fill the (empty) namespace in one step. The keyword order below is the order
# in which the attributes are created; keep it stable, because the settings
# dict returned by experiment() is later stringified and hashed for the
# result filename.
vars(args).update(
    exp_name="exp1_lr",

    # ====== Model Capacity ====== #
    act='relu',

    # ====== Regularization ======= #
    l2=0.013,

    # ====== Optimizer & Training ====== #
    optim='Adam',  # experiment() accepts 'SGD', 'RMSprop' or 'Adam'
    lr=0.00016,
    epoch=80,
    train_batch_size=64,
    test_batch_size=1024,
)

setting, result, l2, lr, epoch, res = experiment(partition, args)

Epoch 0, Acc(train/val): 12.71/22.12, Loss(train/val) 3.84/3.26. Took 16.82 sec
Epoch 1, Acc(train/val): 25.35/28.26, Loss(train/val) 3.08/2.94. Took 17.44 sec


In [None]:
def save_exp_result(setting, result, l2, lr, epoch, res):
    """Write one experiment's metrics and settings to a JSON file in ``./results``.

    The file is named
    ``{res}-{exp_name}-l2:{l2}-lr:{lr}-epoch:{epoch}-{hash}.json`` where
    ``hash`` is the first six hex digits of the SHA-1 of ``str(setting)``.
    Writing into ``./results`` with the experiment name in the filename is
    what ``load_exp_result`` searches for; files used to be written to the
    current directory without the name and so were never found.

    Args:
        setting: dict of experiment settings; must contain ``'exp_name'``.
        result: dict of metrics. It is not modified; the merged content is
            built in a new dict, with ``setting`` entries overriding equal keys.
        l2, lr, epoch: values embedded in the filename.
        res: value (e.g. the test accuracy) placed first in the filename.
    """
    exp_name = setting['exp_name']
    hash_key = hashlib.sha1(str(setting).encode()).hexdigest()[:6]

    out_dir = './results'
    os.makedirs(out_dir, exist_ok=True)
    filename = os.path.join(
        out_dir,
        '{}-{}-l2:{}-lr:{}-epoch:{}-{}.json'.format(res, exp_name, l2, lr, epoch, hash_key),
    )

    # Merge into a copy so the caller's ``result`` dict is left untouched.
    payload = dict(result)
    payload.update(setting)

    with open(filename, 'w') as f:
        json.dump(payload, f)

# Persist the run that was just executed (settings + metrics) as a JSON file.
save_exp_result(
    setting=setting,
    result=result,
    l2=l2,
    lr=lr,
    epoch=epoch,
    res=res,
)