In [None]:
# Colab 기본 Import
import os
import re
import math
import time
import pickle
import random
import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset, random_split

from google.colab import drive
drive.mount('/content/drive')

def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    torch.backends.cudnn.benchmark = True  # type: ignore

seed_everything()

Mounted at /content/drive


In [None]:
class BottleNext(nn.Module):
  def __init__(self, input_channel, output_channel, stride, cardinality=8, base_width=64, widen_factor=4):
    super(BottleNext, self).__init__()

    D = cardinality * int(base_width * (output_channel / (widen_factor * 64.)))
    self.conv1 = nn.Conv2d(input_channel, D, kernel_size=1, stride=1, padding=0, bias=False)
    self.bn1 = nn.BatchNorm2d(D)
    
    self.conv2 = nn.Conv2d(D, D, kernel_size=3, stride=stride, groups=cardinality, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(D)

    self.conv3 = nn.Conv2d(D, output_channel, kernel_size=1, stride=1, padding=0, bias=False)
    self.bn3 = nn.BatchNorm2d(output_channel)

    self.shortcut = nn.Sequential()  

    # ResNet과 달리 stride가 1이더라도 input_channel과 output_channel이 다를 수 있기에 조건을 변경
    if input_channel != output_channel:
      self.shortcut = nn.Sequential(
          nn.Conv2d(input_channel, output_channel, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(output_channel)
      )  
      
  def forward(self, x):
    output = self.conv1(x)
    # nn.ReLU vs F.relu : nn.ReLU는 model에 넣을 수 있는 nn.Module을 생성하고, F.relu는 그냥 Functional API로, 어떠한 값을 넣으면 relu를 통과한 값을 반환해주는 함수이다.
    output = F.relu(self.bn1(output), inplace=True) 

    output = self.conv2(output)
    output = F.relu(self.bn2(output), inplace=True)

    output = self.conv3(output)
    output = self.bn3(output)
    output += self.shortcut(x)
    output = F.relu(output, inplace=True)

    return output

In [None]:
class ResNeXt(nn.Module):
  def __init__(self, block, num_blocks, num_classes=10):
    super(ResNeXt, self).__init__()

    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)

    self.layers1 = self._make_layer(64, 256, BottleNext, num_blocks[0], stride=1)
    self.layers2 = self._make_layer(256, 512, BottleNext, num_blocks[1], stride=2)
    self.layers3 = self._make_layer(512, 1024, BottleNext, num_blocks[2], stride=2)

    self.avgpool = nn.AvgPool2d(8)
    self.fc = nn.Linear(1024, num_classes)

    nn.init.kaiming_normal_(self.fc.weight)

    for key in self.state_dict():
      if key.split('.')[-1] == 'weight':
          if 'conv' in key:
            nn.init.kaiming_normal_(self.state_dict()[key], mode='fan_out')
          if 'bn' in key:
            self.state_dict()[key][...] = 1
      elif key.split('.')[-1] == 'bias':
        self.state_dict()[key][...] = 0


  def _make_layer(self, input_channel, output_channel, block, num_blocks, stride):
    strides = [stride] + [1] * (num_blocks - 1)
    layers = []
    for stride in strides:
      layers.append(block(input_channel, output_channel, stride))
      input_channel = output_channel
    return nn.Sequential(*layers)

  def forward(self, x):
    out = self.conv1(x)
    out = F.relu(self.bn1(out), inplace=True)

    out = self.layers1(out)
    out = self.layers2(out)
    out = self.layers3(out)
    out = self.avgpool(out)

    out = out.view(out.size(0), -1)
    out = self.fc(out)

    return out

In [None]:
from torchvision import datasets

transform_train = transforms.Compose([transforms.RandomCrop(32, padding=4),
                                transforms.RandomHorizontalFlip(p=0.5),
                                transforms.ToTensor(), 
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                ])

transform = transforms.Compose([transforms.ToTensor(), 
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                ])

trainset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)
classes = trainset.classes

train_loader = torch.utils.data.DataLoader(dataset=trainset, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=testset, batch_size=64, shuffle=False)

print(classes)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [None]:
len(trainset), len(testset)

(50000, 10000)

In [None]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNeXt(BottleNext, [3, 3, 3]).to(DEVICE)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=0.0005, nesterov=True)
start_epoch = 0
epochs = 120
train_score = []
test_score = []
print(DEVICE)

cuda


In [None]:
if input() == "Y":
  model_data = torch.load(input())

  model.load_state_dict(model_data['model_state_dict'])

  optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True)

  optimizer.load_state_dict(model_data['optimizer_state_dict'])

  start_epoch = model_data['epoch'] + 1

Y
/content/drive/MyDrive/논문 구현/ResNeXt(2017)/ResNeXt_Cifar10_100.pth


In [None]:
import torchsummary
torchsummary.summary(model, (3, 32, 32),device="cuda")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
            Conv2d-3          [-1, 512, 32, 32]          32,768
       BatchNorm2d-4          [-1, 512, 32, 32]           1,024
            Conv2d-5          [-1, 512, 32, 32]         294,912
       BatchNorm2d-6          [-1, 512, 32, 32]           1,024
            Conv2d-7          [-1, 256, 32, 32]         131,072
       BatchNorm2d-8          [-1, 256, 32, 32]             512
            Conv2d-9          [-1, 256, 32, 32]          16,384
      BatchNorm2d-10          [-1, 256, 32, 32]             512
       BottleNext-11          [-1, 256, 32, 32]               0
           Conv2d-12          [-1, 512, 32, 32]         131,072
      BatchNorm2d-13          [-1, 512, 32, 32]           1,024
           Conv2d-14          [-1, 512,

In [9]:
for epoch in range(start_epoch, epochs):
    model.train()
    train_acc = 0
    if epoch in [100]:
      for param_group in optimizer.param_groups:
          param_group['lr'] *= 0.1
    for idx, batch in tqdm(enumerate(train_loader)):
      optimizer.zero_grad()
      X_tmp, y_tmp = batch
      X_tmp, y_tmp = X_tmp.to(DEVICE), y_tmp.to(DEVICE)
      y_pred = model(X_tmp)
      loss = loss_func(y_pred, y_tmp)

      y_pred = torch.tensor([torch.argmax(pred_list) for pred_list in y_pred], device=DEVICE)
      correct = (y_pred == y_tmp).sum().item()

      loss.backward()
      optimizer.step()
      train_acc += correct
    train_acc /= len(trainset)

    model.eval()
    test_acc = 0
    for idx, batch in enumerate(test_loader):
      X_tmp, y_tmp = batch
      X_tmp, y_tmp = X_tmp.to(DEVICE), y_tmp.to(DEVICE)
      y_pred = model(X_tmp)
      y_pred = torch.tensor([torch.argmax(pred_list) for pred_list in y_pred], device=DEVICE)
      correct = (y_pred == y_tmp).sum().item()
      test_acc += correct
    test_acc /= len(testset)

    train_score.append(round(train_acc, 4))
    test_score.append(round(test_acc, 4))
    if not epoch%10:
      torch.save({
              'epoch': epoch,
              'optimizer_state_dict': optimizer.state_dict(),
              'model_state_dict': model.state_dict(),
          }, f'./drive/MyDrive/논문 구현/ResNeXt(2017)/ResNeXt_Cifar10_{epoch:03d}.pth')
      
      with open(f"./drive/MyDrive/논문 구현/ResNeXt(2017)/train_score_{epoch:03d}.pkl","wb") as f:
        pickle.dump(train_score, f)
      with open(f"./drive/MyDrive/논문 구현/ResNeXt(2017)/test_score_{epoch:03d}.pkl","wb") as f:
        pickle.dump(test_score, f)

    print(f"{epoch}/{epochs} epoch - train_Acc : {round(train_acc, 4)}, test_ACC : {round(test_acc, 4)}")

torch.save({
            'epoch': epoch,
            'optimizer_state_dict': optimizer.state_dict(),
            'model_state_dict': model.state_dict(),
        }, './drive/MyDrive/논문 구현/ResNeXt(2017)/ResNeXt_Cifar10_100_120.pth')

with open("./drive/MyDrive/논문 구현/ResNeXt(2017)/train_score_100_120.pkl","wb") as f: 
  pickle.dump(train_score, f)
with open("./drive/MyDrive/논문 구현/ResNeXt(2017)/test_score_100_120.pkl","wb") as f:
  pickle.dump(test_score, f)

391it [05:11,  1.26it/s]


101/120 epoch - train_Acc : 0.9999, test_ACC : 0.895


391it [05:00,  1.30it/s]


102/120 epoch - train_Acc : 1.0, test_ACC : 0.8978


391it [05:00,  1.30it/s]


103/120 epoch - train_Acc : 1.0, test_ACC : 0.899


391it [05:00,  1.30it/s]


104/120 epoch - train_Acc : 1.0, test_ACC : 0.8988


391it [05:00,  1.30it/s]


105/120 epoch - train_Acc : 1.0, test_ACC : 0.8997


391it [05:00,  1.30it/s]


106/120 epoch - train_Acc : 1.0, test_ACC : 0.8989


391it [05:00,  1.30it/s]


107/120 epoch - train_Acc : 1.0, test_ACC : 0.8985


391it [05:00,  1.30it/s]


108/120 epoch - train_Acc : 1.0, test_ACC : 0.8994


391it [05:00,  1.30it/s]


109/120 epoch - train_Acc : 1.0, test_ACC : 0.8985


391it [05:00,  1.30it/s]


110/120 epoch - train_Acc : 1.0, test_ACC : 0.9013


391it [05:00,  1.30it/s]


111/120 epoch - train_Acc : 1.0, test_ACC : 0.9013


391it [05:00,  1.30it/s]


112/120 epoch - train_Acc : 1.0, test_ACC : 0.9008


391it [05:00,  1.30it/s]


113/120 epoch - train_Acc : 1.0, test_ACC : 0.9015


391it [05:00,  1.30it/s]


114/120 epoch - train_Acc : 1.0, test_ACC : 0.9001


391it [05:00,  1.30it/s]


115/120 epoch - train_Acc : 1.0, test_ACC : 0.9025


391it [05:01,  1.30it/s]


116/120 epoch - train_Acc : 1.0, test_ACC : 0.9019


391it [05:01,  1.30it/s]


117/120 epoch - train_Acc : 1.0, test_ACC : 0.9019


391it [05:01,  1.30it/s]


118/120 epoch - train_Acc : 1.0, test_ACC : 0.9018


391it [05:00,  1.30it/s]


119/120 epoch - train_Acc : 1.0, test_ACC : 0.9017


FileNotFoundError: ignored

In [17]:
if input() == "Y":
  model_data = torch.load(input())

  model.load_state_dict(model_data['model_state_dict'])

  optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True)

  optimizer.load_state_dict(model_data['optimizer_state_dict'])

  start_epoch = model_data['epoch'] + 1

Y
/content/drive/MyDrive/논문 구현/ResNeXt(2017)/ResNeXt_Cifar10_Final.pth


In [18]:
model.eval()
test_acc = 0
for idx, batch in enumerate(test_loader):
  X_tmp, y_tmp = batch
  X_tmp, y_tmp = X_tmp.to(DEVICE), y_tmp.to(DEVICE)
  y_pred = model(X_tmp)
  y_pred = torch.tensor([torch.argmax(pred_list) for pred_list in y_pred], device=DEVICE)
  correct = (y_pred == y_tmp).sum().item()
  test_acc += correct
test_acc /= len(testset)

In [19]:
print(f"Final Test Accuracy : {test_acc:.3f}")

Final Test Accuracy : 0.902
