In [1]:
import torch
import random
import numpy as np

import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.utils.data as data

import torch
import torch.nn as nn
import torch.nn.functional as F

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
!nvidia-smi

Wed Nov 16 04:51:55 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   74C    P8    12W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Data Preparation

In [3]:
ROOT = '.data'
train_data = datasets.CIFAR10(root = ROOT, train = True, download = True)

# Compute means and standard deviations along the R,G,B channel

means = train_data.data.mean(axis = (0,1,2)) / 255
stds = train_data.data.std(axis = (0,1,2)) / 255

Files already downloaded and verified


In [4]:
train_transforms = transforms.Compose([
                           transforms.RandomRotation(5),
                           transforms.RandomHorizontalFlip(0.5),
                           transforms.RandomCrop(32, padding = 2),
                           transforms.ToTensor(),
                           transforms.Normalize(mean = means, std = stds)
                       ])

test_transforms = transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize(mean = means, std = stds)
                       ])

In [5]:
train_data = datasets.CIFAR10(ROOT, 
                              train = True, 
                              download = True, 
                              transform = train_transforms)

test_data = datasets.CIFAR10(ROOT, 
                             train = False, 
                             download = True, 
                             transform = test_transforms)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
VALID_RATIO = 0.9

n_train_examples = int(len(train_data) * VALID_RATIO)
n_valid_examples = len(train_data) - n_train_examples

train_data, valid_data = data.random_split(train_data, [n_train_examples, n_valid_examples])

In [7]:
BATCH_SIZE = 256

train_iterator = DataLoader(train_data, batch_size = BATCH_SIZE, shuffle = True) 

valid_iterator = DataLoader(valid_data, batch_size = BATCH_SIZE, shuffle = False) 

test_iterator = DataLoader(test_data, batch_size = BATCH_SIZE, shuffle = False) 

## Modeling

In [8]:
class BasicBlock(nn.Module):

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d( in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        block = BasicBlock
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [9]:
from torchsummary import summary

#model = ResNet([2, 2, 2]).to(device)
model = ResNet([3, 3, 3]).to(device)

summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
            Conv2d-3           [-1, 64, 32, 32]          36,864
       BatchNorm2d-4           [-1, 64, 32, 32]             128
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
        BasicBlock-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
           Conv2d-10           [-1, 64, 32, 32]          36,864
      BatchNorm2d-11           [-1, 64, 32, 32]             128
       BasicBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]          36,864
      BatchNorm2d-14           [-1, 64,

In [10]:
import torch.optim as optim

lr = 1e-3

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [11]:
def calculate_accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim = True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

In [12]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for (x, y) in iterator:
        
        x = x.to(device)
        y = y.to(device)
        
        optimizer.zero_grad()
                
        y_pred = model(x)
        
        loss = criterion(y_pred, y)
        
        acc = calculate_accuracy(y_pred, y)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [13]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
        
        for (x, y) in iterator:

            x = x.to(device)
            y = y.to(device)

            y_pred = model(x)

            loss = criterion(y_pred, y)

            acc = calculate_accuracy(y_pred, y)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [None]:
EPOCHS = 100

# Fill training code here
train_loss_list = []
train_acc_list = []
val_loss_list = []
val_acc_list = []

best_valid_loss = float('inf')

for epoch in range(1, EPOCHS+1):
  train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
  valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
  train_loss_list.append(train_loss)
  train_acc_list.append(train_acc)
  val_loss_list.append(valid_loss)
  val_acc_list.append(valid_acc)

  if valid_loss < best_valid_loss:
    best_valid_loss = valid_loss
    torch.save(model.state_dict(), 'best.pt')
  print("EPOCH: ", epoch)
  print("Train loss: ", train_loss)
  print("Train acc: ", train_acc)
  print("Validation loss: ", valid_loss)
  print("Validation acc: ", valid_acc)

  scheduler.step()

EPOCH:  1
Train loss:  1.6476190638813106
Train acc:  0.3943164062432267
Validation loss:  1.3515185296535492
Validation acc:  0.5081571698188782
EPOCH:  2
Train loss:  1.2557860348712315
Train acc:  0.5500665838745508
Validation loss:  1.1695684611797332
Validation acc:  0.5803998172283172
EPOCH:  3
Train loss:  1.0863372514193708
Train acc:  0.6114346591586416
Validation loss:  1.029601600766182
Validation acc:  0.6292394310235977
EPOCH:  4
Train loss:  0.9579682827673175
Train acc:  0.6596342328597199
Validation loss:  0.9926017612218857
Validation acc:  0.6448874086141586
EPOCH:  5
Train loss:  0.8575609939342196
Train acc:  0.6971129260279916
Validation loss:  0.8856010198593139
Validation acc:  0.6912454038858413
EPOCH:  6
Train loss:  0.7800793126225471
Train acc:  0.7256427556276321
Validation loss:  0.8387568056583404
Validation acc:  0.7063304215669632
EPOCH:  7
Train loss:  0.7187297733669932
Train acc:  0.7475941049104388
Validation loss:  0.8241463392972946
Validation acc:

In [None]:
model.load_state_dict(torch.load('best.pt'))
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print("Test loss: ", test_loss)
print("Test acc: ", test_acc)

In [None]:
import matplotlib.pyplot as plt
epochs = range(1, 80+1)
plt.plot(epochs, train_acc_list, 'bo', label="training acc")
plt.plot(epochs, val_acc_list, 'b', label="validation acc")
plt.legend()
plt.show()

In [None]:
epochs = range(1, 80+1)
plt.plot(epochs, train_loss_list, 'ro', label="training loss")
plt.plot(epochs, val_loss_list, 'r', label="validation loss")
plt.legend()
plt.show()

## Test ACC
**100 epochs, SGD**
* Pure model (Res14): 71.17%
* Res20: 67.41%

In [None]:
with open("results/logs/SGD2")

In [None]:
def plot_acc():
    epochs = range(1, self.epochs+1)
    plt.plot(epochs, self.train_acc_list, 'bo', label="Training acc")
    plt.plot(epochs, self.val_acc_list, 'b', label="Validation acc")
    plt.legend()
    plt.show()

def plot_loss():
    epochs = range(1, self.epochs+1)
    plt.plot(epochs, self.train_loss_list, 'ro', label="Training loss")
    plt.plot(epochs, self.val_loss_list, 'r', label="Validation loss")
    plt.legend()
    plt.show()