<a href="https://colab.research.google.com/github/dougyd92/ResNet/blob/main/HyperparamSweep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup


## Install and import libraries

In [5]:
!pip install torchinfo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

import pandas as pd
import numpy as np

from torchinfo import summary

## Prepare training data from CIFAR10

In [7]:
# Based on example code from: https://github.com/kuangliu/pytorch-cifar/blob/master/main.py

# Use the GPU when torch reports one as available, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# Data
print('==> Preparing data..')

# Per-channel mean / std passed to Normalize by both pipelines.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random 32x32 crop (with 4 px padding) and random
# horizontal flip, followed by tensor conversion and normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Test pipeline: no augmentation, only tensor conversion and normalisation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Training split, served in shuffled batches of 128.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)

# Test split, served in fixed order in batches of 100.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=100,
    shuffle=False,
    num_workers=2,
)

classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

==> Preparing data..
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


## Define the model

In [8]:
# Based on example code from: https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm stages and a shortcut.

    The shortcut is the identity (an empty ``nn.Sequential``) unless the block
    changes the spatial stride or the channel count, in which case it is a
    1x1 convolution followed by batch norm so both branches can be summed.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both 3x3 convolutions.
        stride: stride of the first 3x3 convolution (and of the projection
            shortcut when one is needed).
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main branch; only the first convolution may change the resolution.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Shortcut branch: project only when the shapes would not match.
        shapes_match = stride == 1 and in_planes == out_planes
        if shapes_match:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(hidden))
        out += self.shortcut(x)
        return F.relu(out)

class ResNet(nn.Module):
    """ResNet-style classifier: 3x3 stem conv, four stages, linear head.

    Args:
        block: residual block class. It must expose an ``expansion`` class
            attribute and be constructible as ``block(in_planes, planes, stride)``.
        num_blocks: four ints, the number of residual blocks in each stage.
            A value of 0 replaces the stage with a 1x1 conv + batch-norm
            projection (see ``_make_layer``).
        strides: four ints, the stride applied by the first block (or by the
            projection) of each stage.
        num_classes: size of the output of the final linear layer.
    """

    def __init__(self, block, num_blocks, strides=(1, 2, 2, 2), num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count; updated by _make_layer as stages are built.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=strides[0])
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=strides[1])
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=strides[2])
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=strides[3])
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage and advance ``self.in_planes`` to its output width.

        With ``num_blocks > 0`` the stage is a sequence of ``block`` instances;
        only the first one uses ``stride``, the rest use stride 1.

        With ``num_blocks == 0`` the stage is a 1x1 convolution plus batch
        norm that only adapts the channel count and applies ``stride``.
        """
        out_planes = planes * block.expansion

        if num_blocks > 0:
            block_strides = [stride] + [1]*(num_blocks-1)
            layers = []
            for block_stride in block_strides:
                layers.append(block(self.in_planes, planes, block_stride))
                self.in_planes = out_planes
            return nn.Sequential(*layers)

        # When 0 residual blocks are used for a given layer
        # just use a simple convolution and batch norm instead
        # in order to keep the dimensions correct.
        # A 1x1 kernel needs no padding: padding=1 would add a ring of
        # padding-only pixels (e.g. 8x8 -> 5x5 at stride 2 instead of 4x4),
        # which the 4x4 average pool in forward() would then average over.
        # The projection emits planes * expansion channels so it matches what
        # a stage built from `block` would produce.
        layer = nn.Sequential(
            nn.Conv2d(self.in_planes, out_planes, kernel_size=1,
                      stride=stride, padding=0, bias=False),
            nn.BatchNorm2d(out_planes))
        self.in_planes = out_planes
        return layer

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        The 4x4 average pool followed by the flatten feeds a linear layer with
        ``512 * block.expansion`` inputs, so the last stage must produce a
        4x4 feature map (the case for 32x32 inputs with the default strides).
        """
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


## Training functions

In [9]:
# Based on example code from: https://github.com/kuangliu/pytorch-cifar/blob/master/main.py

LR = 0.1

# Training
def train_one_epoch(net, optimizer, criterion):
    """Run one optimisation pass over ``trainloader``.

    Args:
        net: model to train; switched to training mode here.
        optimizer: optimiser stepped once per batch.
        criterion: loss callable applied to ``(outputs, targets)``.

    Returns:
        ``(loss_sum, accuracy)`` where ``loss_sum`` is the sum of the
        per-batch loss values (not an average) and ``accuracy`` is the
        percentage of correctly classified training samples.
    """
    net.train()

    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for inputs, targets in trainloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        batch_loss = criterion(outputs, targets)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()

        # Index of the largest output per sample is the predicted class.
        predicted = outputs.max(1)[1]
        n_seen += targets.size(0)
        n_correct += predicted.eq(targets).sum().item()

    accuracy = 100.*n_correct/n_seen
    print(f'\tTrain Loss: {loss_sum:.3f} | Train Acc: {accuracy:.2f}%')
    return loss_sum, accuracy


def test(net, optimizer, criterion):
    """Evaluate ``net`` on ``testloader`` without tracking gradients.

    Args:
        net: model to evaluate; switched to evaluation mode here.
        optimizer: unused; kept so existing ``test(net, optimizer, criterion)``
            calls keep working.
        criterion: loss callable applied to ``(outputs, targets)``.

    Returns:
        ``(loss_sum, accuracy)`` where ``loss_sum`` is the sum of the
        per-batch loss values (not an average) and ``accuracy`` is the
        percentage of correctly classified test samples.
    """
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    accuracy = 100.*correct/total
    print(f'\tTest Loss: {test_loss:.3f} | Test Acc: {accuracy:.2f}%')
    # The checkpoint-saving code that used to follow the return statement was
    # unreachable (and referenced the undefined names `os` and `epoch`), so it
    # has been removed; this function never wrote checkpoints.
    return test_loss, accuracy

In [10]:
EPOCHS = 50

def train(net, LR=0.1, epochs=None, t_max=200):
    """Train ``net`` with SGD and a cosine-annealed learning rate.

    Each epoch calls ``train_one_epoch`` and then ``test``, and records the
    loss / accuracy values they return.

    Args:
        net: model to train.
        LR: initial learning rate given to SGD.
        epochs: number of epochs to run. ``None`` (default) uses the
            module-level ``EPOCHS`` as it is at call time.
        t_max: ``T_max`` given to ``CosineAnnealingLR``. The default of 200
            keeps the schedule this function has always used.

    Returns:
        ``{'train': (loss_history, accuracy_history),
           'validation': (loss_history, accuracy_history)}``
        with one entry per epoch in every list.
    """
    n_epochs = EPOCHS if epochs is None else epochs

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=LR,
                        momentum=0.9, weight_decay=5e-4)
    # NOTE(review): with EPOCHS = 50 and t_max = 200 the scheduler is stepped
    # only 50 times, i.e. a quarter of the annealing period, so the learning
    # rate never gets close to its minimum. Pass t_max=<number of epochs> if a
    # full anneal over the run is intended.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max)

    train_accuracy_history = []
    train_loss_history = []
    test_accuracy_history = []
    test_loss_history = []

    for epoch in range(n_epochs):
        print('\nEpoch: %d' % epoch)
        train_loss, train_acc = train_one_epoch(net, optimizer, criterion)
        test_loss, test_acc = test(net, optimizer, criterion)

        train_accuracy_history.append(train_acc)
        train_loss_history.append(train_loss)
        test_accuracy_history.append(test_acc)
        test_loss_history.append(test_loss)

        scheduler.step()

    return {'train': (train_loss_history, train_accuracy_history), 'validation': (test_loss_history, test_accuracy_history)}

## Helper functions

In [13]:
!mkdir ./results

In [11]:
def save_to_excel(data, table_name):
  """Write ``data`` to ``./results/<table_name>.xlsx``.

  ``data`` is turned into a DataFrame and transposed before writing. For the
  dict of dicts returned by ``parse_results`` this puts one outer key per
  row and one statistic per column.

  The ``./results`` directory is not created here.
  """
  transposed = pd.DataFrame(data=data).T
  transposed.to_excel(f'./results/{table_name}.xlsx')

def parse_results(results, phase, stat='accuracy'):
  """Summarise one metric history per entry of ``results``.

  Args:
      results: dict mapping a run label to ``{phase: (loss_history,
          accuracy_history)}``, the structure returned by ``train``.
      phase: which phase entry to read, e.g. ``'train'`` or ``'validation'``.
      stat: ``'accuracy'`` selects the accuracy history; any other value
          selects the loss history.

  Returns:
      dict mapping each run label to ``{'average': ..., 'max': ...}`` computed
      over the selected history.
  """
  # Position inside the (loss_history, accuracy_history) pair.
  history_index = 1 if stat == 'accuracy' else 0

  summary_by_run = {}
  for label, run in results.items():
      history = run[phase][history_index]
      summary_by_run[label] = {
          'average': np.average(history),
          'max': np.max(history),
      }

  return summary_by_run

# Parameter sweeps

## Residual blocks per layer

In [None]:
# Train one model per combination of residual-block counts and keep the
# metric histories keyed by the four counts written side by side (e.g. '2320').
results = {}
for layer1_blocks in range(1, 5):
    for layer2_blocks in range(1, 4):
        for layer3_blocks in range(3):
            for layer4_blocks in range(2):
                num_blocks = [layer1_blocks, layer2_blocks, layer3_blocks, layer4_blocks]

                net = ResNet(BasicBlock, num_blocks).to(device)
                if device == 'cuda':
                    net = torch.nn.DataParallel(net)
                    cudnn.benchmark = True
                print(f'num_blocks={num_blocks}')

                # Skip architectures above the 5,000,000 trainable-parameter budget.
                n_params = summary(net, (128, 3, 32, 32)).trainable_params
                if n_params > 5000000:
                    print(f"Model has too many parameters ({n_params}), will skip")
                    continue
                print(f"Will train model with {n_params}  parameters")

                run_key = ''.join(str(count) for count in num_blocks)
                results[run_key] = train(net)

                # Save intermittently in case training fails
                for phase, table in (('validation', 'WIP_block_num_results_val'),
                                     ('train', 'WIP_block_num_results_train')):
                    save_to_excel(parse_results(results, phase), table)

# Final tables once every combination has been processed.
save_to_excel(parse_results(results, 'validation'), 'block_num_results_val')
save_to_excel(parse_results(results, 'train'), 'block_num_results_train')

## Stride

In [19]:
# Train one model per combination of strides for the first three stages
# (the fourth stage always uses stride 2) and keep the metric histories.
results = {}

# Best from previous parameter sweep
num_blocks = [2, 3, 2, 0]

for layer1stride in range(1, 4):
    for layer2stride in range(1, 4):
        for layer3stride in range(1, 4):
            strides = [layer1stride, layer2stride, layer3stride, 2]

            net = ResNet(BasicBlock, num_blocks, strides=strides)
            net = net.to(device)
            if device == 'cuda':
                net = torch.nn.DataParallel(net)
                cudnn.benchmark = True
            print(f'strides={strides}')

            # Only the summary call is guarded: it is the step expected to
            # fail for stride combinations whose feature maps do not fit the
            # rest of the network. `except Exception` (instead of a bare
            # `except:`) lets KeyboardInterrupt / SystemExit through, so the
            # sweep can still be interrupted, and the error is printed
            # instead of being discarded.
            try:
                model_summary = summary(net, (128, 3, 32, 32))
            except Exception as err:
                print(f"Invalid combination (strides={strides}), will skip. Most likely a dimensionality error.")
                print(f"\t{type(err).__name__}: {err}")
                continue

            # Skip architectures above the 5,000,000 trainable-parameter budget.
            n_params = model_summary.trainable_params
            if n_params > 5000000:
                print(f"Model has too many parameters ({n_params}), will skip")
                continue
            print(f"Will train model with {n_params}  parameters")

            result = train(net)
            results[f'strides:{layer1stride}{layer2stride}{layer3stride}'] = result

            # Save intermittently in case training fails
            save_to_excel(parse_results(results, 'validation'), 'WIP_stride_results_val')
            save_to_excel(parse_results(results, 'train'), 'WIP_stride_results_train')

# Final tables once every combination has been processed.
save_to_excel(parse_results(results, 'validation'), 'stride_results_val')
save_to_excel(parse_results(results, 'train'), 'stride_results_train')

## Learning Rate

In [None]:
# Train the chosen architecture once per learning rate and keep the metric
# histories keyed by 'LR=<value>'.
results = {}

# Best from previous parameter sweep
num_blocks = [2, 3, 2, 0]
strides = [1, 2, 2, 2]

# Learning rates 0.02, 0.04, ..., 0.18 (0.2 excluded, as with `lr < 0.2`).
# The grid is generated from integers because repeatedly adding 0.02 to a
# float accumulates rounding error, which can leak into the result keys and
# makes the comparison against 0.2 at the end of the range unreliable.
for lr_hundredths in range(2, 20, 2):
    lr = lr_hundredths / 100

    net = ResNet(BasicBlock, num_blocks, strides=strides)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    print(f'learning rate = {lr}')

    result = train(net, LR=lr)
    results[f'LR={lr}'] = result

    # Save intermittently in case training fails
    save_to_excel(parse_results(results, 'validation'), 'WIP_LR_results_val')
    save_to_excel(parse_results(results, 'train'), 'WIP_LR_results_train')

# Final tables once every learning rate has been processed.
save_to_excel(parse_results(results, 'validation'), 'LR_results_val')
save_to_excel(parse_results(results, 'train'), 'LR_results_train')