## Task 1

### Imports

In [None]:
from torch import nn, optim, no_grad, float, device, cuda, sum
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets
from torchvision.transforms import ToTensor, Resize, Compose
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
# import torch_directml - I own an AMD gpu so this is a pytorch port using directml backend

from torch.utils.tensorboard import SummaryWriter

# device = torch_directml.device(0)

# Pick the GPU when CUDA is available, otherwise stay on the CPU.
# NOTE: this rebinds the name `device` from the imported torch constructor to
# the selected device object; the rest of the notebook uses it as the target.
device = device('cuda') if cuda.is_available() else device('cpu')

print(f"Using {device} device")

Using cuda device


### Loading the datasets

In PyTorch, there are datasets and dataloaders. A Dataset holds the entire collection of training or testing samples, while a DataLoader provides an iterable wrapper that yields those samples in batches.

An additional step is performed to split training into training and validation.

In [None]:
# Mini-batch size used by the DataLoaders built in the training cell.
batch_size = 64

# CIFAR-10 is stored under ./data and downloaded on first use. The ToTensor()
# transform is applied to every sample. `orig_dataset` is the official training
# split (later divided into training and validation); `test_dataset` is the
# held-out test split.
orig_dataset = datasets.CIFAR10(root="data", train=True, download=True, transform=ToTensor())
test_dataset = datasets.CIFAR10(root="data", train=False, download=True, transform=ToTensor())

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:28<00:00, 5963643.72it/s] 


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


In [None]:
# Search space for the sweep. To keep the run short, only the optimizer and the
# activation function are varied (the learning rate is held at 1e-4); entries
# can be removed from either list to shorten the run further.
optimizers = [optim.SGD, optim.Adam]
activations = [nn.LeakyReLU, nn.Tanh]


### Basic CNN
- Leaky ReLU activation
- SGD optimizer
- 1e-3 learning rate

In [None]:
class BasicCNN(nn.Module):
    """Small three-stage convolutional classifier that outputs 10 logits.

    Each stage is a 3x3 'same'-padded convolution followed by the supplied
    activation and a 2x2 max-pool that halves height and width. The 54 feature
    maps of the last stage are globally average-pooled to one value per channel
    and fed to a two-layer fully connected head.

    Because the pooling before the head is global, the network accepts any
    input whose height and width survive the three halvings (8x8 or larger),
    not just 32x32 images.

    Args:
        activation: Zero-argument callable returning a new activation module
            (e.g. ``nn.LeakyReLU``). It is called once per activation site.
    """

    def __init__(self, activation):
        super().__init__()
        self.stack = nn.Sequential(  # shape comments assume a 3x32x32 input
            nn.Conv2d(3, 18, kernel_size=3, padding='same'), activation(),  # 18x32x32
            nn.MaxPool2d(kernel_size=2, stride=2),  # 18x16x16
            nn.Conv2d(18, 54, kernel_size=3, padding='same'), activation(),  # 54x16x16
            nn.MaxPool2d(kernel_size=2, stride=2),  # 54x8x8
            nn.Conv2d(54, 54, kernel_size=3, padding='same'), activation(),  # 54x8x8
            nn.MaxPool2d(kernel_size=2, stride=2),  # 54x4x4
            # Global average pooling: same result as AvgPool2d(kernel_size=4)
            # on a 4x4 map, but independent of the input resolution.
            nn.AdaptiveAvgPool2d(1),  # 54x1x1
            nn.Flatten(),  # 54
            nn.Linear(54, 20), activation(),
            nn.Linear(20, 10)  # one raw logit per class
        )

    def forward(self, x):
        """Return the (batch, 10) logits for a batch of 3-channel images."""
        return self.stack(x)

### Training and Test Procedures

In [None]:
def train(train_dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass (epoch) over ``train_dataloader``.

    For every batch the model output is computed, the loss is back-propagated,
    the optimizer takes a step and the gradients are cleared. The running
    progress is printed on every 1000th batch (including the first).

    Args:
        train_dataloader: Iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: Module to train; it is switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor.
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        The mean of the per-batch losses over the epoch as a Python float, the
        same per-batch averaging that ``test`` uses for its loss.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    size = len(train_dataloader.dataset)

    # Send each batch to wherever the model's weights live so inputs and
    # parameters can never end up on different devices. Parameter-free models
    # fall back to the notebook-level `device`.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else device

    model.train()
    running_loss, num_batches = 0.0, 0
    for batch, (X, y) in enumerate(train_dataloader):
        X, y = X.to(target), y.to(target)
        y_hat = model(X)  # call the module (not .forward) so hooks still run
        loss = loss_fn(y_hat, y)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Accumulate on-device; converting every batch would force a sync.
        running_loss = running_loss + loss.detach()
        num_batches += 1

        if batch % 1000 == 0:
            batch_loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

    if num_batches == 0:
        raise ValueError("train_dataloader yielded no batches")

    return (running_loss / num_batches).item()

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    return test_loss

### Training

In [None]:
# Number of passes over the training split for every configuration.
# Reduce this for a faster run; full training may not be required.
epochs = 25
lr = 1e-4

from itertools import product
from torch import Generator

# product() yields every (optimizer, activation) pair, which replaces nested
# for-loops. The learning rate and batch size are held fixed for the sweep.
hyperparam_combos = product(optimizers, activations)

model_results = {}

# Split once, with a fixed seed, so every configuration trains and validates
# on exactly the same samples and the results are comparable and repeatable.
split_generator = Generator().manual_seed(0)
train_dataset, val_dataset = random_split(
    dataset=orig_dataset, lengths=[0.8, 0.2], generator=split_generator
)

# Only the training batches are reshuffled each epoch; evaluation order is irrelevant.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size)

for optimizer_fn, activation in hyperparam_combos:
    # Same text the results dictionary has always been keyed by.
    run_name = f"activation={activation.__name__} optimizer={optimizer_fn.__name__}"

    # Fresh model and optimizer for every configuration.
    model = BasicCNN(activation=activation)
    model.to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optimizer_fn(model.parameters(), lr=lr)

    # Optional TensorBoard logging: saves one event file per hyperparameter combo
    # writer = SummaryWriter(f"runs/BasicCNN/batch={batch_size} lr={lr} {run_name}")

    # Train the current configuration, validating after every epoch.
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train_loss = train(train_dataloader, model, loss_fn, optimizer)
        # writer.add_scalar(tag='Training Loss', scalar_value=train_loss, global_step=t)
        val_loss = test(val_dataloader, model, loss_fn)
        # writer.add_scalar(tag='Validation Loss', scalar_value=val_loss, global_step=t)

    # Final accuracy on the held-out test set: evaluation mode, and no autograd
    # graph is built since nothing is back-propagated here.
    model.eval()
    correct = 0
    with no_grad():
        for X, y in test_dataloader:
            X, y = X.to(device), y.to(device)
            y_hat = model(X)
            correct += (y_hat.argmax(axis=1) == y).sum().item()
    model_results[run_name] = correct*100/len(test_dataloader.dataset)
    # writer.close()
print("Done!")

Epoch 1
-------------------------------
loss: 2.320946  [   64/40000]
Test Error: 
 Accuracy: 10.4%, Avg loss: 2.308742 

Epoch 2
-------------------------------
loss: 2.320692  [   64/40000]
Test Error: 
 Accuracy: 10.4%, Avg loss: 2.308616 

Epoch 3
-------------------------------
loss: 2.320441  [   64/40000]
Test Error: 
 Accuracy: 10.4%, Avg loss: 2.308492 

Epoch 4
-------------------------------
loss: 2.320193  [   64/40000]
Test Error: 
 Accuracy: 10.4%, Avg loss: 2.308370 

Epoch 5
-------------------------------
loss: 2.319950  [   64/40000]
Test Error: 
 Accuracy: 10.4%, Avg loss: 2.308251 

Epoch 6
-------------------------------
loss: 2.319709  [   64/40000]
Test Error: 
 Accuracy: 10.4%, Avg loss: 2.308134 

Epoch 7
-------------------------------
loss: 2.319466  [   64/40000]
Test Error: 
 Accuracy: 10.4%, Avg loss: 2.308019 

Epoch 8
-------------------------------
loss: 2.319229  [   64/40000]
Test Error: 
 Accuracy: 10.4%, Avg loss: 2.307907 

Epoch 9
----------------

In [None]:
# Report the final test accuracy recorded for every configuration of the sweep.
for key in model_results:
    value = model_results[key]
    print(f"Test accuracy of {key} model was {value}%")

Test accuracy of activation=LeakyReLU optimizer=SGD model was 10.0%
Test accuracy of activation=Tanh optimizer=SGD model was 11.2%
Test accuracy of activation=LeakyReLU optimizer=Adam model was 41.52%
Test accuracy of activation=Tanh optimizer=Adam model was 48.85%
