## Model Comparison Jupyter notebook

This notebook loads the nets defined in the imported Python modules, each with certain performance characteristics, for comparison.
This file also contains the TensorBoard setup that helps visualize the models' accuracy over time and the training process.

Components:
- initialize training parameters and fetch dataset (including RotMNIST dataset)
- compare model attributes (such as total parameters and structure)
- define hyperparameters
- create tensorboard and set up performance graphs
- compare model training and performance under different conditions
- save models to files

In [None]:
### Imports for pytorch and dataset

import torch
from torch.autograd import Variable

import torchvision
from torchvision import datasets
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

import torch.nn as nn

from RotMNIST import RotMNIST

In [None]:
### Define datasets
### Instantiate RotMNIST once per split/configuration; the dataloaders built
### from these further down are used to verify the behaviour.
def _make_rotmnist(train, rotation_mirroring):
    """Build one RotMNIST dataset with the Resize(32) + ToTensor pipeline.

    `train` selects the split and `rotation_mirroring` is forwarded to
    RotMNIST unchanged. A fresh transform pipeline is created per dataset.
    """
    pipeline = torchvision.transforms.Compose(
        [torchvision.transforms.Resize(32), torchvision.transforms.ToTensor()]
    )
    return RotMNIST(
        root='data',
        download=True,
        train=train,
        transform=pipeline,
        rotation_mirroring=rotation_mirroring,
    )


# Training split with rotation_mirroring enabled
dataset_rot = _make_rotmnist(train=True, rotation_mirroring=True)

# Test split with rotation_mirroring enabled
test_dataset_rot = _make_rotmnist(train=False, rotation_mirroring=True)

# Training split with rotation_mirroring disabled
dataset_upright = _make_rotmnist(train=True, rotation_mirroring=False)

In [None]:
### Import different networks from python files
# TODO: Uncomment other networks and import
from p4m_conv import P4MNet
from p4_conv import P4Net
from z2_conv import ConvNet

def _count_trainable(model):
    """Return the number of scalar entries in parameters that require grad."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


# One instance of each architecture under comparison
p4m_net = P4MNet()
p4_net = P4Net()
conv_net = ConvNet()

p4m_total_params = _count_trainable(p4m_net)
p4_total_params = _count_trainable(p4_net)
z2_total_params = _count_trainable(conv_net)

# Show the module structure of every network ...
for _model in (p4m_net, p4_net, conv_net):
    print(_model)

# ... followed by the trainable-parameter totals
print(f"P4M  --\tTrainable Params: {p4m_total_params}")
print(f"P4   --\tTrainable Params: {p4_total_params}")
print(f"Conv --\tTrainable Params: {z2_total_params}")

In [None]:
### Hyperparameters
learning_rate = 0.001  # learning rate passed to the SGD optimizer
batch_size = 64        # batch size shared by all dataloaders below
epochs = 25            # number of train/test rounds per network

### Objectives/Loss fn
loss_fn = nn.CrossEntropyLoss()

### Dataloaders
# All three loaders use the same batch size and are shuffled.
_loader_options = {"batch_size": batch_size, "shuffle": True}
train_dataloader_rot = DataLoader(dataset_rot, **_loader_options)
test_dataloader_rot = DataLoader(test_dataset_rot, **_loader_options)
train_dataloader_upright = DataLoader(dataset_upright, **_loader_options)

In [None]:
### Tensorboard helpers
import matplotlib.pyplot as plt
import numpy as np

# helper function to show an image (copied from https://pytorch.org/tutorials/intermediate/tensorboard_tutorial.html)
# (used in the `plot_classes_preds` function below)
def imshow(img):
    """Draw a tensor image in greyscale via matplotlib.

    The first dimension of `img` is averaged away, the result is mapped
    through x / 2 + 0.5 and rendered with the "Greys" colormap.
    """
    collapsed = img.mean(dim=0)
    rescaled = collapsed / 2 + 0.5  # the "unnormalize" step
    plt.imshow(rescaled.numpy(), cmap="Greys")

### Tensorboard
import torch.utils.tensorboard
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('runs/p4m_MNIST_1')

# Get grid of training images.
# The built-in next() is used because DataLoader iterators implement the
# iterator protocol (__next__); the Python-2 style `.next()` method is not
# available on current PyTorch releases and raises AttributeError there.
dataiter = iter(train_dataloader_upright)
images, labels = next(dataiter)
img_grid = torchvision.utils.make_grid(images)
imshow(img_grid)

# To tensorboard
writer.add_image('Training Batch', img_grid)

# Log the P4M network's graph, using the sample batch as example input
writer.add_graph(p4m_net, images)
writer.close()

# Human-readable class names, indexed by integer label
classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
# helper function
def select_n_random(data, labels, n=100):
    '''
    Selects n random datapoints and their corresponding labels from a dataset

    data and labels must have the same length and support indexing with an
    index tensor. Returns a (data, labels) pair holding the same randomly
    chosen rows; if n exceeds the dataset size, every row is returned in
    random order.
    '''
    assert len(data) == len(labels)

    # Truncate the permutation *before* indexing so only n rows are gathered,
    # rather than building shuffled copies of the whole dataset and slicing.
    chosen = torch.randperm(len(data))[:n]
    return data[chosen], labels[chosen]

# select random images and their target indices
# (drawn from the dataset's raw `.data` / `.targets` attributes, so the
# dataset's `transform` pipeline is not applied to these samples)
images, labels = select_n_random(dataset_upright.data, dataset_upright.targets)

# get the class labels for each image
# NOTE: the name `class_labels` is rebound to a function of the same name
# further down in this notebook; this list is only needed for the call below.
class_labels = [classes[lab] for lab in labels]

# log embeddings
# Each image is flattened to a 784-long row
# (assumes the raw images are 28x28 -- TODO confirm against RotMNIST).
features = images.view(-1, 28 * 28)
# label_img receives the images with an extra dimension inserted at index 1.
writer.add_embedding(features,
                    metadata=class_labels,
                    label_img=images.unsqueeze(1))
writer.close()

# helper functions

def images_to_probs(net, images):
    '''
    Generates predictions and corresponding probabilities from a trained
    network and a list of images

    Returns a pair (preds, probs): preds is a 1-D numpy array holding the
    predicted class index per image, and probs is a list of floats holding
    the softmax probability the network assigns to each predicted class.
    The network output is expected to have shape (batch, classes).
    '''
    output = net(images)
    # the index of the largest score in each row is the predicted class
    _, preds_tensor = torch.max(output, 1)
    # preds_tensor is already 1-D. It is deliberately not squeezed: squeezing
    # a batch of one yields a 0-d array that can be neither iterated nor
    # indexed per image.
    preds = preds_tensor.cpu().numpy()
    # softmax over the class dimension, then pick each row's predicted-class entry
    probs = torch.nn.functional.softmax(output, dim=1)
    top_probs = probs.gather(1, preds_tensor.unsqueeze(1)).squeeze(1)
    return preds, top_probs.detach().cpu().tolist()


def plot_classes_preds(net, images, labels):
    '''
    Generates matplotlib Figure using a trained network, along with images
    and labels from a batch, that shows the network's top prediction along
    with its probability, alongside the actual label, coloring this
    information based on whether the prediction was correct or not.
    Uses the "images_to_probs" function.

    At most the first 64 images are drawn, laid out on an 8x8 grid. Batches
    with fewer than 64 images (e.g. the last batch of an epoch) are supported
    and simply leave the remaining grid cells empty.
    '''
    preds, probs = images_to_probs(net, images)
    # never index past the end of the batch, and never past the 8x8 grid
    shown = min(len(images), 64)
    # plot the images in the batch, along with predicted and true labels
    fig = plt.figure(figsize=(32, 32))
    for idx in range(shown):
        ax = fig.add_subplot(8, 8, idx+1, xticks=[], yticks=[])
        imshow(images[idx].cpu())
        ax.set_title("{0}, {1:.1f}%\n(label: {2})".format(
            preds[idx],
            probs[idx] * 100.0,
            labels[idx]),
                    color=("green" if preds[idx]==labels[idx].item() else "red"))

    return fig


In [None]:
def train_loop(dataloader, model, loss_fn, optimizer, cur_epoch):
    """Train `model` for one pass over `dataloader`.

    Every 100 batches the most recent batch loss is printed together with the
    number of samples processed so far, and the mean loss over those 100
    batches is logged to tensorboard under 'training loss'.

    Relies on the module-level `device` (where model and data are moved) and
    `writer` (the active SummaryWriter).

    Args:
        dataloader: iterable of (inputs, targets) batches; must expose
            `.dataset` and `len()`.
        model: network to optimise.
        loss_fn: callable mapping (predictions, targets) to a scalar loss.
        optimizer: optimizer already bound to the model's parameters.
        cur_epoch: zero-based epoch index, used to offset the logging step.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    running_loss = 0.0

    # Moving the model is loop-invariant, so do it once rather than per batch.
    model.to(device)
    # Make sure layers such as dropout / batch-norm behave in training mode,
    # whatever mode the caller left the model in.
    model.train()

    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss for backprop
        pred = model(X.to(device))
        loss = loss_fn(pred, y.to(device))

        # Backpropagation by setting grad to zero, calculating using backprop engine and stepping (using learning rate)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Keep the float in its own name instead of rebinding the loss tensor.
        batch_loss = loss.item()
        running_loss += batch_loss

        if batch % 100 == 99:
            # `batch` is zero-based, so batch + 1 batches have been processed.
            current = (batch + 1) * len(X)
            print(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

            writer.add_scalar('training loss', running_loss / 100, cur_epoch * num_batches + batch)
            running_loss = 0.0

def test_loop(dataloader, model, loss_fn):
    """Evaluate `model` on `dataloader` and return its accuracy.

    Prints the accuracy (as a percentage) and the average loss per batch.
    Relies on the module-level `device`.

    Args:
        dataloader: iterable of (inputs, targets) batches; must expose
            `.dataset` and `len()`.
        model: network to evaluate.
        loss_fn: callable mapping (predictions, targets) to a scalar loss;
            assumed to return a per-batch mean, which is the default for
            nn.CrossEntropyLoss.

    Returns:
        Fraction of correctly classified samples, in [0, 1].
    """
    model.to(device)
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluate in eval mode (affects layers such as dropout / batch-norm), and
    # restore the previous mode afterwards so training is unaffected.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation (faster, and nothing is optimised here)
        with torch.no_grad():
            for X, y in dataloader:
                X, y = X.to(device), y.to(device)
                pred = model(X)
                test_loss += loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    finally:
        model.train(was_training)

    # Each accumulated term is already a per-batch mean, so average over the
    # number of batches; accuracy is a per-sample count, so divide by samples.
    test_loss /= num_batches
    correct /= size

    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct

In [None]:
### Train all networks
def train_test_net(net, train_upright):
    """Train `net` for `epochs` epochs, testing on the rotated test set after each.

    Args:
        net: network to train (optimised in place with SGD, momentum 0.9).
        train_upright: if truthy, train on the upright training loader,
            otherwise on the rotated training loader.

    Uses the module-level `learning_rate`, `epochs`, `loss_fn`, dataloaders
    and `writer`. The per-epoch test accuracy is logged to tensorboard as
    'Test Performance'.
    """
    # Pick the *training* data; evaluation always uses test_dataloader_rot.
    train_dataloader = train_dataloader_upright if train_upright else train_dataloader_rot

    optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train_loop(train_dataloader, net, loss_fn, optimizer, t)
        correct = test_loop(test_dataloader_rot, net, loss_fn)
        # Step = number of training batches completed so far, i.e. the same
        # unit train_loop uses for 'training loss', so both curves share an
        # x-axis (the old step mixed test-loader length with batch_size).
        writer.add_scalar('Test Performance', correct, (t + 1) * len(train_dataloader))
    # Push pending events to disk; callers may rebind `writer` right after.
    writer.flush()
    print('Finished Training Net + ' + str(type(net)))

In [None]:
# Use the first CUDA device when available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Each network logs to its own run directory. Every writer is closed once its
# run finishes so pending events are flushed and its event file is released
# before `writer` is rebound to the next run.

# P4M network, trained on upright data
writer = SummaryWriter('runs/p4m_MNIST_1')
train_test_net(p4m_net, True)
writer.close()

# P4 network, trained on upright data
writer = SummaryWriter('runs/p4_MNIST_1')
train_test_net(p4_net, True)
writer.close()

# Plain ConvNet, trained on rotated data
writer = SummaryWriter('runs/conv_MNIST_1')
train_test_net(conv_net, False)
writer.close()

# A second, freshly initialised ConvNet, trained on upright data
convu_net = ConvNet()
writer = SummaryWriter('runs/convu_MNIST_1')
train_test_net(convu_net, True)
writer.close()

### Model Comparisons

There are a few comparisons between the models to be made here. The following are logged:
- Model accuracy on 10000 test images
- Model accuracy per class

In [None]:
def test_accuracy(net):
    """Print the overall accuracy of `net` on `test_dataloader_rot`.

    Batches are moved to the module-level `device`; gradients are disabled
    because nothing is being trained here.
    """
    hits, seen = 0, 0
    with torch.no_grad():
        for batch in test_dataloader_rot:
            inputs = batch[0].to(device)
            targets = batch[1].to(device)
            scores = net(inputs)
            # the prediction is the class with the highest score
            predicted = scores.data.argmax(dim=1)
            seen += targets.size(0)
            hits += (predicted == targets).sum().item()

    percentage = 100.0 * hits / seen
    print(f"Accuracy of the {type(net)} on the 10000 test images: {percentage:f} %")

In [None]:
def class_labels(net):
    """Print the accuracy of `net` for each digit 0-9 on `test_dataloader_rot`.

    Batches are moved to the module-level `device` and evaluated without
    gradients.
    """
    # per-digit tallies of correct predictions and of samples seen
    hits_per_digit = dict.fromkeys(range(10), 0)
    seen_per_digit = dict.fromkeys(range(10), 0)

    with torch.no_grad():
        for batch in test_dataloader_rot:
            inputs, targets = batch[0].to(device), batch[1].to(device)
            guesses = net(inputs.to(device)).argmax(dim=1)
            for truth, guess in zip(targets.tolist(), guesses.tolist()):
                if truth == guess:
                    hits_per_digit[truth] += 1
                seen_per_digit[truth] += 1

    print('Classes for ' + str(type(net)))

    # report the accuracy of every digit in ascending order
    for digit, hits in hits_per_digit.items():
        accuracy = 100 * float(hits) / seen_per_digit[digit]
        print(f"Accuracy for num {digit} is: {accuracy:.1f} %")

In [None]:
# Report overall and per-digit accuracy for each trained network in turn
for _trained_net in (p4m_net, p4_net, conv_net, convu_net):
    test_accuracy(_trained_net)
    class_labels(_trained_net)

In [1]:
## Save every trained network (the whole module object) to its own file
_checkpoints = (
    (p4m_net, 'upright-trained-p4m-1.pth'),
    (p4_net, 'upright-trained-p4-1.pth'),
    (conv_net, 'rot-trained-conv-1.pth'),
    (convu_net, 'upright-trained-conv-1.pth'),
)
for _model, _path in _checkpoints:
    torch.save(_model, _path)

NameError: name 'torch' is not defined