In [1]:
import time
import os
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.autograd import Variable
from torchvision.datasets import CIFAR10
from torchvision.transforms import transforms as transforms
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import torchvision.models as models
import matplotlib.pyplot as plt

from net import *

# Debug switch: when True, later cells keep only a small subset of the
# train/test sets so the model code can be exercised quickly.
DEBUG = False

# Print the name of CUDA device 0 when a GPU is usable.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

GeForce 940MX


In [2]:
# Number of images per mini-batch
batch_size = 32

# Preprocessing applied to every image: convert it to a tensor first (required
# before any tensor op), then standardise each of the three channels with the
# given per-channel mean and standard deviation (improves training).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2023, 0.1994, 0.2010),
    ),
])

In [3]:
# Open both CIFAR-10 splits (downloading into "data" when not present yet);
# train=True selects the training split, train=False the test split.
train_set = CIFAR10(root="data", train=True, transform=transform, download=True)
test_set = CIFAR10(root="data", train=False, transform=transform, download=True)

# Sample counts of the two splits; in DEBUG mode only the first 128 samples
# of each split are kept, materialised as plain lists.
n_train = 128 if DEBUG else len(train_set)
n_test = 128 if DEBUG else len(test_set)
if DEBUG:
    train_set = [train_set[idx] for idx in range(n_train)]
    test_set = [test_set[idx] for idx in range(n_test)]

# Training batches are shuffled; the test loader keeps the dataset order.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4)

# Names for the ten label indices
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [4]:
def save_models(model, model_name, epoch):
    """Save the weights of ``model`` to ``models/<model_name>/<epoch>.pth``.

    Args:
        model: Object whose ``state_dict()`` is written to disk.
        model_name: Name of the sub-directory under ``models/`` to save into.
        epoch: Value used as the base name of the checkpoint file.
    """
    directory = "models/{0}/".format(model_name)
    filename = "{0}.pth".format(epoch)

    # exist_ok=True creates the directory only when it is missing and avoids
    # the race between a separate existence check and the creation call.
    os.makedirs(directory, exist_ok=True)

    torch.save(model.state_dict(), os.path.join(directory, filename))
    print("Checkpoint saved")

In [5]:
def test(model):
    """Return the classification accuracy of ``model`` on ``test_loader``.

    The model is switched to eval mode and every batch of the module-level
    ``test_loader`` is run through it. Batches are moved to the GPU when CUDA
    is available; the model itself is not moved here, so it must already be
    on that device.

    Args:
        model: Network mapping a batch of images to per-class scores.

    Returns:
        Fraction of samples whose highest-scoring class equals the label
        (0.0 when the loader yields no samples).
    """
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    n_correct = 0
    n_seen = 0

    # Inference only: no_grad() stops autograd from recording the forward
    # pass, which saves memory and time during evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Predicted class = index of the largest output score
            outputs = model(images)
            _, prediction = torch.max(outputs, 1)

            n_correct += int(torch.sum(prediction == labels))
            n_seen += labels.size(0)

    # Average over the samples actually evaluated rather than a global count
    return n_correct / n_seen if n_seen else 0.0

In [6]:
def train(model, optimizer, loss_fn, num_epochs, save_model=False, model_name="", verbose=False):
    """Train ``model`` on ``train_loader`` and evaluate it after every epoch.

    The model is moved to the GPU when CUDA is available. After each epoch
    the test accuracy is computed with ``test(model)``. When ``save_model``
    is true a checkpoint is written after *every* epoch; the test accuracy is
    intentionally not used as a saving criterion, since selecting checkpoints
    on test accuracy would be cheating.

    Args:
        model: Network to train.
        optimizer: Optimizer built over ``model``'s parameters.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning the batch loss.
        num_epochs: Number of passes over ``train_loader``.
        save_model: If true, call ``save_models`` at the end of each epoch.
        model_name: Name passed to ``save_models`` for the checkpoint directory.
        verbose: If true, print the metrics of each epoch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):

        model.train()

        n_correct = 0
        n_seen = 0
        loss_sum = 0.0

        start = time.time()

        for images, labels in train_loader:

            # Move the batch to the same device as the model
            images = images.to(device)
            labels = labels.to(device)

            # Clear all accumulated gradients
            optimizer.zero_grad()

            # Predict classes for this batch of training images
            outputs = model(images)

            # Compute the loss based on the predictions and actual labels
            loss = loss_fn(outputs, labels)

            # Backpropagate the loss
            loss.backward()

            # Adjust parameters according to the computed gradients
            optimizer.step()

            # Weight the batch loss by the batch size so that the epoch
            # average stays correct when the last batch is smaller.
            batch_n = images.size(0)
            loss_sum += loss.item() * batch_n

            _, prediction = torch.max(outputs.detach(), 1)
            n_correct += int(torch.sum(prediction == labels))
            n_seen += batch_n

        # Average over the samples actually processed in this epoch
        train_acc = n_correct / n_seen if n_seen else 0.0
        train_loss = loss_sum / n_seen if n_seen else 0.0

        # Evaluate on the test set
        test_acc = test(model)

        # Checkpoint every epoch (not conditioned on test accuracy)
        if save_model:
            save_models(model, model_name, epoch)

        # Epoch time covers training, evaluation and checkpointing
        ep_time = time.time() - start

        # Print the metrics
        if verbose:
            print("Epoch {0}, Train Accuracy: {1:.3f} , TrainLoss: {2:.3f} , Test Accuracy: {3:.3f}, Time: {4:.2f}s".format(epoch, train_acc, train_loss, test_acc, ep_time))

In [None]:
# Registry of the networks created by the training sweep, keyed by model name.
# NOTE(review): this rebinds `models`, shadowing the `torchvision.models`
# module imported under the same name in the first cell — consider renaming.
models = {}

In [None]:
# Settings shared by every run of the sweep
num_epochs = 50
save_model = True

# Sweep over num_conv and num_channels; one SimpleNet is trained per pair
for num_conv in (1, 2, 3):
    for num_channels in (16, 32, 64):

        # The (num_conv=2, num_channels=32) combination is left out
        if (num_conv, num_channels) == (2, 32):
            continue

        model_name = "SimpleNet_conv=%d_ch=%d" % (num_conv, num_channels)
        print("Training", model_name)

        # Build the network and keep it in the registry under its name
        models[model_name] = SimpleNet(num_conv=num_conv, num_channels=num_channels, num_classes=10)

        # Cross-entropy loss, optimised with Adam
        loss_fn = nn.CrossEntropyLoss()
        optimizer = Adam(models[model_name].parameters(), lr=0.001)

        # Run the training loop, checkpointing and logging every epoch
        train(
            models[model_name], optimizer, loss_fn, num_epochs,
            save_model=save_model, model_name=model_name, verbose=True,
        )

Training SimpleNet_conv=1_ch=16
Checkpoint saved
Epoch 0, Train Accuracy: 0.441 , TrainLoss: 1.553 , Test Accuracy: 0.539, Time: 43.54s
Checkpoint saved
Epoch 1, Train Accuracy: 0.560 , TrainLoss: 1.236 , Test Accuracy: 0.591, Time: 46.32s
Checkpoint saved
Epoch 2, Train Accuracy: 0.601 , TrainLoss: 1.135 , Test Accuracy: 0.618, Time: 55.26s
Checkpoint saved
Epoch 3, Train Accuracy: 0.620 , TrainLoss: 1.082 , Test Accuracy: 0.626, Time: 50.98s
Checkpoint saved
Epoch 4, Train Accuracy: 0.633 , TrainLoss: 1.041 , Test Accuracy: 0.633, Time: 52.46s
Checkpoint saved
Epoch 5, Train Accuracy: 0.643 , TrainLoss: 1.021 , Test Accuracy: 0.636, Time: 52.01s
Checkpoint saved
Epoch 6, Train Accuracy: 0.649 , TrainLoss: 1.002 , Test Accuracy: 0.644, Time: 51.91s
Checkpoint saved
Epoch 7, Train Accuracy: 0.653 , TrainLoss: 0.982 , Test Accuracy: 0.645, Time: 53.20s
Checkpoint saved
Epoch 8, Train Accuracy: 0.657 , TrainLoss: 0.972 , Test Accuracy: 0.652, Time: 53.29s
Checkpoint saved
Epoch 9, Train 