In [None]:
%matplotlib inline

# Our MNIST Model

In [None]:
# Hyperparameters for the MNIST experiment
num_epochs, num_classes = 5, 10          # training passes over the data / number of label classes
batch_size, learning_rate = 100, 0.001   # samples per mini-batch / optimizer step size

In [None]:
from pathlib import Path

import numpy as np
import requests
import torch
import torchvision
from torchvision import transforms

# Preprocessing applied to every MNIST image: convert to a tensor, then
# normalise the single channel with mean 0.1307 and std 0.3081.
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

# Forward slashes are used for the dataset root: the previous 'data\mnist'
# literal contained the invalid escape sequence '\m' and, outside Windows,
# named a single directory called "data\mnist" instead of data/mnist.
train_dataset = torchvision.datasets.MNIST(root='data/mnist', train=True, transform=trans, download=True)
test_dataset = torchvision.datasets.MNIST(root='data/mnist', train=False, transform=trans)

In [None]:
from torch.utils.data import DataLoader

# Mini-batch iterators over the MNIST datasets; only the training set is
# reshuffled each epoch, the test set is read in its stored order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
from torch import optim, nn

class ConvNet(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two blocks of Conv2d(5x5, padding 2) -> ReLU -> MaxPool2d(2) reduce the
    spatial size 28 -> 14 -> 7 while widening the channels 1 -> 32 -> 64.
    The flattened 7*7*64 features pass through dropout and two linear layers.

    Args:
        num_classes: Number of output scores produced by the final linear
            layer. Defaults to 10, matching the previous hard-coded size.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Attribute names are kept as-is so state_dict keys stay compatible
        # with checkpoints saved from this model.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Compute unnormalised class scores.

        Args:
            x: Input batch; the layer sizes above require shape
                (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, num_classes) with raw scores (no softmax).
        """
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the linear layers.
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [None]:
# Fresh, randomly initialised MNIST network.
model = ConvNet()

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
# Use the `optim` module imported next to `nn` rather than `torch.optim`,
# so this cell does not depend on the top-level `torch` name being in scope.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train the model
# Explicitly enter training mode so dropout is active even when this cell is
# re-run after the evaluation cell has switched the model to eval mode.
model.train()
total_step = len(train_loader)
loss_list = []   # per-step training loss
acc_list = []    # per-step training accuracy as a fraction in [0, 1]
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Run the forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss_value = loss.item()
        loss_list.append(loss_value)

        # Backprop and perform Adam optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track the accuracy: the predicted class is the index of the
        # largest score; detach() keeps this bookkeeping out of autograd.
        total = labels.size(0)
        predicted = outputs.detach().argmax(dim=1)
        correct = (predicted == labels).sum().item()
        acc_list.append(correct / total)

        # Report progress every 100 steps.
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss_value,
                          (correct / total) * 100))

In [None]:
# Evaluate on the test set with dropout disabled and gradient tracking off.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        predicted = outputs.argmax(dim=1)   # index of the highest score per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format((correct / total) * 100))

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing the checkpoint.
Path('saved_models').mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), 'saved_models/mnist_conv_model.ckpt')

In [None]:
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.models import LinearAxis, Range1d

# Loss is drawn against the left axis (0..1); accuracy gets its own
# right-hand axis (0..100 %) so both curves share one figure.
p = figure(
    title='PyTorch ConvNet results',
    y_axis_label='Loss',
    y_range=(0, 1),
    width=850,
)
p.extra_y_ranges = dict(Accuracy=Range1d(start=0, end=100))
p.add_layout(LinearAxis(axis_label='Accuracy (%)', y_range_name='Accuracy'), 'right')

# One point per training step for each curve.
p.line(x=np.arange(len(loss_list)), y=loss_list)
p.line(
    x=np.arange(len(loss_list)),
    y=np.array(acc_list) * 100,
    color='red',
    y_range_name='Accuracy',
)
show(p)

# Our CIFAR-10 Model

In [None]:
import torch
import torchvision
from torchvision import transforms

# Per-channel (mean, std) normalisation used by both pipelines.
cifar_normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

# Training pipeline: augment with a random 32x32 crop (after 4 px padding)
# and a random horizontal flip, then convert to a tensor and normalise.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    cifar_normalize,
])

# Test pipeline: no augmentation, only tensor conversion and normalisation.
transform_test = transforms.Compose([transforms.ToTensor(), cifar_normalize])

In [None]:
# CIFAR-10 train/test splits, downloaded on first use.
# Forward slashes are used for the root: the previous 'data\cifar' literal
# contained the invalid escape sequence '\c' and, outside Windows, named a
# single directory called "data\cifar" instead of data/cifar.
trainset = torchvision.datasets.CIFAR10(root='data/cifar', train=True, download=True, transform=transform_train)
testset = torchvision.datasets.CIFAR10(root='data/cifar', train=False, download=True, transform=transform_test)

In [None]:
import torch.nn as nn

class CIFAR_CNN(nn.Module):
    """Three-stage convolutional network with a fully connected classifier.

    Each stage is Conv-BatchNorm-ReLU-Conv-ReLU-MaxPool; a Dropout2d layer
    sits between the second and third stage. The classifier expects 4096
    flattened features and produces 10 output scores.
    """

    @staticmethod
    def _conv_stage(in_ch, mid_ch, out_ch):
        """Return the six layers of one convolutional stage, in order."""
        return [
            nn.Conv2d(in_channels=in_ch, out_channels=mid_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=mid_ch, out_channels=out_ch, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]

    def __init__(self):
        super().__init__()

        # Layers are appended in the same order as before so the Sequential
        # indices (and therefore the state_dict keys) do not change.
        feature_layers = []
        feature_layers += self._conv_stage(3, 32, 64)       # stage 1
        feature_layers += self._conv_stage(64, 128, 128)    # stage 2
        feature_layers.append(nn.Dropout2d(p=0.05))
        feature_layers += self._conv_stage(128, 256, 256)   # stage 3
        self.conv_layer = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Dropout(p=0.1),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(512, 10),
        ]
        self.fc_layer = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run the convolutional stages, flatten per sample, then classify."""
        features = self.conv_layer(x)
        flat = features.view(features.size(0), -1)
        return self.fc_layer(flat)

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
import time
import numpy as np
from torch import optim

# This training loop optimizes batch size
# For every batch size 2, 4, ..., 512 a fresh CIFAR_CNN is trained for 15
# epochs with plain SGD (lr=0.01) and evaluated on the test set after each
# epoch. Per-step training losses and per-batch test losses are collected.
cifar_batch_models = []

criterion = nn.CrossEntropyLoss()

list_of_training_losses = []   # one list of per-step losses per batch size
list_of_testing_losses = []    # one list of per-batch test losses per batch size
for batch in range(1, 10):
    next_batch = 2 ** batch
    print()
    print('--------------------------------------------------------------')
    print('--------------------------------------------------------------')
    print('Training with batch size {}'.format(next_batch))

    cifar_model_batch = CIFAR_CNN()
    cifar_model_batch.to(device)
    cifar_batch_models.append(cifar_model_batch)

    optimizer = optim.SGD(cifar_model_batch.parameters(), lr=0.01)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=next_batch, shuffle=True, num_workers=2)
    testloader = torch.utils.data.DataLoader(testset, batch_size=next_batch, shuffle=False, num_workers=2)

    training_losses = []
    testing_losses = []
    for epoch in range(15):
        # Train
        # Switch back to training mode every epoch: the evaluation below puts
        # the model in eval mode, and without this call every epoch after the
        # first would train with Dropout disabled and BatchNorm frozen.
        cifar_model_batch.train()
        start = time.time()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()                 # Zero the gradients

            outputs = cifar_model_batch(inputs)   # Forward pass
            loss = criterion(outputs, targets)    # Compute the Loss
            loss.backward()                       # Compute the Gradients

            optimizer.step()                      # Update the weights
            training_losses.append(loss.item())
            end = time.time()

            if batch_idx % 100 == 0:
                # Loss shown is the mean over all steps so far for this batch
                # size; time is measured since the previous report.
                print('Batch Index : %d Loss : %.3f Time : %.3f seconds ' % (batch_idx, np.mean(training_losses), end - start))

                start = time.time()

        # Evaluate
        cifar_model_batch.eval()
        total = 0
        correct = 0

        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(testloader):
                inputs, targets = inputs.to(device), targets.to(device)

                outputs = cifar_model_batch(inputs)
                loss = criterion(outputs, targets)
                testing_losses.append(loss.item())
                predicted = outputs.argmax(dim=1)
                total += targets.size(0)
                # .item() keeps `correct` a plain Python int so the accuracy
                # below is ordinary float arithmetic rather than tensor math.
                correct += predicted.eq(targets).sum().item()

            print('Epoch : %d Test Acc : %.3f' % (epoch, 100.*correct/total))
            print('--------------------------------------------------------------')

    list_of_training_losses.append(training_losses)
    list_of_testing_losses.append(testing_losses)

In [None]:
import matplotlib.pyplot as plt

# One row of plots per batch size: training loss on the left, testing loss
# on the right.
num_batches = len(list_of_training_losses)
fig, axes = plt.subplots(num_batches, 2, figsize=(15,40))
for row, train_curve in enumerate(list_of_training_losses):
    size = 2 ** (row + 1)   # row i holds the run trained with batch size 2**(i+1)
    left_ax, right_ax = axes[row]

    left_ax.plot(train_curve)
    left_ax.set_title('Training loss for a batch size of {}'.format(size))
    right_ax.plot(list_of_testing_losses[row])
    right_ax.set_title('Testing loss for a batch size of {}'.format(size))
plt.show()

In [None]:
import time
import numpy as np
from torch import optim

best_batch_size_low_epoch = 16
best_batch_size_high_epoch = 512

trainloader_lr = torch.utils.data.DataLoader(trainset, batch_size=best_batch_size_low_epoch, shuffle=True, num_workers=2)
testloader_lr = torch.utils.data.DataLoader(testset, batch_size=best_batch_size_low_epoch, shuffle=False, num_workers=2)

# This training loop optimizes learning rate
# For every candidate learning rate a fresh CIFAR_CNN is trained for 15
# epochs with plain SGD and evaluated on the test set after each epoch.
cifar_lr_models = []

criterion_lr = nn.CrossEntropyLoss()
lrs = [0.0001, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5]

list_of_training_losses_lr = []   # one list of per-step losses per learning rate
list_of_testing_losses_lr = []    # one list of per-batch test losses per learning rate
# The loop variable is deliberately not called `learning_rate`, so it does not
# overwrite the MNIST hyperparameter of that name defined earlier.
for lr in lrs:
    print()
    print('--------------------------------------------------------------')
    print('--------------------------------------------------------------')
    print('Training with learning rate {}'.format(lr))

    cifar_model_lr = CIFAR_CNN()
    cifar_model_lr.to(device)
    cifar_lr_models.append(cifar_model_lr)

    optimizer_lr = optim.SGD(cifar_model_lr.parameters(), lr=lr)

    training_losses_lr = []
    testing_losses_lr = []
    for epoch in range(15):
        # Train
        # Switch back to training mode every epoch: the evaluation below puts
        # the model in eval mode, and without this call every epoch after the
        # first would train with Dropout disabled and BatchNorm frozen.
        cifar_model_lr.train()
        start = time.time()
        for batch_idx, (inputs, targets) in enumerate(trainloader_lr):
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer_lr.zero_grad()              # Zero the gradients

            outputs = cifar_model_lr(inputs)      # Forward pass
            loss = criterion_lr(outputs, targets) # Compute the Loss
            loss.backward()                       # Compute the Gradients

            optimizer_lr.step()                   # Update the weights
            training_losses_lr.append(loss.item())
            end = time.time()

            if batch_idx % 100 == 0:
                # Loss shown is the mean over all steps so far for this
                # learning rate; time is measured since the previous report.
                print('Batch Index : %d Loss : %.3f Time : %.3f seconds ' % (batch_idx, np.mean(training_losses_lr), end - start))

                start = time.time()

        # Evaluate
        cifar_model_lr.eval()
        total = 0
        correct = 0

        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(testloader_lr):
                inputs, targets = inputs.to(device), targets.to(device)

                outputs = cifar_model_lr(inputs)
                loss = criterion_lr(outputs, targets)
                testing_losses_lr.append(loss.item())
                predicted = outputs.argmax(dim=1)
                total += targets.size(0)
                # .item() keeps `correct` a plain Python int so the accuracy
                # below is ordinary float arithmetic rather than tensor math.
                correct += predicted.eq(targets).sum().item()

            print('Epoch : %d Test Acc : %.3f' % (epoch, 100.*correct/total))
            print('--------------------------------------------------------------')

    list_of_training_losses_lr.append(training_losses_lr)
    list_of_testing_losses_lr.append(testing_losses_lr)

In [None]:
import os
import pickle

# Persist the learning-rate sweep results as [training losses, testing losses].
# open() does not create missing directories, so make sure the folder exists.
os.makedirs('variables', exist_ok=True)
with open('variables/learning_rate.pickle', 'wb') as f:
    pickle.dump([list_of_training_losses_lr, list_of_testing_losses_lr], f)

In [None]:
# Release the figure created for the batch-size plots before drawing new ones.
plt.close(fig)

# One row of plots per learning rate: training loss on the left, testing
# loss on the right.
num_lrs = len(lrs)
fig_lr, axes_lr = plt.subplots(num_lrs, 2, figsize=(15,40))
for row, rate in enumerate(lrs):
    left_ax, right_ax = axes_lr[row]

    left_ax.plot(list_of_training_losses_lr[row])
    left_ax.set_title('Training loss for a learning rate of {}'.format(rate))
    right_ax.plot(list_of_testing_losses_lr[row])
    right_ax.set_title('Testing loss for a learning rate of {}'.format(rate))
plt.show()

In [None]:
best_learning_rate = 0.01

trainloader_mom = torch.utils.data.DataLoader(trainset, batch_size=best_batch_size_low_epoch, shuffle=True, num_workers=2)
testloader_mom = torch.utils.data.DataLoader(testset, batch_size=best_batch_size_low_epoch, shuffle=False, num_workers=2)

# This training loop optimizes momentum
# For every candidate momentum a fresh CIFAR_CNN is trained for 15 epochs
# with SGD at `best_learning_rate` and evaluated on the test set after each
# epoch.
cifar_mom_models = []

criterion_mom = nn.CrossEntropyLoss()
momentums = [0.0, 0.2, 0.4, 0.6, 0.8, 0.99]

list_of_training_losses_mom = []   # one list of per-step losses per momentum
list_of_testing_losses_mom = []    # one list of per-batch test losses per momentum
for mom in momentums:
    print()
    print('--------------------------------------------------------------')
    print('--------------------------------------------------------------')
    print('Training with momentum {}'.format(mom))

    cifar_model_mom = CIFAR_CNN()
    cifar_model_mom.to(device)
    cifar_mom_models.append(cifar_model_mom)

    optimizer_mom = optim.SGD(cifar_model_mom.parameters(), lr=best_learning_rate, momentum=mom)

    training_losses_mom = []
    testing_losses_mom = []
    for epoch in range(15):
        # Train
        # Switch back to training mode every epoch: the evaluation below puts
        # the model in eval mode, and without this call every epoch after the
        # first would train with Dropout disabled and BatchNorm frozen.
        cifar_model_mom.train()
        start = time.time()
        for batch_idx, (inputs, targets) in enumerate(trainloader_mom):
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer_mom.zero_grad()              # Zero the gradients

            outputs = cifar_model_mom(inputs)      # Forward pass
            loss = criterion_mom(outputs, targets) # Compute the Loss
            loss.backward()                        # Compute the Gradients

            optimizer_mom.step()                   # Update the weights
            training_losses_mom.append(loss.item())
            end = time.time()

            if batch_idx % 100 == 0:
                # Loss shown is the mean over all steps so far for this
                # momentum; time is measured since the previous report.
                print('Batch Index : %d Loss : %.3f Time : %.3f seconds ' % (batch_idx, np.mean(training_losses_mom), end - start))

                start = time.time()

        # Evaluate
        cifar_model_mom.eval()
        total = 0
        correct = 0

        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(testloader_mom):
                inputs, targets = inputs.to(device), targets.to(device)

                outputs = cifar_model_mom(inputs)
                loss = criterion_mom(outputs, targets)
                testing_losses_mom.append(loss.item())
                predicted = outputs.argmax(dim=1)
                total += targets.size(0)
                # .item() keeps `correct` a plain Python int so the accuracy
                # below is ordinary float arithmetic rather than tensor math.
                correct += predicted.eq(targets).sum().item()

            print('Epoch : %d Test Acc : %.3f' % (epoch, 100.*correct/total))
            print('--------------------------------------------------------------')

    list_of_training_losses_mom.append(training_losses_mom)
    list_of_testing_losses_mom.append(testing_losses_mom)

In [None]:
# Release the figure created for the learning-rate plots before drawing new ones.
plt.close(fig_lr)

# One row of plots per momentum value: training loss on the left, testing
# loss on the right.
num_moms = len(momentums)
fig_mom, axes_mom = plt.subplots(num_moms, 2, figsize=(15,40))
for row, momentum_value in enumerate(momentums):
    left_ax, right_ax = axes_mom[row]

    left_ax.plot(list_of_training_losses_mom[row])
    left_ax.set_title('Training loss for a momentum of {}'.format(momentum_value))
    right_ax.plot(list_of_testing_losses_mom[row])
    right_ax.set_title('Testing loss for a momentum of {}'.format(momentum_value))
plt.show()

In [None]:
import os
import time

import numpy as np
from torch import optim

best_batch_size_low_epoch = 16
best_batch_size_high_epoch = 512

trainloader = torch.utils.data.DataLoader(trainset, batch_size=best_batch_size_high_epoch, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=best_batch_size_high_epoch, shuffle=False, num_workers=2)

# This training loop uses the best hyperparameters

best_learning_rate = 0.01
best_momentum = 0.3

checkpoint_path = 'saved_models/cifar_conv_model.ckpt'

cifar_model = CIFAR_CNN()
# Resume from an earlier checkpoint only when one exists; on a fresh run the
# file has not been written yet and loading it unconditionally would fail.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
if os.path.exists(checkpoint_path):
    cifar_model.load_state_dict(torch.load(checkpoint_path, map_location=device))
else:
    print('No checkpoint found at {}; training from scratch'.format(checkpoint_path))
cifar_model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(cifar_model.parameters(), lr=best_learning_rate, momentum=best_momentum)

training_losses = []   # per-step training loss across all epochs
testing_losses = []    # per-batch test loss across all epochs
for epoch in range(30):
    # Train
    start = time.time()
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()                 # Zero the gradients

        outputs = cifar_model(inputs)         # Forward pass
        loss = criterion(outputs, targets)    # Compute the Loss
        loss.backward()                       # Compute the Gradients

        optimizer.step()                      # Update the weights
        training_losses.append(loss.item())
        end = time.time()

        if batch_idx % 100 == 0:
            # Loss shown is the mean over all steps so far; time is measured
            # since the previous report.
            print('Batch Index : %d Loss : %.3f Time : %.3f seconds ' % (batch_idx, np.mean(training_losses), end - start))

            start = time.time()

    # Evaluate
    cifar_model.eval()
    total = 0
    correct = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = cifar_model(inputs)
            loss = criterion(outputs, targets)
            testing_losses.append(loss.item())
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            # .item() keeps `correct` a plain Python int so the accuracy
            # below is ordinary float arithmetic rather than tensor math.
            correct += predicted.eq(targets).sum().item()

        print('Epoch : %d Test Acc : %.3f' % (epoch, 100.*correct/total))
        print('--------------------------------------------------------------')
    # Back to training mode so the next epoch trains with Dropout/BatchNorm active.
    cifar_model.train()

In [None]:
import os

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing the checkpoint.
os.makedirs('saved_models', exist_ok=True)
torch.save(cifar_model.state_dict(), 'saved_models/cifar_conv_model.ckpt')