In [32]:
# Importing all necessary packages
import torch
import torchvision
import torchvision.transforms as transforms
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import Compose 
# Setting randomness seed to ensure reproducability 
torch.manual_seed(5) #5

<torch._C.Generator at 0x72935d8e3150>

In [33]:
# The following sets the transformations for all images in the test set. This converts it to a tensor, and normalizes it. 

transform = transforms.Compose(
    [transforms.Resize(32),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# The following transforms the training images in the same way as the testing images, with the only
# difference being a 0.35 chance of flipping the image horizontally. 

train_transform = Compose([
    transforms.Resize(32),
    transforms.RandomHorizontalFlip(p=0.35),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

# Sets batch size of 8
batch_size = 8

# Creates trainset and trainloader, applying transformations and batchsize. 

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

# Creates trainset and trainloader, applying transformations and batchsize. 

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

# Determines classes for the classification. 
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified



Source of dataloader code: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

In [34]:
# Sets device to be GPU if it is available, and CPU if it is not. 
if torch.cuda.is_available():  
  dev = "cuda:0" 
else:  
  dev = "cpu"  

In [35]:
# Prints chosen device
print(dev)

cuda:0


In [36]:
# Creates model architecture 
class simple_net(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Input: 3x32x32
        

        # Defines average pooling layers to carry out spatial average pooling. 
        # The layer adapts to the input size of the feature map and pools it down 
        # to a 1x1 value. 
        self.pool = nn.AdaptiveAvgPool2d((1,1))

        # Defines max pooling layer with a kernel size of 2x2.
        self.max_pool = nn.MaxPool2d(2,2)
        
        # Defines average pooling layer with a kernel size of 2x2
        self.avg_pool = nn.AvgPool2d(2,2)
        
        # Defines linear layers which which are used to create vector a. 
        self.linear = nn.Linear(3 * 1 * 1, 3)
        self.linear2 = nn.Linear(120 * 1 * 1, 3)
        self.linear3 = nn.Linear(240 * 1 * 1, 3)

        # Defines final output linear layer. 
        self.linear_output = nn.Linear(560*1*1,10)
        
        # Defines three convolutional layers, which will make up three blocks within the network. 
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 120, kernel_size = 5, padding = 2)
        self.conv2 = nn.Conv2d(in_channels = 120, out_channels = 240, kernel_size = 3, padding = 1)
        self.conv3 = nn.Conv2d(in_channels = 240, out_channels = 560, kernel_size = 3, padding = 1)

        # Defines Batch Normalization 
        # Batch normalizations are used to stabilize the training process. 
        self.batch1 = nn.BatchNorm2d(120)
        self.batch2 = nn.BatchNorm2d(240)
        self.batch3 = nn.BatchNorm2d(560)
    
    def forward(self, x):
        
        ### BLOCK ONE ###
        
        # a is generated. 
        a = F.relu(self.linear(torch.flatten(self.pool(x),1)))
        # three convolutional layers recieve the x input, after which a batch normalisation, an activation function 
        # and pooling layer are applied. 
        conv_output_1 = self.max_pool(F.relu(self.batch1(self.conv1(x))))
        conv_output_2 = self.max_pool(F.relu(self.batch1(self.conv1(x))))
        conv_output_3 = self.max_pool(F.relu(self.batch1(self.conv1(x))))
        combined_conv = []
        # Each convolutional layer output is multiplied by its respective a value. The for loop goes through each element 
        # of the batch. 
        for i in range(len(a)):
            combined_conv.append(conv_output_1[i] * a[i][0] + conv_output_2[i] * a[i][1] + conv_output_3[i] * a[i][2])
        # The stack function is then used to combine the results back into a tensor of dimensions: (batch-size, channel numbers, height, width)
        O = torch.stack(combined_conv)
        
        ### BLOCK TWO ###
        a = F.relu(self.linear2(torch.flatten(self.pool(O),1)))
        conv_output_1 = self.max_pool(F.relu(self.batch2(self.conv2(O))))
        conv_output_2 = self.max_pool(F.relu(self.batch2(self.conv2(O))))
        conv_output_3 = self.max_pool(F.relu(self.batch2(self.conv2(O))))
        combined_conv = []
        for i in range(len(a)):
            combined_conv.append(conv_output_1[i] * a[i][0] + conv_output_2[i] * a[i][1] + conv_output_3[i] * a[i][2])
        O_2 = torch.stack(combined_conv)
        
        ### BLOCK THREE ###
        a = F.relu(self.linear3(torch.flatten(self.pool(O_2),1)))
        conv_output_1 = F.relu(self.batch3(self.conv3(O_2)))
        conv_output_2 = F.relu(self.batch3(self.conv3(O_2)))
        conv_output_3 = F.relu(self.batch3(self.conv3(O_2)))
        combined_conv = []
        for i in range(len(a)):
            combined_conv.append(conv_output_1[i] * a[i][0] + conv_output_2[i] * a[i][1] + conv_output_3[i] * a[i][2])
        O_3 = torch.stack(combined_conv)
        
        # Classifier 
        # Spatial average pooling is applied to the output of the last block
        x = self.pool(O_3)
        # The tensor is flattened into a flat vector. 
        x = torch.flatten(x, 1)
        # The final classifier is a linear layer with a relu activation function. 
        x = F.relu(self.linear_output(x))
        return x

# Network is edefined and sent to the active device

simple_net = simple_net().to(dev)

In [37]:
# Importing optimizer
import torch.optim as optim
# Importing optimizer schedueler
from torch.optim.lr_scheduler import ReduceLROnPlateau
# Learning rate of 0.0001 is defined. 
lr = 0.001

# CrossEntropyLoss is used to generate a loss value for the network.
criterion = nn.CrossEntropyLoss()
# Stochastic Gradient Descent with momentum of 0.9 is used as the optimizer
optimizer = optim.SGD(simple_net.parameters(),lr=lr, momentum = 0.9)
# A learning rate scheduler is defined to lower the learning rate if the validation accuracy is 
# not improved after 6 epochs. 
scheduler = ReduceLROnPlateau(optimizer, 'max', patience = 6, verbose = True)



In [None]:
# Lists are defined to store accuracies, loss, epochs and the learning rate over time for evaluation purposes
training_accuracies = []
testing_accuracies = []
loss_over_time = []
epochs = []
learning_rates = []
# Number of epochs is defined
epoch_num = 35
# Loops through the dataset for the predefined number of epochs
for epoch in range(epoch_num):  

    running_loss = 0.0
    epoch_loss = []
    for i, data in enumerate(trainloader, 0):
        # Inputs are extracted from the train set dataloader. 
        inputs, labels = data
        # Data is then sent to the gpu.
        inputs, labels = inputs.to(dev), labels.to(dev)

        # Gradients are set to zero to ensure gradients from the previous loop are not left over. 
        optimizer.zero_grad()

        # The network produces an input
        outputs = simple_net(inputs)
        # A loss value is calculated
        loss = criterion(outputs, labels)
        # Gradients for the loss are generated 
        loss.backward()
        # Gradients are used to update optimizer
        optimizer.step()
          
        epoch_loss.append(loss.item())
        # Loss is kept track of and printed throughout each epoch. 
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
            
    #keeps track of epoch loss and computes an average
    loss_over_time.append(sum(epoch_loss)/len(epoch_loss))
    # Correct prediction variables are created to allow further evaluation. 
    train_correct = 0
    train_total = 0
    
    # torch.no_grad() is set to ensure the gradients are not updated whilst the model is being evaluated 
    # on the training data
    with torch.no_grad():
        for train_data in trainloader:
            # Data is loaded
            train_images, train_labels = train_data
                # Data is sent to gpu
            train_images, train_labels = train_images.to(dev), train_labels.to(dev)
                # Data is put through model
            train_outputs = simple_net(train_images)
                # Predictions are extracted
            _, predicted = torch.max(train_outputs.data, 1)
                # Total predictions and correct predictions are updated 
                # to calculate accuracy metric. 
            train_total += train_labels.size(0)
            train_correct += (predicted == train_labels).sum().item()
    
    # Training accuracies are added to their list and printed. 
    training_accuracies.append(100 * train_correct // train_total)
    print(f'Training accuracy of the network on the 10000 test images: {100 * train_correct // train_total} %')
    
    # The process of assessing the model on the validation set is the same as assessing it on the training set
    # with the only difference being that the simple_net.eval() is put the model into its evlauation mode. 
    test_correct = 0
    test_total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    simple_net.eval()
    with torch.no_grad():
        for test_data in testloader:
            test_images, test_labels = test_data
            test_images, test_labels = test_images.to(dev), test_labels.to(dev)
            # calculate outputs by running images through the network
            test_outputs = simple_net(test_images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(test_outputs.data, 1)
            test_total += test_labels.size(0)
            test_correct += (predicted == test_labels).sum().item()
    testing_accuracies.append(100 * test_correct // test_total)
    print(f'Test accuracy of the network on the 10000 test images: {100 * test_correct // test_total} %')
    simple_net.train()
    
    
    epochs.append(epoch)
    
    # The scheduler is given the test accuracy to check whether it needs to update the learning rate. 
    scheduler.step(100 * test_correct // test_total)

    for param_group in optimizer.param_groups:
         learning_rates.append(param_group['lr'])

print('Finished Training')

[1,  2000] loss: 2.182
[1,  4000] loss: 1.775
[1,  6000] loss: 1.624
Training accuracy of the network on the 10000 test images: 46 %
Test accuracy of the network on the 10000 test images: 49 %
[2,  2000] loss: 1.523
[2,  4000] loss: 1.457
[2,  6000] loss: 1.403
Training accuracy of the network on the 10000 test images: 50 %
Test accuracy of the network on the 10000 test images: 54 %
[3,  2000] loss: 1.341
[3,  4000] loss: 1.258
[3,  6000] loss: 1.103
Training accuracy of the network on the 10000 test images: 63 %
Test accuracy of the network on the 10000 test images: 66 %
[4,  2000] loss: 1.042
[4,  4000] loss: 1.019
[4,  6000] loss: 1.003
Training accuracy of the network on the 10000 test images: 65 %
Test accuracy of the network on the 10000 test images: 69 %
[5,  2000] loss: 0.959
[5,  4000] loss: 0.925
[5,  6000] loss: 0.918
Training accuracy of the network on the 10000 test images: 70 %
Test accuracy of the network on the 10000 test images: 73 %
[6,  2000] loss: 0.850
[6,  4000] l