In [2]:
# Libraries imported: NumPy, time, Matplotlib, seaborn, PyTorch, torchvision, math and scikit-learn.
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torchvision.datasets import EMNIST
import torchvision.transforms as transforms
import math 
from torch.utils.data import DataLoader, SubsetRandomSampler
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, classification_report


# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Transformation applied to every image.
# ToTensor converts the PIL image into a PyTorch tensor; Normalize then computes (x - 0.5) / 0.5 per pixel.
# NOTE(review): assuming ToTensor yields values in [0, 1], this rescales pixels to [-1, 1] (it does not
# standardise the dataset to mean 0 / std 1); the centred range is intended to help convergence.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Directory the EMNIST files are downloaded to / read from.
# NOTE: on Google Colab set this to './Downloads'; on a local IDE change it to a path that exists on your machine.
EMNIST_ROOT = '/Users/rhyscooper/Downloads/EMNIST'

# Load the EMNIST 'balanced' training and test splits (downloaded on first use).
train_set = EMNIST(root=EMNIST_ROOT, split='balanced', train=True, download=True, transform=transform)
test_set = EMNIST(root=EMNIST_ROOT, split='balanced', train=False, download=True, transform=transform)
                                      
class Model():
    '''
    Training / evaluation harness wrapped around a classifier network (MLP or CNN).

    The wrapped network must expose the attributes this class reads: name, loss, optimiser,
    scheduler, l1_lambda and l2_lambda, and it must be callable on a batch of images.
    '''

    def __init__(self, model, train_set, test_set):
        '''
        Store the network and datasets, seed the RNGs, build the label map and the default data loaders.

        model: the network to train / evaluate.
        train_set: dataset used to build self.train_loader (batch size 64, shuffled).
        test_set: dataset used to build self.test_loader (batch size 64, not shuffled); must have a .classes list.
        '''
        #Initialise the model (MLP or CNN), the train data set and the testing data set
        self.model = model
        self.train_set = train_set
        self.test_set = test_set

        #Initialise random seeds for reproducibility
        torch.manual_seed(0)
        np.random.seed(0)

        #Map each class number to the corresponding class name taken from test_set.classes
        self.labels_map = dict(enumerate(self.test_set.classes))

        #Initialise the train and test loaders based on the input train_set and test_set
        self.train_loader = torch.utils.data.DataLoader(self.train_set, batch_size=64, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(self.test_set, batch_size=64, shuffle=False)

    def _model_device(self):
        '''Return the device holding the network's parameters (CPU if it has none).'''
        first_param = next(iter(self.model.parameters()), None)
        if first_param is None:
            return torch.device('cpu')
        return first_param.device

    def _prepare_batch(self, images, labels):
        '''
        Move a batch onto the network's device, flattening the images to 28*28 vectors for the MLP.

        Both network types get their batch moved; previously only the MLP branch did so, which left
        CNN batches on the CPU even when the network lived on the GPU.
        '''
        target_device = self._model_device()
        if self.model.name == 'MLP':
            images = images.reshape(-1, 28*28)
        return images.to(target_device), labels.to(target_device)

    def _weight_norm_sum(self, p):
        '''Sum the p-norms of every parameter whose name contains 'weight'.'''
        return sum(torch.norm(param, p)
                   for name, param in self.model.named_parameters()
                   if 'weight' in name)

    def describe_dataset(self, data):
        '''
        For given input dataset (train_set or test_set), plot the first 6 samples along with their descriptive class labels,
        and print out how many samples are in the dataset
        '''
        #The split is recognised from the dataset's string representation
        description = str(data)
        if "Train" in description:
            data_set_name = "train_data"
        elif "Test" in description:
            data_set_name = "test_data"
        else:
            #Fallback so that an unrecognised dataset does not leave the name undefined
            data_set_name = "data"

        #Plot out the 'header' of the dataset, corresponding to the first 6 images and labels
        figure = plt.figure(figsize=(8, 7))
        plt.title(f"The First 6 Samples In {data_set_name}\nNumber of samples: {len(data)}")
        plt.axis("off")
        rows, cols = 2, 3

        for i in range(1, cols * rows + 1):
            img, label = data[i-1]
            #Swap the last two dimensions before display
            #(presumably because EMNIST samples are stored transposed - TODO confirm)
            img = img.mT
            figure.add_subplot(rows, cols, i)
            plt.title(self.labels_map[label])
            plt.axis("off")
            plt.imshow(img.squeeze(), cmap="gray")

        plt.show()

    def train(self, n_epochs=10, verbose=True):
        '''
        Trains the model for n_epochs, iterating over self.train_loader and for each batch calculating the loss and implementing backpropagation.
        verbose=True prints out the current epoch, current step (batch), and current loss every 100 steps.

        Returns (train_acc, train_loss): per-epoch accuracy (fraction in [0, 1]) and per-epoch mean loss.
        Also stores self.n_epochs, self.train_acc, self.train_loss and self.train_time (seconds).
        '''

        #Put model into training mode. Initialise start time, train loss, and train accuracy variables
        self.model.train()
        self.n_epochs = n_epochs
        start_time = time.time()
        total_step = len(self.train_loader)
        train_loss = []
        train_acc = []

        #Iterate over n_epochs
        for epoch in range(n_epochs):
            epoch_loss = []
            epoch_correct_predictions = 0
            total_predictions = 0

            #Iterate over the batches in train_loader
            for i, (images, labels) in enumerate(self.train_loader):
                images, labels = self._prepare_batch(images, labels)
                outputs = self.model(images)
                loss = self.model.loss(outputs, labels)

                #Implement L1 regularisation if required
                if self.model.l1_lambda > 0:
                    loss = loss + self.model.l1_lambda * self._weight_norm_sum(1)

                #Implement L2 regularisation if required
                if self.model.l2_lambda > 0:
                    loss = loss + self.model.l2_lambda * self._weight_norm_sum(2)

                #Update model weights
                self.model.optimiser.zero_grad()
                loss.backward()
                self.model.optimiser.step()

                #Update variables tracking loss (including any penalty), predictions, and truth labels
                epoch_loss.append(loss.item())
                predicted_labels = torch.max(outputs.data, 1)[1]
                epoch_correct_predictions += predicted_labels.eq(labels).sum().item()
                total_predictions += labels.size(0)

                #Print out information about current epoch, step, and loss if verbose=True
                if verbose and (i+1) % 100 == 0:
                    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                        .format(epoch+1, n_epochs, i+1, total_step, loss.item()))

            #Update learning rate according to learning rate scheduler hyperparameter (once per epoch)
            self.model.scheduler.step()

            #Calculate loss and accuracy
            train_loss.append(np.mean(epoch_loss))
            epoch_accuracy = epoch_correct_predictions / total_predictions
            train_acc.append(epoch_accuracy)

        self.train_acc, self.train_loss = train_acc, train_loss
        self.train_time = time.time() - start_time

        return self.train_acc, self.train_loss

    def plot(self):
        '''
        Display a twin-axes graph showing the training accuracy and loss over each epoch.
        Requires train() to have been called first.
        '''
        fig, ax1 = plt.subplots()
        plt.title(f"Training Accuracy and Loss for {self.model.name}")

        x = [str(i+1) for i in range(self.n_epochs)]
        y1 = np.array(self.train_acc) * 100
        y2 = np.array(self.train_loss)

        ax1.set_xlabel('Epochs')
        ax1.set_ylabel('Accuracy %', color='black')
        ax1.set_ylim(bottom=min(y1)*0.95, top=max(min(y1) + 2, max(y1)*1.05))
        acc_plot = ax1.plot(x, y1, color='red', label='Accuracy')

        # Adding Twin Axes
        ax2 = ax1.twinx()
        ax2.set_ylabel('Loss', color='black')
        ax2.set_ylim(bottom=min(y2)*0.90, top=max(min(y2) + 1, max(y2)*1.05))
        loss_plot = ax2.plot(x, y2, color='blue', label='Loss')

        #Single legend covering the lines from both axes
        plots = acc_plot + loss_plot
        labels = [l.get_label() for l in plots]
        plt.legend(plots, labels, loc='lower center')

        # Show plot
        plt.show()

    def test(self):
        '''
        Test the model using test_loader, tracking the predicted values and corresponding truth labels.

        Stores self.predictions, self.truth_labels and self.test_loss (mean batch loss) and
        returns the accuracy as a percentage.
        '''

        #Initialise variables to keep track of model predictions, truth labels, and test loss
        self.predictions = []
        self.truth_labels = []
        self.test_loss = []
        self.model.eval()
        with torch.no_grad():
            correct_predictions = 0
            total_predictions = 0

            #Iterate through images and labels in test_loader
            for images, labels in self.test_loader:
                images, labels = self._prepare_batch(images, labels)
                outputs = self.model(images)
                loss = self.model.loss(outputs, labels)
                self.test_loss.append(loss.item())
                predicted_labels = torch.max(outputs.data, 1)[1]
                self.predictions += predicted_labels.tolist()
                self.truth_labels += labels.tolist()
                correct_predictions += predicted_labels.eq(labels).sum().item()
                total_predictions += labels.size(0)

        #Average test loss across test loader, calculate accuracy
        self.test_loss = np.mean(self.test_loss)
        accuracy = 100 * correct_predictions / total_predictions

        #Print out accuracy, test loss, and training time
        print(f'Accuracy of the network on the {total_predictions} test images: {accuracy:.2f}%. Final test loss: {self.test_loss:.2f}.')
        #The training time only exists once train() has run; skip the line otherwise instead of crashing
        train_time = getattr(self, 'train_time', None)
        if train_time is not None:
            print(f'The time taken to train the network was {int(train_time // 60)} mins {train_time % 60 :.0f} seconds')

        return accuracy

    def print_predictions(self):
        '''
        For the first 6 samples in test_loader, print out the predicted labels made by the model, along with the images and
        their truth labels. Requires test() to have been called first.
        '''
        #Reset self.test_loader to an unshuffled loader over test_set so the images line up with self.predictions
        #NOTE(review): this only lines up if test() itself iterated test_set in order - confirm for custom loaders
        self.test_loader = torch.utils.data.DataLoader(self.test_set, batch_size=64, shuffle=False)

        figure = plt.figure(figsize=(8, 7))
        plt.title("The First 6 Samples And Their Predicted Labels\n\n")
        plt.axis("off")
        rows, cols = 2, 3

        #Iterate through the first 6 images in test loader and print them, labelling them with their truth labels and model predictions
        for inputs, _ in self.test_loader:
            for i in range(1, cols * rows + 1):
                image  = inputs[i-1].cpu().numpy()
                #Reverse the axes before display (same orientation fix as in describe_dataset)
                image = image.T
                figure.add_subplot(rows, cols, i)
                plt.title(f"predicted label: {self.labels_map[self.predictions[i-1]]}\ntruth label: {self.labels_map[self.truth_labels[i-1]]}")
                plt.axis("off")
                plt.imshow(image.squeeze(), cmap="gray")
            #Only the first batch is needed
            break

        plt.show()

    def report(self):
        '''
        Create, print, and display plot of confusion matrix. Print out various performance metrics and a classification report.
        Requires test() to have been called first.
        '''

        #Create and print confusion matrix
        conf_matrix = confusion_matrix(self.truth_labels, self.predictions)
        print("Confusion Matrix\n")
        print(conf_matrix)
        #Plot heatmap of conf matrix
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(conf_matrix, annot=False, fmt='g', cmap='Blues', xticklabels=self.labels_map.values(), yticklabels=self.labels_map.values())
        plt.title("Confusion Matrix")

        #Print out various metrics
        print('\nAccuracy: {:.2f}%\n'.format(100*accuracy_score(self.truth_labels, self.predictions)))

        print('Micro Precision: {:.2f}'.format(precision_score(self.truth_labels, self.predictions, average='micro')))
        print('Micro Recall: {:.2f}'.format(recall_score(self.truth_labels, self.predictions, average='micro')))
        print('Micro F1-score: {:.2f}\n'.format(f1_score(self.truth_labels, self.predictions, average='micro')))

        print('Macro Precision: {:.2f}'.format(precision_score(self.truth_labels, self.predictions, average='macro')))
        print('Macro Recall: {:.2f}'.format(recall_score(self.truth_labels, self.predictions, average='macro')))
        print('Macro F1-score: {:.2f}\n'.format(f1_score(self.truth_labels, self.predictions, average='macro')))

        print('Weighted Precision: {:.2f}'.format(precision_score(self.truth_labels, self.predictions, average='weighted')))
        print('Weighted Recall: {:.2f}'.format(recall_score(self.truth_labels, self.predictions, average='weighted')))
        print('Weighted F1-score: {:.2f}'.format(f1_score(self.truth_labels, self.predictions, average='weighted')))

        #Print out classification report
        print('\nClassification Report\n')
        print(classification_report(self.truth_labels, self.predictions, target_names=[str(class_name) for class_name in self.labels_map.values()]))


# Search space for hyperparameter tuning: every key maps to the list of candidate values to try.
# 'Regularisation' entries are (l1_lambda, l2_lambda) pairs.
hyperparam_map = {
    'Learning Rate Scheduler': ['StepLR', 'ExponentialLR'],
    'Activation function': ['relu', 'leaky_relu', 'elu'],
    'Optimiser': [torch.optim.SGD, torch.optim.RMSprop, torch.optim.Adagrad],
    'Batch Normalisation': [True, False],
    'Regularisation': [(0.0001, 0), (0, 0.0001), (0.0001, 0.0001)],
    'Dropout': [True, False],
}

# Starting configuration the search is compared against (one value per key of hyperparam_map).
baseline_hyperparams = {
    'Learning Rate Scheduler': 'StepLR',
    'Activation function': 'relu',
    'Optimiser': torch.optim.SGD,
    'Batch Normalisation': True,
    'Regularisation': (0.0001, 0.0001),
    'Dropout': True,
}

# Architecture settings that only apply to the CNN.
CNN_exclusive_baseline_hyperparams = {
    'out_channels_1': 32,
    'out_channels_2': 64,
    'conv_kernel_size': 2,
    'pool_kernel_size': 2,
    'fcl size': 50,
}


class MLP(nn.Module):
    '''
    Fully connected classifier: three hidden layers followed by a 47-way output layer.

    Besides the layers, the network carries its own optimiser, loss, learning-rate scheduler and
    the L1 / L2 penalty strengths (l1_lambda, l2_lambda).
    '''

    def __init__(self, hyperparam_dic, layer_1_size=600, layer_2_size=400, layer_3_size=200, learning_rate=0.1):
        '''
        hyperparam_dic: dictionary with the keys 'Learning Rate Scheduler', 'Activation function',
            'Optimiser', 'Batch Normalisation', 'Regularisation' ((l1_lambda, l2_lambda)) and 'Dropout'.
        layer_1_size, layer_2_size, layer_3_size: widths of the three hidden layers.
        learning_rate: initial learning rate handed to the optimiser.

        Raises ValueError for an unknown activation function or learning-rate scheduler name.
        '''
        super(MLP, self).__init__()
        self.name = 'MLP'
        # Keep a private copy so later edits to the caller's dictionary cannot change this network's forward pass.
        self.hyperparam_dic = dict(hyperparam_dic)
        # Penalty strengths come from the dictionary this network was built with
        # (previously they were always taken from the global baseline dictionary).
        self.l1_lambda, self.l2_lambda = self.hyperparam_dic['Regularisation']

        self.FCL1 = nn.Linear(28*28, layer_1_size) #Input size
        self.FCL2 = nn.Linear(layer_1_size, layer_2_size)
        self.FCL3 = nn.Linear(layer_2_size, layer_3_size)
        self.FCL4 = nn.Linear(layer_3_size, 47) #Number of classes

        activations = {'relu': nn.ReLU, 'leaky_relu': nn.LeakyReLU, 'elu': nn.ELU}
        activation_name = self.hyperparam_dic['Activation function']
        if activation_name not in activations:
            raise ValueError(f"Unknown activation function: {activation_name!r}")
        self.activation = activations[activation_name]()

        # The optional layers are only created when enabled.
        if self.hyperparam_dic['Batch Normalisation'] == True:
            self.bn1 = nn.BatchNorm1d(layer_1_size)
            self.bn2 = nn.BatchNorm1d(layer_2_size)
            self.bn3 = nn.BatchNorm1d(layer_3_size)

        if self.hyperparam_dic['Dropout'] == True:
            self.dropout = nn.Dropout(p=0.3)

        # Define optimiser, loss and learning-rate scheduler
        self.optimiser = self.hyperparam_dic['Optimiser'](self.parameters(), lr=learning_rate)
        self.loss = nn.CrossEntropyLoss()

        scheduler_name = self.hyperparam_dic['Learning Rate Scheduler']
        if scheduler_name == 'StepLR':
            self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimiser, step_size=1, gamma=0.2)
        elif scheduler_name == 'ExponentialLR':
            self.scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimiser, gamma=0.2)
        else:
            raise ValueError(f"Unknown learning rate scheduler: {scheduler_name!r}")

    def forward(self, x):
        '''
        Apply linear -> (batch norm) -> activation -> (dropout) for each hidden layer, then the output layer.

        x is passed straight to a Linear layer with 28*28 input features; returns the 47 class scores per sample.
        '''
        use_batch_norm = self.hyperparam_dic['Batch Normalisation'] == True
        use_dropout = self.hyperparam_dic['Dropout'] == True

        out = x
        for index, linear in enumerate((self.FCL1, self.FCL2, self.FCL3), start=1):
            out = linear(out)
            if use_batch_norm:
                out = getattr(self, f'bn{index}')(out)
            out = self.activation(out)
            if use_dropout:
                out = self.dropout(out)

        #Output layer
        return self.FCL4(out)
    
class CNN(nn.Module):
    '''
    Convolutional classifier: two conv -> pool stages, one hidden fully connected layer and a 47-way output layer.

    Besides the layers, the network carries its own optimiser, loss, learning-rate scheduler and
    the L1 / L2 penalty strengths (l1_lambda, l2_lambda).
    '''

    def __init__(self, hyperparam_dic, CNN_exclusive_baseline_hyperparams, learning_rate=0.1,):
        '''
        hyperparam_dic: dictionary with the keys 'Learning Rate Scheduler', 'Activation function',
            'Optimiser', 'Batch Normalisation', 'Regularisation' ((l1_lambda, l2_lambda)) and 'Dropout'.
        CNN_exclusive_baseline_hyperparams: dictionary with the keys 'out_channels_1', 'out_channels_2',
            'conv_kernel_size', 'pool_kernel_size' and 'fcl size'.
        learning_rate: initial learning rate handed to the optimiser.

        Raises ValueError for an unknown activation function or learning-rate scheduler name.
        '''
        super(CNN, self).__init__()
        self.name = 'CNN'
        # Keep a private copy so later edits to the caller's dictionary cannot change this network's forward pass.
        self.hyperparam_dic = dict(hyperparam_dic)
        self.CNN_exclusive_baseline_hyperparams = CNN_exclusive_baseline_hyperparams
        input_shape = (1, 28, 28)     #Input = list(np.shape(train_set[0][0]))

        # Penalty strengths come from the dictionary this network was built with
        # (previously they were always taken from the global baseline dictionary).
        self.l1_lambda, self.l2_lambda = self.hyperparam_dic['Regularisation']

        activations = {'relu': nn.ReLU, 'leaky_relu': nn.LeakyReLU, 'elu': nn.ELU}
        activation_name = self.hyperparam_dic['Activation function']
        if activation_name not in activations:
            raise ValueError(f"Unknown activation function: {activation_name!r}")
        self.activation = activations[activation_name]()

        if self.hyperparam_dic['Dropout'] == True:
            self.dropout = nn.Dropout(p=0.3)

        # Define the convolution and pooling kernel size.
        self.conv_kernel_size = CNN_exclusive_baseline_hyperparams['conv_kernel_size']
        self.pool_kernel_size = CNN_exclusive_baseline_hyperparams['pool_kernel_size']

        # Define layers
        self.out_channels_1 = CNN_exclusive_baseline_hyperparams['out_channels_1']
        self.in_channels_2 = self.out_channels_1
        self.out_channels_2 = CNN_exclusive_baseline_hyperparams['out_channels_2']

        self.conv1 = nn.Conv2d(input_shape[0], self.out_channels_1, self.conv_kernel_size)
        self.conv2 = nn.Conv2d(self.in_channels_2, self.out_channels_2, self.conv_kernel_size)
        self.pool = nn.MaxPool2d(self.pool_kernel_size)

        self.fcl1_size = CNN_exclusive_baseline_hyperparams['fcl size']

        if self.hyperparam_dic['Batch Normalisation'] == True:
            self.bn1 = nn.BatchNorm2d(self.out_channels_1)
            self.bn2 = nn.BatchNorm2d(self.out_channels_2)
            self.bn3 = nn.BatchNorm1d(self.fcl1_size)

        #Calculate the output shape ('num_flat_features') after the two conv + pool stages:
        #each convolution shrinks a side by (kernel - 1), each pooling floor-divides it by the pool kernel
        _, height, width = input_shape
        for _stage in range(2):
            height = (height - (self.conv_kernel_size - 1)) // self.pool_kernel_size
            width = (width - (self.conv_kernel_size - 1)) // self.pool_kernel_size

        self.num_flat_features = width * height * self.out_channels_2

        #Define the last fully connected layers
        self.FCL1 = nn.Linear(self.num_flat_features, self.fcl1_size)
        self.FCL2 = nn.Linear(self.fcl1_size, out_features=47)

        # Define optimiser, loss and learning-rate scheduler
        self.optimiser = self.hyperparam_dic['Optimiser'](self.parameters(), lr=learning_rate)
        self.loss = nn.CrossEntropyLoss()

        scheduler_name = self.hyperparam_dic['Learning Rate Scheduler']
        if scheduler_name == 'StepLR':
            self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimiser, step_size=1, gamma=0.2)
        elif scheduler_name == 'ExponentialLR':
            self.scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimiser, gamma=0.2)
        else:
            raise ValueError(f"Unknown learning rate scheduler: {scheduler_name!r}")

    def forward(self, x):
        '''
        Run two conv -> (batch norm) -> activation -> pool -> (dropout) stages, flatten, then the two
        fully connected layers. Returns the 47 class scores per sample.
        '''
        use_batch_norm = self.hyperparam_dic['Batch Normalisation'] == True
        use_dropout = self.hyperparam_dic['Dropout'] == True

        #Convolutional stages
        out = x
        for index, conv in enumerate((self.conv1, self.conv2), start=1):
            out = conv(out)
            if use_batch_norm:
                out = getattr(self, f'bn{index}')(out)
            out = self.activation(out)
            out = self.pool(out)
            if use_dropout:
                out = self.dropout(out)

        #Hidden fully connected layer
        out = out.reshape(-1, self.num_flat_features)
        out = self.FCL1(out)
        if use_batch_norm:
            out = self.bn3(out)
        out = self.activation(out)
        if use_dropout:
            out = self.dropout(out)

        #Output layer
        return self.FCL2(out)

def reset_weights(model):
    '''
    Re-initialise the parameters of a single Conv2d or Linear layer in place; any other module is left untouched.
    Intended to be passed to nn.Module.apply so that it visits every sub-module.
    '''
    resettable_layers = (nn.Conv2d, nn.Linear)
    if not isinstance(model, resettable_layers):
        return
    model.reset_parameters()

def explore_w_kfold(model_choice, hyperparam_map, baseline_hyperparams, CNN_exclusive_baseline_hyperparams, n_epochs=5, k=5, Shuffle=True, batch_size=64):
    '''
    Systematically explore hyperparameter options one hyperparameter at a time, updating the baseline
    whenever a new option returns a better accuracy.

    Each candidate is scored with k-fold cross validation on the global train_set: a fresh network is
    trained for n_epochs on the training folds and tested on the validation fold, and the fold
    accuracies are averaged.

    model_choice: the MLP or CNN class.
    hyperparam_map: dictionary mapping each hyperparameter name to its list of candidate values (not modified).
    baseline_hyperparams: starting configuration; updated IN PLACE whenever a better option is found.
    CNN_exclusive_baseline_hyperparams: architecture settings passed to CNN (ignored for MLP).
    n_epochs, k, Shuffle, batch_size: epochs per fold, number of folds, KFold shuffle flag, loader batch size.

    Raises ValueError when model_choice is neither MLP nor CNN.

    NOTE(review): the initial baseline accuracy is measured on the global test_set, whereas candidates are
    scored by cross validation on train_set, so the two numbers are not strictly comparable.
    NOTE(review): the return value is the initial Model wrapper when nothing improved, otherwise the raw
    network of the last fold of the most recent improving option (after reset_weights was applied to it).
    This is kept as-is for compatibility - confirm what callers expect.
    '''
    if model_choice not in (MLP, CNN):
        raise ValueError(f"model_choice must be MLP or CNN, got {model_choice!r}")

    def build_network(hyperparams):
        #Create a fresh network of the requested type on the global device
        if model_choice == MLP:
            return MLP(hyperparams).to(device)
        return CNN(hyperparams, CNN_exclusive_baseline_hyperparams).to(device)

    def print_hyperparams(hyperparams):
        #Print one "key: value" line per hyperparameter, wrapped in braces
        print("{")
        for key, value in hyperparams.items():
            print(f"{key}: {value}")
        print("}")

    #Print out current baseline
    print("Baseline Hyperparameters:")
    print("Model Choice:", model_choice.__name__)
    print_hyperparams(baseline_hyperparams)
    print()

    #Initialise, train, and test baseline
    baseline = Model(build_network(baseline_hyperparams), train_set, test_set)
    baseline.train(n_epochs, verbose=False)
    baseline_accuracy = baseline.test()

    for hyperparam in hyperparam_map:

        #Create new model dictionary
        new_model_hyperparams = baseline_hyperparams.copy()

        #Candidate options excluding the current baseline value. Built as a new list so that the
        #caller's hyperparam_map is left intact and the function can be called again with it.
        current_value = baseline_hyperparams[hyperparam]
        options = [option for option in hyperparam_map[hyperparam] if option != current_value]

        #Search through all remaining options for this hyperparameter
        for option in options:
            new_model_hyperparams[hyperparam] = option

            print("\nTesting:")
            print("Model Choice", model_choice.__name__)
            print_hyperparams(new_model_hyperparams)

            kfold = KFold(n_splits=k, shuffle=Shuffle)
            accuracy_per_fold = []

            #Perform k-fold cross validation
            for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(train_set)):
                print("Current Fold:", fold_idx + 1)
                train_loader = DataLoader(train_set, batch_size=batch_size, sampler=SubsetRandomSampler(train_idx))
                val_loader = DataLoader(train_set, batch_size=batch_size, sampler=SubsetRandomSampler(val_idx))
                model = build_network(new_model_hyperparams)

                #NN used to refer to either model; its default loaders are replaced by the fold loaders
                NN = Model(model, train_loader.dataset, test_set)
                NN.train_loader = train_loader
                NN.test_loader = val_loader

                #Train and test new model
                NN.train(n_epochs, verbose=False)
                accuracy_per_fold.append(NN.test())
                model.apply(reset_weights)

            new_model_accuracy = np.mean(accuracy_per_fold)
            print(f"New model average accuracy: {new_model_accuracy:.2f}%")

            #Compare new model accuracy to current baseline; if better, update baseline
            if new_model_accuracy > baseline_accuracy:
                print("Success! Improvement on the previous combination of hyper-parameters.")
                baseline_hyperparams[hyperparam] = option
                baseline_accuracy = new_model_accuracy
                baseline = model

    print(f"\nFinal hyperparameter combination, with {baseline_accuracy:.2f}% accuracy:")
    print_hyperparams(baseline_hyperparams)

    return baseline


'''
Use the explore_w_kfold function to systematically find the best hyperparameter combinations for each model. We returned the following combinations.
'''
# The searches themselves are left commented out; their results are recorded in the dictionaries below.
#best_mlp = explore_w_kfold(MLP, hyperparam_map, baseline_hyperparams, CNN_exclusive_baseline_hyperparams, n_epochs=5, k=5, Shuffle=True, batch_size=64)
#best_cnn = explore_w_kfold(CNN, hyperparam_map, baseline_hyperparams, CNN_exclusive_baseline_hyperparams, n_epochs=5, k=3, Shuffle=True, batch_size=64)

# Best configuration recorded for the MLP.
MLP_best_hyperparams = {
    'Learning Rate Scheduler': 'StepLR',
    'Activation function': 'relu',
    'Optimiser': torch.optim.SGD,
    'Batch Normalisation': True,
    'Regularisation': (0.0001, 0.0001),
    'Dropout': False,
}

# Best configuration recorded for the CNN.
CNN_best_hyperparams = {
    'Learning Rate Scheduler': 'StepLR',
    'Activation function': 'elu',
    'Optimiser': torch.optim.Adagrad,
    'Batch Normalisation': True,
    'Regularisation': (0.0001, 0),
    'Dropout': False,
}

# Build the tuned MLP on the selected device, wrap it in the training harness,
# preview the training data and train for 10 epochs.
best_mlp = MLP(MLP_best_hyperparams)
best_mlp = best_mlp.to(device)
best_mlp_model = Model(model=best_mlp, train_set=train_set, test_set=test_set)
best_mlp_model.describe_dataset(data=train_set)
best_mlp_model.train(n_epochs=10)
# Remaining MLP evaluation steps (currently disabled):
# best_mlp_model.plot()
# best_mlp_model.test()
# best_mlp_model.print_predictions()
# best_mlp_model.report()

# Equivalent pipeline for the tuned CNN (currently disabled):
# best_cnn = CNN(CNN_best_hyperparams, CNN_exclusive_baseline_hyperparams).to(device)
# best_cnn_model = Model(best_cnn, train_set, test_set)
# best_cnn_model.describe_dataset(test_set)
# best_cnn_model.train(n_epochs=10)
# best_cnn_model.plot()
# best_cnn_model.test()
# best_cnn_model.print_predictions()
# best_cnn_model.report()


Downloading https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip to /Users/rhyscooper/Downloads/EMNIST/EMNIST/raw/gzip.zip


 35%|███▍      | 194629376/561753746 [04:07<07:46, 786337.07it/s]  


RuntimeError: File not found or corrupted.

In [None]:
# Final configuration evaluated with 5-fold cross validation on train_set (MLP only).
final_hyperparams = {
'Learning Rate Scheduler': 'StepLR',
'Activation function': 'relu',
'Optimiser': torch.optim.SGD,
'Batch Normalisation': True,
'Regularisation': (0.0001, 0.0001),
'Dropout': False
}

# Echo the configuration being tested.
print("\nTesting:")
print("Model Choice MLP")
print("{")
for key, value in final_hyperparams.items():
    print(f"{key}: {value}")   
print("}")

# Shuffled 5-fold split; accuracy_per_fold collects the accuracy returned by NN.test() for each fold.
kfold = KFold(n_splits=5, shuffle=True)
accuracy_per_fold = []

for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(train_set)):
    print("Current Fold:", fold_idx + 1)
    # Both loaders draw from train_set; the samplers restrict them to this fold's indices.
    train_loader = DataLoader(train_set, batch_size=64, sampler=SubsetRandomSampler(train_idx))
    val_loader = DataLoader(train_set, batch_size=64, sampler=SubsetRandomSampler(val_idx))
    # A fresh network is created for every fold.
    model = MLP(final_hyperparams).to(device)
    NN = Model(model, train_loader.dataset, test_set)
    # Replace the harness's default loaders so that train() uses the training folds
    # and test() uses the validation fold.
    NN.train_loader = train_loader
    NN.test_loader = val_loader
    # Train for 40 epochs without per-step logging, then record the validation accuracy.
    NN.train(40, verbose=False) 
    new_model_accuracy = NN.test() 
    accuracy_per_fold.append(new_model_accuracy)
    # Re-initialise this fold's Linear layers (the next iteration builds a new MLP anyway).
    model.apply(reset_weights)

Epoch [1/50], Step [100/1763], Loss: 2.8651
Epoch [1/50], Step [200/1763], Loss: 2.3685
Epoch [1/50], Step [300/1763], Loss: 2.2689
Epoch [1/50], Step [400/1763], Loss: 2.4977
Epoch [1/50], Step [500/1763], Loss: 2.3928
Epoch [1/50], Step [600/1763], Loss: 1.9627
Epoch [1/50], Step [700/1763], Loss: 2.1782
Epoch [1/50], Step [800/1763], Loss: 1.9875
Epoch [1/50], Step [900/1763], Loss: 2.2550
Epoch [1/50], Step [1000/1763], Loss: 1.6382
Epoch [1/50], Step [1100/1763], Loss: 1.9800
Epoch [1/50], Step [1200/1763], Loss: 1.8016
Epoch [1/50], Step [1300/1763], Loss: 1.7735
Epoch [1/50], Step [1400/1763], Loss: 1.7234
Epoch [1/50], Step [1500/1763], Loss: 1.5972
Epoch [1/50], Step [1600/1763], Loss: 1.4861
Epoch [1/50], Step [1700/1763], Loss: 1.5909
Epoch [2/50], Step [100/1763], Loss: 1.3739
Epoch [2/50], Step [200/1763], Loss: 1.4970
Epoch [2/50], Step [300/1763], Loss: 1.5019
Epoch [2/50], Step [400/1763], Loss: 1.2472
Epoch [2/50], Step [500/1763], Loss: 1.4560
Epoch [2/50], Step [600/