# Instructions

Simply set the Settings below to the variant of the experiment you wish to run, and click Runtime > Run All.

Note that the experiments in the report varied only in the dataset and the optimizer used. The other settings are included for convenience and need not be altered to reproduce the experiments.

In [None]:
# Code inspired by https://github.com/erykml/medium_articles/blob/master/Computer%20Vision/lenet5_pytorch.ipynb

### SETTINGS
# User-editable configuration for the whole notebook; later cells read these
# module-level names directly.
import sys

## Dataset. For convenience, comment out the one you do not wish to use.
# Valid values are "FashionMNIST" and "CIFAR" (the names accepted by dataset()).
# --------------
dataset_to_use = "FashionMNIST"
# dataset_to_use = "CIFAR"

## Optimizer. For convenience, comment out the ones you do not wish to use.
# Valid values are "Adam", "RMSprop" and "SGD"; the final cell replaces this
# string with the constructed torch optimizer object.
# --------------
optimizer = "Adam"
#optimizer = "RMSprop"
# optimizer = "SGD"

## Runtime. Time to run training for, in minutes (passed to training_loop as `time`).
# (Note: the current iteration will be completed, so technically it will always run a little longer)
# NOTE(review): this comment originally said "Time to run PSO for"; the cells in
# this notebook train with the gradient optimizer chosen above - confirm wording.
# --------------
time = 60

## Misc settings. Should not need to be modified.
# Note that the runtime will terminate the experiments before N_EPOCHS is reached.
# LEARNING_RATE is the `lr` given to the optimizer; N_EPOCHS is passed to
# training_loop as `epochs`.
# --------------
LEARNING_RATE = 0.001
N_EPOCHS = sys.maxsize

In [None]:
# Import modules
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.nn.functional as F
from datetime import datetime
from tensorflow.keras.datasets import fashion_mnist
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Pick the compute device: use the GPU when PyTorch reports CUDA support,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [None]:
from torchvision import datasets, transforms # This import appears to be bugged, hence why we reload it in multiple cells

def dataset(variant, n_samples=10000):
  """Load the requested dataset and wrap it in full-batch DataLoaders.

  Args:
    variant: Name of the dataset to load; 'FashionMNIST' or 'CIFAR'.
    n_samples: Maximum number of images kept from each of the train and
      validation/test splits. Defaults to 10000 due to runtime
      considerations.

  Returns:
    A ``(train_loader, valid_loader)`` tuple. Each loader uses a batch size
    equal to the number of kept samples, i.e. it yields the whole (truncated)
    split as a single batch.

  Raises:
    ValueError: If ``variant`` is not a supported dataset name or
      ``n_samples`` is smaller than 1.
  """
  # Validate up front so that a typo fails with a clear message instead of an
  # UnboundLocalError once the (never assigned) datasets are indexed.
  if variant not in ('FashionMNIST', 'CIFAR'):
    raise ValueError(
        f"Not a valid dataset: {variant!r} (expected 'FashionMNIST' or 'CIFAR')")
  if n_samples < 1:
    raise ValueError(f"n_samples must be at least 1, got {n_samples!r}")

  transformer = transforms.Compose([transforms.ToTensor()])

  # Compare by value (==). `is` tests object identity, which for strings only
  # works by accident of interning and emits a SyntaxWarning on Python 3.8+.
  if variant == 'FashionMNIST':
    # download and create datasets
    train_dataset = datasets.FashionMNIST(root='mnist_data',
                                          train=True,
                                          transform=transformer,
                                          download=True)

    valid_dataset = datasets.FashionMNIST(root='mnist_data',
                                          train=False,
                                          transform=transformer)
  else:  # variant == 'CIFAR'
    train_dataset = datasets.CIFAR10(root='data',
                                     train=True,
                                     transform=transformer,
                                     download=True)

    valid_dataset = datasets.CIFAR10(root='data',
                                     train=False,
                                     transform=transformer)

  # Materialise the leading samples once so the transform is not re-applied on
  # every pass; clamp to the split size so a small split cannot raise IndexError.
  n_train = min(n_samples, len(train_dataset))
  n_valid = min(n_samples, len(valid_dataset))
  train_dataset = [train_dataset[idx] for idx in range(n_train)]
  valid_dataset = [valid_dataset[idx] for idx in range(n_valid)]

  train_loader = DataLoader(train_dataset, batch_size=len(train_dataset))
  valid_loader = DataLoader(valid_dataset, batch_size=len(valid_dataset))

  return train_loader, valid_loader

In [None]:
# parameters
RANDOM_SEED = 1337  # seed handed to torch.manual_seed before the model is built

# Decide whether the model should be built for single-channel input.
# Compare by value (==): `is` checks object identity, which for strings only
# works by accident of interning and emits a SyntaxWarning on Python 3.8+.
if dataset_to_use == 'FashionMNIST':
  greyscale = True
elif dataset_to_use == 'CIFAR':
  greyscale = False
else:
  # Fail here rather than leaving `greyscale` undefined for later cells.
  raise ValueError(
      f"Not a valid dataset: {dataset_to_use!r} (expected 'FashionMNIST' or 'CIFAR')")

N_CLASSES = 10  # number of output logits of the classifier

In [None]:
# Build the train and validation DataLoaders for the dataset chosen in the settings.
train_loader, valid_loader = dataset(dataset_to_use)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to mnist_data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting mnist_data/FashionMNIST/raw/train-images-idx3-ubyte.gz to mnist_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to mnist_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting mnist_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to mnist_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to mnist_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting mnist_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to mnist_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to mnist_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting mnist_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_data/FashionMNIST/raw



In [None]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN whose layer widths scale with the input channel count.

    Args:
        n_classes: Number of output logits.
        greyscale: When True the network takes 1-channel input and pads the
            first convolution by 2 pixels; otherwise it takes 3-channel input
            with no padding.

    The classifier's first layer expects ``120 * in_channels`` flattened
    features, so the convolutional stack must reduce the image to 1x1
    (presumably 28x28 greyscale or 32x32 colour input - TODO confirm).
    """

    def __init__(self, n_classes, greyscale=False):
        super().__init__()

        # (input channels, padding of the first convolution)
        in_channels, pad = (1, 2) if greyscale else (3, 0)

        # Every layer width is the classic LeNet-5 width times the channel count.
        width_c1 = 6 * in_channels
        width_c3 = 16 * in_channels
        width_c5 = 120 * in_channels
        width_f6 = 84 * in_channels

        conv_layers = [
            nn.Conv2d(in_channels, out_channels=width_c1, kernel_size=5, stride=1, padding=pad),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(width_c1, out_channels=width_c3, kernel_size=5, stride=1),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(width_c3, out_channels=width_c5, kernel_size=5, stride=1),
            nn.ReLU(),
        ]
        self.feature_extractor = nn.Sequential(*conv_layers)

        dense_layers = [
            nn.Linear(width_c5, out_features=width_f6),
            nn.ReLU(),
            nn.Linear(width_f6, out_features=n_classes),
        ]
        self.classifier = nn.Sequential(*dense_layers)

    def forward(self, x):
        """Return ``(logits, probs)`` where probs is the softmax of logits over dim 1."""
        features = torch.flatten(self.feature_extractor(x), 1)
        logits = self.classifier(features)
        return logits, F.softmax(logits, dim=1)

In [None]:
def train(train_loader, model, criterion, optimizer, device):
    '''
    Run one optimisation pass over train_loader.

    For every batch the gradients are cleared, the loss on the model's logits
    is computed, back-propagated, and the optimizer takes a step.

    Returns the model, the optimizer and the epoch loss, i.e. the
    sample-weighted sum of batch losses divided by len(train_loader.dataset).
    '''
    model.train()  # switch the model to training mode
    loss_sum = 0

    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous batch
        optimizer.zero_grad()

        # The model returns (logits, probs); only the logits feed the loss
        logits, _ = model(inputs)
        batch_loss = criterion(logits, targets)

        batch_loss.backward()
        optimizer.step()

        # criterion's value is scaled by the batch size so batches of different
        # sizes contribute proportionally to the epoch loss
        loss_sum += batch_loss.item() * inputs.size(0)

    # an epoch is one full pass through the dataset
    n_samples = len(train_loader.dataset)
    return model, optimizer, loss_sum / n_samples

In [None]:
def validate(valid_loader, model, criterion, device):
    '''
    Function for the validation step of the training loop

    Puts the model in evaluation mode and computes the loss over every batch of
    valid_loader without updating any parameters.

    Returns the model and the epoch loss, i.e. the sample-weighted sum of
    batch losses divided by len(valid_loader.dataset).
    '''

    model.eval()
    running_loss = 0

    # No gradients are needed for validation. Disabling autograd here (rather
    # than relying on every caller to do so) avoids building a graph for each
    # batch; nesting inside a caller's own torch.no_grad() is harmless.
    with torch.no_grad():
        for X, y_true in valid_loader:

            X = X.to(device)
            y_true = y_true.to(device)

            # Forward pass and record loss; the model returns (logits, probs)
            y_hat, _ = model(X)
            loss = criterion(y_hat, y_true)
            # scale by the batch size so the final division yields a per-sample loss
            running_loss += loss.item() * X.size(0)

    epoch_loss = running_loss / len(valid_loader.dataset)

    return model, epoch_loss

In [None]:
def get_accuracy(model, data_loader, device):
    '''
    Compute the fraction of samples in data_loader that the model labels correctly.

    The model is switched to evaluation mode and run without gradient
    tracking. The predicted label of a sample is the index of its largest
    class probability. The result is a tensor: correct predictions divided
    by the number of samples seen.
    '''

    hits = 0
    total = 0

    model.eval()  # evaluation mode
    with torch.no_grad():  # gradients are irrelevant when only scoring predictions
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # The model returns (logits, probs); the probabilities are used here
            probs = model(inputs)[1]
            # torch.max over dim 1 gives (values, indices); the indices are the labels
            predictions = torch.max(probs, 1)[1]

            hits += (predictions == targets).sum()
            total += targets.size(0)

    return hits.float() / total

In [None]:
def training_loop(model, criterion, optimizer, train_loader, valid_loader, epochs, device, time, print_every=1):
    '''
    Function defining the entire training loop

    Alternates one training pass and one validation pass until either the time
    budget is used up or `epochs` passes have been completed, whichever comes
    first.

    Parameters:
        model, criterion, optimizer: forwarded to train() / validate()
        train_loader, valid_loader: data for the training and validation passes
        epochs: upper bound on the number of passes
        device: device that batches are moved to
        time: time budget in minutes; it is only checked between passes, so the
            pass in progress always finishes
        print_every: accuracies are computed and a progress line is printed on
            every print_every-th pass

    Returns:
        model, optimizer, the list of validation accuracies recorded on the
        printed passes, and a (train_losses, valid_losses) tuple holding one
        entry per completed pass.
    '''
    # Imported locally because the module itself is needed here (for
    # datetime.timedelta), while the notebook's top-level import binds the
    # name `datetime` to the class.
    import datetime

    # set objects for storing metrics
    train_losses = []
    valid_losses = []
    val_acc_list = []

    end_time = datetime.datetime.now() + datetime.timedelta(minutes=time)
    epoch = 0
    # Train model. `epochs` used to be ignored; it now caps the number of
    # passes in addition to the time budget.
    while epoch < epochs and datetime.datetime.now() <= end_time:
        # training
        model, optimizer, train_loss = train(train_loader, model, criterion, optimizer, device)
        train_losses.append(train_loss)

        # validation
        with torch.no_grad():
            model, valid_loss = validate(valid_loader, model, criterion, device)
            valid_losses.append(valid_loss)

        # epoch is zero-based, so this fires on passes print_every, 2 * print_every, ...
        if epoch % print_every == (print_every - 1):

            train_acc = get_accuracy(model, train_loader, device=device)
            valid_acc = get_accuracy(model, valid_loader, device=device)
            val_acc_list.append(valid_acc)

            print(f'{datetime.datetime.now().time().replace(microsecond=0)} --- '
                  f'Epoch: {epoch}\t'
                  f'Train loss: {train_loss:.4f}\t'
                  f'Valid loss: {valid_loss:.4f}\t'
                  f'Train accuracy: {100 * train_acc:.2f}\t'
                  f'Valid accuracy: {100 * valid_acc:.2f}')

        epoch = epoch + 1
    return model, optimizer, val_acc_list, (train_losses, valid_losses)

In [None]:
# Seed PyTorch's RNG before the model is built so weight initialisation is repeatable.
torch.manual_seed(RANDOM_SEED)

model = LeNet5(N_CLASSES, greyscale).to(DEVICE)

# Select the optimizer class by value. The previous `is "Adam"` checks compared
# object identity, which for strings only works by accident of interning and
# emits a SyntaxWarning on Python 3.8+.
optimizer_classes = {
  "Adam": torch.optim.Adam,
  "RMSprop": torch.optim.RMSprop,
  "SGD": torch.optim.SGD,
}
if optimizer not in optimizer_classes:
  # Stop here instead of handing an unusable value to the training loop. This
  # also triggers when the cell is re-run without re-running the settings cell,
  # because `optimizer` then already holds an optimizer object.
  raise ValueError(
      f"Not a valid optimizer: {optimizer!r} (expected one of {sorted(optimizer_classes)})")
optimizer = optimizer_classes[optimizer](model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

model, optimizer, val_acc, _ = training_loop(model, criterion, optimizer, train_loader, valid_loader, N_EPOCHS, DEVICE, time = time)
# val_acc is empty when no pass was reported (e.g. a zero time budget);
# max() of an empty list would raise.
if val_acc:
  print("Best val acc: ", max(val_acc))
else:
  print("No validation accuracy was recorded")

14:21:08 --- Epoch: 0	Train loss: 2.3044	Valid loss: 2.3008	Train accuracy: 10.00	Valid accuracy: 10.00
14:21:13 --- Epoch: 1	Train loss: 2.3011	Valid loss: 2.2975	Train accuracy: 10.00	Valid accuracy: 10.00
14:21:17 --- Epoch: 2	Train loss: 2.2976	Valid loss: 2.2936	Train accuracy: 13.87	Valid accuracy: 14.18
14:21:22 --- Epoch: 3	Train loss: 2.2936	Valid loss: 2.2891	Train accuracy: 18.39	Valid accuracy: 18.59
14:21:27 --- Epoch: 4	Train loss: 2.2890	Valid loss: 2.2837	Train accuracy: 21.40	Valid accuracy: 21.30
14:21:32 --- Epoch: 5	Train loss: 2.2834	Valid loss: 2.2767	Train accuracy: 27.55	Valid accuracy: 26.92
14:21:36 --- Epoch: 6	Train loss: 2.2763	Valid loss: 2.2682	Train accuracy: 36.67	Valid accuracy: 35.81
14:21:40 --- Epoch: 7	Train loss: 2.2676	Valid loss: 2.2578	Train accuracy: 40.32	Valid accuracy: 39.52
14:21:46 --- Epoch: 8	Train loss: 2.2571	Valid loss: 2.2450	Train accuracy: 41.96	Valid accuracy: 41.31
14:21:51 --- Epoch: 9	Train loss: 2.2441	Valid loss: 2.2295	Trai