In [32]:
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
import torch
from torchvision.datasets import MNIST
from torch.utils.data import Subset
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
import logging
import os

## Exercise 1: MLP and Residual MLP on MNIST
Replication, on a small scale, of the ResNet paper results, demonstrating the fact that deeper networks do not guarantee more reduction in training loss (or in validation accuracy) since a degradation problem is exposed: with the network depth increasing, accuracy gets saturated and then degrades rapidly. The paper's authors address the degradation problem by introducing a deep residual learning framework, fitting identity mapping within each block.

1. myMLP( ) defines a Multi Layer Perceptron with an hidden fully connected layer, a Gelu as the non-linear activation function between layers and a final logaritmic softmax to generate the output probabilities for classification task;

2. ResidualMLP( ) adds in the forward function the identity mapping between the input and the last layer output, a.k.a. a skip connection

In [33]:
# models and dataloaders definition
from torchvision.models import resnet18
from src.models import MLP_2layers, MLP_3layers, ResidualMLP, myCNN
from src.dataloader import get_cifar10_loaders, get_mnist_loaders

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

model_MLP_2layers = MLP_2layers().to(device)
model_MLP_3layers = MLP_3layers().to(device)
model_ResidualMLP = ResidualMLP().to(device)
model_myCNN = myCNN().to(device)
model_resnet18= resnet18(pretrained=True).to(device)

mnist_train_loader, mnist_val_loader, mnist_test_loader = get_mnist_loaders(batch_size=64)
cifar10_train_loader, cifar10_val_loader, cifar10_test_loader = get_cifar10_loaders(batch_size=128)

MNIST DataLoaders pronti.
  - Training samples: 55000, Validation samples: 5000, Test samples: 10000
CIFAR-10 DataLoaders pronti.
  - Training samples: 45000, Validation samples: 5000, Test samples: 10000


In [36]:
# training and testing the above models
from src.trainer import trainer
from src.tester import tester

lab1_root = os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
save_dir = os.path.join(lab1_root, "models")

train_losses_MLP_2layers, val_losses_MLP_2layers, val_accuracies_MLP_2layers, best_val_acc_MLP_2layers = trainer(model_MLP_2layers, mnist_train_loader, mnist_val_loader, device)
# model_MLP_2layers.load_state_dict(torch.load("models/best_MLP_2layers.pth"))
model_MLP_2layers.load_state_dict(torch.load(os.path.join(save_dir, "best_MLP_2layers.pth")))
test_loss_MLP_2layers, test_accuracy_MLP_2layers = tester(model_MLP_2layers, mnist_test_loader, device)

train_losses_MLP_3layers, val_losses_MLP_3layers, val_accuracies_MLP_3layers, best_val_acc_MLP_3layers = trainer(model_MLP_3layers, mnist_train_loader, mnist_val_loader, device)
# model_MLP_3layers.load_state_dict(torch.load("models/best_MLP_3layers.pth"))
model_MLP_3layers.load_state_dict(torch.load(os.path.join(save_dir, "best_MLP_3layers.pth")))
test_loss_MLP_3layers, test_accuracy_MLP_3layers = tester(model_MLP_3layers, mnist_test_loader, device)

train_losses_ResidualMLP, val_losses_ResidualMLP, val_accuracies_ResidualMLP, best_val_acc_ResidualMLP = trainer(model_ResidualMLP, mnist_train_loader, mnist_val_loader, device)
# model_ResidualMLP.load_state_dict(torch.load("models/best_ResidualMLP.pth"))
model_ResidualMLP.load_state_dict(torch.load(os.path.join(save_dir, "best_ResidualMLP.pth")))
test_loss_ResidualMLP, test_accuracy_ResidualMLP = tester(model_ResidualMLP, mnist_test_loader, device)

train_losses_myCNN, val_losses_myCNN, val_accuracies_myCNN, best_val_acc_myCNN = trainer(model_myCNN, cifar10_train_loader, cifar10_val_loader, device)
# model_myCNN.load_state_dict(torch.load("models/best_myCNN.pth"))
model_myCNN.load_state_dict(torch.load(os.path.join(save_dir, "best_myCNN.pth")))
test_loss_myCNN, test_accuracy_myCNN = tester(model_myCNN, cifar10_test_loader, device)

train_losses_resnet18, val_losses_resnet18, val_accuracies_resnet18, best_val_acc_resnet18 = trainer(model_resnet18, cifar10_train_loader, cifar10_val_loader, device)
# model_resnet18.load_state_dict(torch.load("models/best_resnet18.pth"))
model_resnet18.load_state_dict(torch.load(os.path.join(save_dir, "best_resnet18.pth")))
test_loss_resnet18, test_accuracy_resnet18 = tester(model_resnet18, cifar10_test_loader, device)

860 iterations per epoch
79 val iterations per epoch


Training MLP_2layers:   0%|                   | 0/100 [00:12<?, ?it/s]


RuntimeError: Parent directory ../DeepLearningApplications-Labs/LAB1/models does not exist.

In [None]:
# printing testing results
print(f"MLP 2 layers - Test Loss: {test_loss_MLP_2layers:.4f}, Test Accuracy: {test_accuracy_MLP_2layers:.4f}") 
print(f"MLP 3 layers - Test Loss: {test_loss_MLP_3layers:.4f}, Test Accuracy: {test_accuracy_MLP_3layers:.4f}")
print(f"Residual MLP - Test Loss: {test_loss_ResidualMLP:.4f}, Test Accuracy: {test_accuracy_ResidualMLP:.4f}")
print(f"myCNN - Test Loss: {test_loss_myCNN:.4f}, Test Accuracy: {test_accuracy_myCNN:.4f}")
print(f"ResNet18 - Test Loss: {test_loss_resnet18:.4f}, Test Accuracy: {test_accuracy_resnet18:.4f}")

In [None]:
# plotting training and validation performances
from src.utils import plot_single_performance, plot_all_performances

plot_single_performance(train_losses_MLP_2layers, val_losses_MLP_2layers, val_accuracies_MLP_2layers, "MLP 2 Layers")
plot_single_performance(train_losses_MLP_3layers, val_losses_MLP_3layers, val_accuracies_MLP_3layers, "MLP 3 Layers")
plot_single_performance(train_losses_ResidualMLP, val_losses_ResidualMLP, val_accuracies_ResidualMLP, "Residual MLP")
plot_single_performance(train_losses_myCNN, val_losses_myCNN, val_accuracies_myCNN, "myCNN")
plot_single_performance(train_losses_resnet18, val_losses_resnet18, val_accuracies_resnet18, "ResNet18")

In [None]:
from src.utils import plot_single_performance, plot_all_performances

all_models_data = {
    "MLP 2 Layers": {
        "train_losses": train_losses_MLP_2layers,
        "val_losses": val_losses_MLP_2layers,
        "val_accuracies": val_accuracies_MLP_2layers
    },
    "MLP 3 Layers": {
        "train_losses": train_losses_MLP_3layers,
        "val_losses": val_losses_MLP_3layers,
        "val_accuracies": val_accuracies_MLP_3layers
    },
    "Residual MLP": {
        "train_losses": train_losses_ResidualMLP,
        "val_losses": val_losses_ResidualMLP,
        "val_accuracies": val_accuracies_ResidualMLP
    },
    "myCNN": {
        "train_losses": train_losses_myCNN,
        "val_losses": val_losses_myCNN,
        "val_accuracies": val_accuracies_myCNN
    },
    "ResNet18": {
        "train_losses": train_losses_resnet18,
        "val_losses": val_losses_resnet18,
        "val_accuracies": val_accuracies_resnet18
    }
}

plot_all_performances(all_models_data)