In [9]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import random_split
from datetime import datetime
import numpy as np

# Getting the same results with train and train_manual_update
- Write torch.manual_seed(42) at the beginning of your notebook.
- Write torch.set_default_dtype(torch.double) at the beginning of your notebook to alleviate precision errors

In [10]:
torch.manual_seed(42)
torch.set_default_dtype(torch.double)

# Tasks
Load, analyse and preprocess the CIFAR-10 dataset. Split it into 3
datasets: training, validation and test. Take a subset of these datasets
by keeping only 2 labels: bird and airplane

In [None]:
def load_cifar(train_val_split=0.9, data_path='../data/', preprocessor=None):
    if preprocessor is None:
        preprocessor = transforms.ToTensor()

    with torch.no_grad():
        # Loading CIFAR 10 training + validation data
        trainval_full = datasets.CIFAR10(
            root=data_path,
            train=True,
            download=True,
            transform=preprocessor,
        )

        # Loading CIFAR 10 without training and validation data
        test_full = datasets.CIFAR10(
            root=data_path,
            train=False,
            download=True,
            transform=preprocessor,
        )


        # Filtering out "plane" and "bird" classes only from CIFAR 10
        allowed_labels = {0, 2}

        trainval_indices = [
            i for i in range(len(trainval_full))
            if trainval_full.targets[i] in allowed_labels
        ]
        trainval_full = torch.utils.data.Subset(trainval_full, trainval_indices)

        test_indices = [
            i for i in range(len(test_full))
            if test_full.targets[i] in allowed_labels
        ]
        test_data = torch.utils.data.Subset(test_full, test_indices)


        # Splitting training and validation data
        n_total = len(trainval_full)
        n_train = int(train_val_split * n_total)
        n_val = n_total - n_train

        train_data, val_data = random_split(trainval_full, [n_train, n_val])

    return train_data, val_data, test_data


def compute_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            logits = model(x)
            preds = logits.argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)
    return correct / total

Write a MyMLP class that implements a MLP in PyTorch (so only fully
connected layers) such that:
    
    - The input dimension is 768(= 16 ∗ 16 ∗ 3) and the output dimension is 2 (for the 2 classes).
    - The hidden layers have respectively 128 and 32 hidden units.
    - All activation functions are ReLU. The last layer has no activation function since the cross-entropy loss already includes a softmax activation
function.

In [12]:
class MyNet(nn.Module):
    #TODO

SyntaxError: incomplete input (2919787615.py, line 2)

Write a train(n_epochs, optimizer, model, loss_fn, train_loader) function that trains model for n_epochs epochs given an optimizer optimizer, a loss function loss_fn and a dataloader train_loader.

In [None]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
 #TODO


Write a similar function train manual_update that has no optimizer parameter, but a learning rate lr parameter instead and that manually updates each trainable parameter of model using equation (2). Do not forget to zero out all gradients after each iteration. 

Train 2 instances of MyMLP, one using train and the other using train_manual_update (use the same parameter values for both models). Compare their respective training losses. To get exactly the same results with both functions, see section 3.3

In [None]:
def train_manual_update(n_epochs, model, loss_fn, train_loader, lr=1e-2, momentum_coeff=0., weight_decay=0.):
    #TODO