In [33]:
"""
This module implements training and evaluation of a multi-layer perceptron in PyTorch.
You should fill in code into indicated sections.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import numpy as np
import json
import math

import matplotlib.pyplot as plt
%matplotlib inline 
import seaborn as sns
sns.set()

from copy import deepcopy

# Progress bar
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

import tensorboard as tb
from torch.utils.tensorboard import SummaryWriter

In [34]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [35]:
class MLP(nn.Module):

    def __init__(self, n_inputs, n_hidden, n_classes, use_batch_norm=False):

        super().__init__()
        layers = []
        dims = [n_inputs] + list[n_hidden] + [n_classes]

        for i in range(len(dims) - 1):
            fan_in, fan_out = dims[i], dims[i+1]

            lin = nn.Linear(fan_in, fan_out, bias=True)
            nn.init.kaiming_normal_(lin.weight, mode='fan_in', nonlinearity='leaky_relu')
            nn.init.zeros_(lin.bias)
            layers.append(lin)

            if i < len(dims) - 2:
                if use_batch_norm:
                    layers.append(nn.BatchNorm1d(fan_out))
                layers.append(nn.SiLU())


        self.net = nn.Sequential(*layers)


    def forward(self, x):
        x = x.view()
        out = self.net(x)

        return out

In [36]:
def accuracy(predictions, targets):

    preds = predictions.argmax(dim=1)
    correct = (preds == targets)
    accuracy = correct.float().mean().item()
    return accuracy

def eval_model(model, data_loader):

    model.eval()

    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, targets in data_loader:
            outputs = model(inputs)

            preds = outputs.argmax(dim=1)

            total_correct += (preds == targets).sum().item()
            total_samples += targets.size(0)

    avg_accuracy = total_correct / total_samples
    #######################
    # END OF YOUR CODE    #
    #######################

    return avg_accuracy

In [37]:
# Path to the folder where the datasets are/should be downloaded (e.g. MNIST)
DATASET_PATH = "../data"
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = "../saved_models/tutorial3"

# Function for setting the seed
def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available(): # GPU operation have separate seed
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
set_seed(42)

# Additionally, some operations on a GPU are implemented stochastic for efficiency
# We want to ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Fetching the device that will be used throughout this notebook
device = torch.device("cpu") if not torch.cuda.is_available() else torch.device("cuda:0")
print("Using device", device)

Using device cpu


In [38]:
import torchvision
from torchvision.datasets import FashionMNIST
from torchvision import transforms

# Transformations applied on each image => first make them a tensor, then normalize them in the range -1 to 1
transform = transforms.Compose([transforms.ToTensor(), 
                                transforms.Normalize((0.5,), (0.5,))])

# Loading the training dataset. We need to split it into a training and validation part
train_dataset = FashionMNIST(root=DATASET_PATH, train=True, transform=transform, download=True)
train_set, val_set = torch.utils.data.random_split(train_dataset, [50000, 10000])

# Loading the test set
test_set = FashionMNIST(root=DATASET_PATH, train=False, transform=transform, download=True)

# We define a set of data loaders that we can use for various purposes later.
# Note that for actually training a model, we will use different data loaders
# with a lower batch size.
train_loader = data.DataLoader(train_set, batch_size=1024, shuffle=True, drop_last=False)
val_loader = data.DataLoader(val_set, batch_size=1024, shuffle=False, drop_last=False)
test_loader = data.DataLoader(test_set, batch_size=1024, shuffle=False, drop_last=False)

In [39]:
def train_model(
    model,
    train_loader,
    test_loader,
    batch_size,
    epochs,
    seed,
    lr,
    device,
    data_dir="runs/fashionmnist_experiment"
):  
    set_seed(seed)

    writer = SummaryWriter(data_dir)

    loss_module = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # initializing best tracking
    best_val_acc = -1.0
    best_epoch = -1
    best_model = deepcopy(model)

    for epoch in range(epochs):
        model.train()

        running_loss = 0.0
        total_correct = 0
        total_samples = 0

        for inputs, targets in train_loader:
            # push to device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # set gradient to nothing
            optimizer.zero_grad()

            # forward
            logits = model(inputs)
            loss = loss_module(logits, targets)

            # backward + update
            loss.backward()
            optimizer.step()

            # stats
            running_loss += loss.item()

            preds = logits.argmax(dim=1)  # <-- FIX: logits, not targets
            total_correct += (preds == targets).sum().item()
            total_samples += targets.size(0)

        # epoch metrics (outside batch loop!)
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = total_correct / total_samples

        # write to tensorboard
        writer.add_scalar("Loss/train", epoch_loss, epoch)
        writer.add_scalar("Accuracy/train", epoch_acc, epoch)

        # validation
        val_acc = eval_model(model, test_loader)  # uses no_grad + eval inside
        writer.add_scalar("Accuracy/val", val_acc, epoch)

        # track best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_epoch = epoch
            best_model = deepcopy(model)

        print(
            f"Epoch {epoch+1}/{epochs} | "
            f"train_loss={epoch_loss:.4f} | train_acc={epoch_acc:.4f} | "
            f"val_acc={val_acc:.4f} | best_val_acc={best_val_acc:.4f} (epoch {best_epoch+1})"
        )

    # Test best model
    best_test_acc = eval_model(best_model, test_loader)
    writer.add_scalar("Accuracy/best_test", best_test_acc, epochs)

    writer.close()

    torch.save(best_model.state_dict(), "best_model.pth")

    return best_model, best_val_acc, best_epoch, best_test_acc

In [40]:
model = MLP(n_inputs=[784], 
            n_hidden=[256, 256, 256, 256, 256, 128, 128, 128, 128], 
            n_classes=[10], use_batch_norm=True)

train_model(model=model, 
            train_loader=train_loader,
            test_loader=test_loader,
            batch_size=1024,
            epochs=30,
            seed=42,
            lr=0.1)

TypeError: can only concatenate list (not "types.GenericAlias") to list

In [None]:
%tensorboard --logdir runs/our_experiment