In [None]:
!pip install torchmetrics -q

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import torchmetrics
from torchvision.datasets import MNIST
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
import numpy as np
from IPython.display import display

%matplotlib inline

In [None]:
# Maybe delete this. Not sure if I want to get into GPUs yet, that should be a separate lesson.
# Use the first CUDA device when torch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu:0'
device

In [None]:
# Read the train and validation CSVs; both are loaded without a header row.
mnist_train, mnist_valid = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        'sample_data/mnist_train_small.csv',
        'sample_data/mnist_test.csv',
    )
)

In [None]:
# Take a look at the data.
# It looks like the first column (0) is the label,
# and columns 1 - 784 are the pixels (28 x 28 = 784 values).
n_rows, n_columns = mnist_train.shape
print((n_rows, n_columns))
mnist_train.head()

In [None]:
# What's the distribution of labels in the train set?
train_label_counts = mnist_train[0].value_counts().sort_index()
train_label_counts.plot.bar()

In [None]:
# What's the distribution of labels in the validation set?
# (Column 0 of the validation dataframe holds the labels.)
mnist_valid[0].value_counts().sort_index().plot.bar()

In [None]:
def show_number(row, ax=None):
    """
    This function shows a row as an image, and titles it with the label.

    Options:
    * row: a row from either of the mnist_train or mnist_valid dataframes.
        The first value is used as the title; the remaining values are
        reshaped into a 28x28 image.
    * ax: if not None, will plot the digit on the provided ax and return
        nothing. Otherwise, this function creates a new figure, closes it,
        and returns it.
    """
    # Compare against None explicitly rather than relying on truthiness.
    created_fig = ax is None
    target, values = row.values[0], row.values[1:].reshape(28, 28)
    if created_fig:
        fig, ax = plt.subplots()
    ax.imshow(values, cmap='gray_r')
    ax.set_title(target)

    if created_fig:
        # Only close the figure created here. A caller-supplied ax belongs to
        # a figure the caller manages, so it is left alone.
        plt.close(fig)
        return fig

In [None]:
# Show one randomly sampled row of the training data.
show_number(mnist_train.sample(1).iloc[0])

In [None]:
def show_many(n_rows=3, n_cols=3):
    """
    This function shows a number of images at a time, by default 9.
    It takes a random sample of (n_rows * n_cols) of the training data to show.

    Options:
    * n_rows: number of rows in the grid of images.
    * n_cols: number of columns in the grid of images.

    Returns the figure containing the grid.
    """
    # Sample the training data
    train_df_sample = mnist_train.sample(n_rows * n_cols)
    # Create the figure
    fig = plt.figure(figsize=(4*n_cols, 4*n_rows))
    # For each row in the sample, plot the number (subplot positions are 1-based)
    for position, (_, row) in enumerate(train_df_sample.iterrows(), start=1):
        ax = fig.add_subplot(n_rows, n_cols, position)
        show_number(row, ax)
    # Close exactly the figure created here instead of whichever one is current.
    plt.close(fig)
    return fig

In [None]:
# Show the default 3x3 grid of randomly sampled training digits.
show_many()

In [None]:
# What are the max and min values of the pixel columns?
train_pixels = mnist_train.loc[:, 1:].values
train_pixels.max(), train_pixels.min()

In [None]:
# Histogram of every pixel value in the training set, with counts on a log scale.
pixel_values = mnist_train.loc[:, 1:].values.ravel()
plt.hist(pixel_values)
plt.xlabel('Value')
plt.ylabel('Number of pixels')
plt.yscale('log')

In [None]:
# Let's scale all the data between 0 and 1.
# Build a new frame from the untouched label column plus the divided pixel
# columns. Dividing in place through .loc would write floats into the existing
# pixel columns, which (presumably integer-typed) relies on silent upcasting
# that newer pandas versions deprecate.
mnist_train_scaled = pd.concat(
    [mnist_train.iloc[:, :1], mnist_train.iloc[:, 1:] / 255],
    axis=1,
)

In [None]:
# Scale the validation pixels the same way: keep the label column as-is and
# divide the pixel columns by 255. A new frame is built instead of dividing in
# place through .loc, which would write floats into the existing (presumably
# integer-typed) columns and rely on upcasting that newer pandas deprecates.
mnist_valid_scaled = pd.concat(
    [mnist_valid.iloc[:, :1], mnist_valid.iloc[:, 1:] / 255],
    axis=1,
)

In [None]:
# Check the max and min of the scaled pixel columns.
scaled_train_pixels = mnist_train_scaled.loc[:, 1:].values
scaled_train_pixels.max(), scaled_train_pixels.min()

In [None]:
# Exercise stub: implement this Dataset. The cells that follow construct it
# from a dataframe and index it to get an (x, y) pair.
class MnistDataset(Dataset):
    pass

In [None]:
# Wrap each scaled dataframe in a MnistDataset (train first, then validation).
train_ds, valid_ds = (
    MnistDataset(scaled_df)
    for scaled_df in (mnist_train_scaled, mnist_valid_scaled)
)

In [None]:
# Sanity check!
# The first sample should have one feature per non-label column, and its
# target should equal the value stored at row 0, column 0 of the dataframe.
_x, _y = train_ds[0]
expected_n_features = mnist_train_scaled.shape[1] - 1
expected_label = mnist_train_scaled.loc[0, 0]
assert _x.shape[0] == expected_n_features
assert _y == expected_label

In [None]:
#@markdown Answer: `MnistDataset`
class MnistDataset(Dataset):
    """
    Dataset over a dataframe whose first column is the label and whose
    remaining columns are the features.

    Options:
    * df: the dataframe to wrap. It is converted to tensors once, at
        construction, so later changes to the dataframe are not reflected.

    Indexing with an integer position returns an (x, y) tuple: x is a float32
    tensor of that row's feature columns, y is a scalar tensor with its label.
    """
    def __init__(self, df):
        super().__init__()
        self.df = df
        # Convert once up front instead of doing a pandas lookup per sample.
        # Positional (iloc) selection keeps indexing correct even when the
        # dataframe index is not 0..n-1 (e.g. after sampling or filtering).
        self.x = torch.tensor(df.iloc[:, 1:].to_numpy(dtype='float32'))
        self.y = torch.tensor(df.iloc[:, 0].to_numpy())

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [None]:
# Create dataloaders from the datasets.
# During the training phase, we need to keep both the activations
# and the gradients in memory. However during the validation phase,
# we don't have to store gradients so we can double the batch size!
# Exercise: replace the None placeholders below with the two DataLoaders.

train_dl = None
valid_dl = None

In [None]:
#@markdown Answer: `train_dl` and `valid_dl`

# Training batches are shuffled; validation batches are twice as large and kept in order.
TRAIN_BATCH_SIZE = 128
train_dl = DataLoader(dataset=train_ds, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=2 * TRAIN_BATCH_SIZE, shuffle=False)

In [None]:
# Exercise stub: replace the NotImplementedError with the module described in
# the docstring. The sanity check that follows expects four sub-modules.
def linear(in_features, out_features, dropout=0.2):
    """
    Returns an nn.Sequential module that we want to repeat a lot.
    The module contains a linear layer, ReLU activation, BatchNorm, and dropout.
    """
    raise NotImplementedError

In [None]:
# Sanity check!
# The returned nn.Sequential should contain exactly four modules.
assert len(linear(1,1)) == 4

In [None]:
#@markdown Answer: `linear`
def linear(in_features, out_features, dropout=0.2, batch_norm=True):
    """
    Returns an nn.Sequential module that we want to repeat a lot.
    The module contains a linear layer, ReLU activation, BatchNorm, and dropout.

    Options:
    * in_features: input size of the linear layer.
    * out_features: output size of the linear layer (and of the BatchNorm).
    * dropout: probability passed to nn.Dropout.
    * batch_norm: if False, the BatchNorm layer is left out, so the module has
        three sub-modules instead of four.
    """
    layers = [nn.Linear(in_features, out_features), nn.ReLU()]
    # Honor the batch_norm flag instead of silently ignoring it.
    if batch_norm:
        layers.append(nn.BatchNorm1d(out_features))
    layers.append(nn.Dropout(dropout))
    return nn.Sequential(*layers)

In [None]:
# Define some parameters for the model
# One input feature per pixel of a 28x28 image.
N_INPUT_FEATURES = 28*28
# Number of hidden-to-hidden blocks between the input block and the output layer.
N_HIDDEN_LAYERS = 2
# Width of every hidden block.
HIDDEN_DIM = 256
# Size of the model's output (presumably one score per digit, 0-9).
OUTPUT_DIM = 10

In [None]:
# Exercise stub: replace None with the layers of the model.
model = nn.Sequential(
    None
).to(device)

In [None]:
# Sanity check!
# Expect one input block, N_HIDDEN_LAYERS hidden blocks, and one output layer.
assert len(model) == N_HIDDEN_LAYERS + 2

In [None]:
# Another sanity check: our model should be able to operate on a batch of data.
# Pull only the first batch from the training loader and push it through the model.
x_b, y_b = next(iter(train_dl))
model(x_b.to(device)).shape

In [None]:
#@markdown Answer: `model`
# Build the pieces in order: input block, N_HIDDEN_LAYERS hidden blocks, output layer.
input_block = linear(N_INPUT_FEATURES, HIDDEN_DIM)
hidden_blocks = [linear(HIDDEN_DIM, HIDDEN_DIM) for _ in range(N_HIDDEN_LAYERS)]
output_layer = nn.Linear(HIDDEN_DIM, OUTPUT_DIM)
model = nn.Sequential(input_block, *hidden_blocks, output_layer).to(device)

In [None]:
# Exercise: replace the `...` placeholders with a loss function and an optimizer.
loss_func = ...
opt = ...
# Current torchmetrics releases require the task to be named explicitly;
# the number of classes matches the model's output size.
metric = torchmetrics.Accuracy(task='multiclass', num_classes=OUTPUT_DIM)

In [None]:
#@markdown Answer: `loss_func`, `opt`, and `metric`
loss_func = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters()) # This can totally be a different optimizer, up to you!
# Current torchmetrics releases require the task to be named explicitly;
# the number of classes matches the model's output size.
metric = torchmetrics.Accuracy(task='multiclass', num_classes=OUTPUT_DIM)

In [None]:
# Exercise template: fill in the `...` placeholders, the gradient calculation,
# and the two unfinished `opt.` calls before running this cell.
def train_step(x_b, y_b):
    # Send x_b and y_b to the GPU, if available
    x_b = ...
    y_b = ...
    # Generate yhat
    yhat = ...
    # Calculate the loss
    loss = ...
    # Calculate gradients

    # Perform your update and zero out your gradients
    opt.
    opt.

    # Update your accuracy metric. We'll give you this one!
    batch_acc = metric(yhat.cpu().softmax(axis=1), y_b.cpu())

    # Return the loss
    return loss

In [None]:
#@markdown Answer: `train_step`
def train_step(x_b, y_b):
    """
    Runs one optimization step on a single training batch.

    Options:
    * x_b: batch of inputs, fed to the model.
    * y_b: batch of targets, fed to the loss function and the accuracy metric.

    Returns the batch loss as a tensor detached from the autograd graph.
    As a side effect, updates the model parameters and the shared metric.
    """
    # Send x_b and y_b to the GPU, if available
    x_b = x_b.to(device)
    y_b = y_b.to(device)
    # Generate yhat
    yhat = model(x_b)
    # Calculate the loss
    loss = loss_func(yhat, y_b)
    # Calculate gradients
    loss.backward()
    # Perform your update and zero out your gradients
    opt.step()
    opt.zero_grad()

    # Update your accuracy metric. The predictions are detached first because
    # the metric only needs their values, not their gradient history.
    metric(yhat.detach().cpu().softmax(dim=1), y_b.cpu())

    # Gradients have already been applied, so hand back a detached loss.
    return loss.detach()

In [None]:
# Exercise template: complete the `with ...` statement and fill in the `...`
# placeholders before running this cell.
def validation_step(x_b, y_b):
    # Send x_b and y_b to the GPU, if available
    x_b = x_b.to(device)
    y_b = y_b.to(device)
    # Tell torch not to calculate gradients on the validation batch
    with ...
        # Generate yhat
        yhat = ...
        # Calculate the loss
        loss = ...

    # Ok, you've seen this before, you do it this time!
    batch_acc = ...

    # Return the loss
    return loss

In [None]:
#@markdown Answer: `validation_step`
def validation_step(x_b, y_b):
    """
    Evaluates the model on one validation batch without tracking gradients.

    Options:
    * x_b: batch of inputs, fed to the model.
    * y_b: batch of targets, fed to the loss function and the accuracy metric.

    Returns the batch loss. As a side effect, updates the shared metric.
    """
    # Move both halves of the batch onto the active device
    inputs, targets = x_b.to(device), y_b.to(device)
    # No gradients are needed for the forward pass or the loss here
    with torch.no_grad():
        outputs = model(inputs)
        batch_loss = loss_func(outputs, targets)
    # Feed this batch into the accuracy metric
    metric(outputs.cpu().softmax(axis=1), targets.cpu())
    return batch_loss

In [None]:
# Number of times the training loop below runs a full train + validation pass.
N_EPOCHS = 10

In [None]:
for epoch in range(N_EPOCHS):
    # Training loop
    model.train() # Put the model in train mode
    # The same metric object is shared by both phases, so clear it first;
    # otherwise earlier epochs and validation batches leak into train_acc.
    metric.reset()
    train_loss_epoch = 0.
    for x_b, y_b in train_dl:
        loss = train_step(x_b, y_b)
        # Accumulate a plain float rather than a tensor
        train_loss_epoch += float(loss)

    # Compute the train loss and accuracy for the epoch.
    # The epoch loss is a little bit off if our final batch
    # is a different size - we're going to ignore that for now,
    # since higher-level libraries will solve this for us.
    train_loss_epoch /= len(train_dl)
    # Read the value out before the metric is reset for validation
    train_acc = float(metric.compute())

    # Validation loop
    model.eval() # Put the model in eval mode (affects dropout and batch norm)
    # Drop the training batches so valid_acc only reflects validation data
    metric.reset()
    val_loss_epoch = 0.
    for x_b, y_b in valid_dl:
        loss = validation_step(x_b, y_b)
        val_loss_epoch += float(loss)

    val_loss_epoch /= len(valid_dl)
    valid_acc = float(metric.compute())

    # Valid loss is formatted the same way as the other numbers
    print(f"Epoch: {epoch}, Train loss: {train_loss_epoch:.04f} Train acc: {float(train_acc):.04f}, Valid loss: {val_loss_epoch:.04f} Valid Acc: {float(valid_acc):.04f}")

In [None]:
def show_preds(n_rows=3, n_cols=3):
    """
    Shows actuals and inferences from a random sample of the validation dataset.

    Options:
    * n_rows: number of rows in the grid of digits.
    * n_cols: number of columns in the grid of digits.
        n_rows * n_cols images are sampled; the default is a 3x3 grid.

    Returns the figure. Note that this puts the model in eval mode.
    """
    # Sample a few images
    sample = mnist_valid_scaled.sample(n_rows * n_cols)
    # Get the sample into a format we can feed into the model
    x_b = torch.FloatTensor(sample.loc[:, 1:].values)
    y_b = sample.loc[:,0].values

    # Make inferences on the sample
    model.eval()
    with torch.no_grad():
        # Get the inferences, apply softmax to convert to predicted probabilities,
        # and use argmax to get the index of the highest probability.
        # This is the digit!
        preds = model(x_b.to(device)).softmax(dim=-1).argmax(dim=-1).cpu().numpy()

    # Plot a grid of digits, where the title
    # contains the prediction and the actual value.
    fig = plt.figure(figsize=(4*n_cols, 4*n_rows))
    for position, (x, y, p) in enumerate(zip(x_b, y_b, preds), start=1):
        ax = fig.add_subplot(n_rows, n_cols, position)
        ax.matshow(x.reshape(28, 28), cmap='Greys_r')
        ax.set_title(f'Actual: {y}, Pred: {int(p)}')
        ax.set_xticks([])
        ax.set_yticks([])
    fig.tight_layout()
    # Close exactly the figure created here instead of whichever one is current.
    plt.close(fig)
    return fig

In [None]:
# Show predictions next to the actual labels for a random validation sample.
show_preds()