# Workshop 2: Handwritten digit classification with PyTorch
In this workshop we will learn how to train a neural network that takes images
as input and classifies handwritten digits from the MNIST dataset
([information about the data](http://yann.lecun.com/exdb/mnist/)). The main blocks of the workshop are:

1. Get the data from the PyTorch repository and visualize it.
2. Pre-process the data.
3. Design the network.
4. Train the network.
5. Evaluate the model.

# 1. Get the data from the PyTorch repository and visualize it.

In [None]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchsummary import summary

import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Seed PyTorch's global random number generator for reproducibility.
# Everything below that draws from that generator (e.g. the dataset split and
# the shuffling of the training loader) only repeats exactly when the cells
# are executed in order after this one.
torch.manual_seed(42)

In [None]:
# Fetch MNIST into ./data (downloaded on first use) with no transform applied;
# the `train` flag selects the training partition (True) or the test one (False).
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True)
    for is_train in (True, False)
)

In [None]:
# Echo the dataset object so the notebook prints its summary
train_dataset

In [None]:
# Echo the first training sample. Element 0 of the sample is the image (it is
# displayed and converted to an array below); element 1 is presumably its label.
train_dataset[0]

In [None]:
# Display the image of the first training sample.
# NOTE(review): presumably a PIL image here (no transform was set), in which
# case show() opens it in an external viewer rather than inline - confirm.
train_dataset[0][0].show()

In [None]:
# Convert the first training image to a NumPy array and report its basic
# statistics (shape and value range) before any pre-processing is applied.
sample_array = np.array(train_dataset[0][0])
for caption, value in (
    ("Sample shape:", sample_array.shape),
    ("Sample min value:", sample_array.min()),
    ("Sample max value:", sample_array.max()),
):
    print(caption, value)

In [None]:
# Echo the test dataset object so the notebook prints its summary
test_dataset

In [None]:
# Echo the first test sample. Element 0 of the sample is the image (it is
# displayed in the next cell); element 1 is presumably its label.
test_dataset[0]

In [None]:
# Display the image of the first test sample.
# NOTE(review): presumably a PIL image here (no transform was set), in which
# case show() opens it in an external viewer rather than inline - confirm.
test_dataset[0][0].show()

# 2. Pre-process the data.

In [None]:
# Reload MNIST with a transform so that samples come out as tensors instead of
# images (ToTensor also rescales pixel values; the next cell prints the
# resulting shape and value range).
transform = transforms.Compose([
    transforms.ToTensor(),
])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Split the test set into validation and test sets.
# A dedicated, seeded generator is used instead of the global RNG so that the
# split is identical every time this cell runs; otherwise re-running the cell
# would move samples between the validation and test partitions.
split_generator = torch.Generator().manual_seed(42)
valid_dataset, test_dataset = torch.utils.data.random_split(
    test_dataset, [5000, 5000], generator=split_generator
)

# Only the training data is shuffled; validation and test order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# Pull one batch from the training loader and report its shape and value
# range, to check the effect of the pre-processing transform.
batch_iterator = iter(train_loader)
images, labels = next(batch_iterator)
for caption, value in (
    ("Sample shape:", images.shape),
    ("Sample min value:", images.min()),
    ("Sample max value:", images.max()),
):
    print(caption, value)

In [None]:
# Set Matplotlib's default figure size (nothing is imported here; `plt` comes
# from the import cell at the top of the notebook).
plt.rcParams['figure.figsize'] = (10,10)  # Configure figure size for
                                          # appropriate visualization

In [None]:
# Plot the first images of the current batch in an nrows x ncols grid,
# titling each one with its class label.
nrows = 3
ncols = 3
# Never ask for more images than the batch actually contains.
nsamples = min(nrows*ncols, len(images))
for i in range(nsamples):
    # Use nrows/ncols (not literals) so changing them above resizes the grid.
    plt.subplot(nrows, ncols, i+1)
    # Index 0 on the second axis selects the single channel of image i.
    plt.imshow(images[i, 0, :, :], cmap='gray', interpolation='none')
    plt.title("Class {}".format(labels[i]))

# 3. Design the network.

In [None]:
# Define model
class Net(nn.Module):
    """Fully connected classifier with one hidden layer.

    The input is flattened to 784 features, passed through a ReLU-activated
    hidden layer and projected onto 10 output classes.

    Args:
        hidden_size: Number of neurons in the hidden layer (default 32).
    """

    def __init__(self, hidden_size=32):
        super().__init__()
        self.fc1 = nn.Linear(784, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of inputs.

        Args:
            x: Tensor whose elements can be viewed as rows of 784 values.

        Returns:
            Tensor with one row of 10 unnormalised scores per input row.
        """
        x = x.view(-1, 784)
        x = torch.relu(self.fc1(x))
        # No softmax here: nn.CrossEntropyLoss expects unnormalised logits and
        # applies log-softmax itself, so a softmax in the model would be
        # applied twice and flatten the gradients. The arg-max (and therefore
        # the accuracy) is unaffected; call torch.softmax(logits, dim=1) on
        # the output if probabilities are needed.
        return self.fc2(x)

model = Net()

In [None]:
# Print a per-layer summary for inputs of shape (1, 28, 28).
# torchsummary creates its dummy input on CUDA by default when a GPU is
# available, which fails while the model is still on the CPU; pass the device
# the model's parameters actually live on instead.
summary(model, (1, 28, 28), device=next(model.parameters()).device.type)

In [None]:
# Loss: cross-entropy between the model outputs and the class labels.
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over all parameters of the model, learning rate 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# 4. Train the network.

In [None]:
def get_accuracy(y_pred, y_true):
    """Return the fraction of samples whose top-scoring class matches the label.

    Args:
        y_pred: Tensor of per-class scores; the class axis is dimension 1.
        y_true: Tensor of class indices, compared element-wise with the
            arg-max of ``y_pred``.

    Returns:
        A float tensor: the number of matches divided by ``len(y_true)``.
    """
    predicted_classes = y_pred.argmax(dim=1)
    num_correct = (predicted_classes == y_true).sum().float()
    return num_correct / len(y_true)

In [None]:
# Train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def _evaluate(loader):
    """Return ``(mean loss, mean accuracy)`` of ``model`` over all samples of ``loader``.

    Per-batch values are weighted by the batch size, so a smaller final batch
    does not bias the averages. Gradients are disabled and the model is put in
    evaluation mode for the duration of the pass.
    """
    loss_sum = 0.0
    accuracy_sum = 0.0
    num_samples = 0

    model.eval()
    with torch.no_grad():
        for images, labels in loader:
            # Move images and labels to device
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            output = model(images)

            # `criterion` and `get_accuracy` both return per-batch means;
            # multiply by the batch size to turn them back into sums.
            batch_size = labels.size(0)
            loss_sum += criterion(output, labels).item() * batch_size
            accuracy_sum += get_accuracy(output, labels).item() * batch_size
            num_samples += batch_size

    return loss_sum / num_samples, accuracy_sum / num_samples


# Per-epoch metrics (plain Python floats), used for the plots below
train_loss_history = []
valid_loss_history = []
train_accuracy_history = []
valid_accuracy_history = []

num_epochs = 10

for epoch in range(num_epochs):
    # _evaluate() leaves the model in eval mode, so switch back every epoch
    model.train()
    for images, labels in train_loader:

        # Clear gradients
        optimizer.zero_grad()

        # Move images and labels to device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)

        # Compute loss
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

    # Measure loss and accuracy on both partitions with the weights obtained
    # at the end of the epoch
    train_loss, train_accuracy = _evaluate(train_loader)
    valid_loss, valid_accuracy = _evaluate(valid_loader)

    train_loss_history.append(train_loss)
    train_accuracy_history.append(train_accuracy)
    valid_loss_history.append(valid_loss)
    valid_accuracy_history.append(valid_accuracy)

    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Train loss: {train_loss:.3f} | "
          f"Train accuracy: {train_accuracy:.3f} | "
          f"Valid loss: {valid_loss:.3f} | "
          f"Valid accuracy: {valid_accuracy:.3f}")

In [None]:
# Accuracy per epoch on the training and validation partitions
for history, series_name in (
    (train_accuracy_history, 'Train'),
    (valid_accuracy_history, 'Val'),
):
    plt.plot(history, label=series_name)
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Loss per epoch on the training and validation partitions
for history, series_name in (
    (train_loss_history, 'Train'),
    (valid_loss_history, 'Val'),
):
    plt.plot(history, label=series_name)
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper left')
plt.show()

# 5. Evaluate the model.

In [None]:
# Calculate metrics in the test partition
test_loss = 0.0
test_accuracy = 0.0
num_test_samples = 0

model.eval()
# Disable autograd: without it every accumulated batch loss would keep its
# computation graph alive and the printed results would carry a grad_fn.
with torch.no_grad():
    for images, labels in test_loader:

        # Move images and labels to device
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        output = model(images)

        # `criterion` and `get_accuracy` return per-batch means; weight them by
        # the batch size so a smaller final batch does not bias the average.
        batch_size = labels.size(0)
        test_loss += criterion(output, labels).item() * batch_size
        test_accuracy += get_accuracy(output, labels).item() * batch_size
        num_test_samples += batch_size

print("Test loss:", test_loss / num_test_samples)
print("Test accuracy:", test_accuracy / num_test_samples)

# Exercise 1: Create a new model with 512 neurons in the hidden layer and repeat the process

In [None]:
# Design the model


In [None]:
# Show summary of the model


In [None]:
# Define loss function and optimizer


In [None]:
# Train the model


In [None]:
# Obtain metrics in the test partition


# Exercise 2: Add a hidden layer with 512 neurons to the model of exercise 1 and repeat the process

In [None]:
# Design the model


In [None]:
# Show summary of the model


In [None]:
# Define loss function and optimizer


In [None]:
# Train the model


In [None]:
# Obtain metrics in the test partition
