# Training an Image Classifier on the MNIST Dataset

Two sentence description of the MNIST dataset.

Run the cell below to import the necessary modules and libraries.

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as data
from torch.utils.data import random_split

import numpy

First, let's create our classifier. 
- Create a class called `ImageClassifier` that inherits from `torch.nn.Module`.
- Make a simple two-layer network inside the class constructor. The input linear layer should have input size appropriate to a 28x28 pixel image, and an output size of 128.
- The output linear layer should have an output size of 10, reflecting the number of classes in the `MNIST` dataset.
- The two linear layers should be connected by an activation layer.
- Don't forget to add inheritance from `nn.Module` by calling the `super` constructor.
- Create the `forward` method.

In [2]:
class SimpleClassifier(nn.Module):
    def __init__(self):
        super().__init__()

        # Define input, activation and output layers
        self.layers = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(28 * 28, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 10),
            torch.nn.ReLU(),
            torch.nn.Softmax()
        )


    def forward(self, x):
        return self.layers(x)

Next we create our image transform, and load the dataset. We can quickly load the dataset from the `torchvision.datasets` module as follows:

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

mnist_dataset = datasets.MNIST(root='mnist', train=True, download=True, transform=transform)

Now we need to perform a split on the data so that we can train our model and evaluate it. 

- Split the dataset into a training set comprising 80% of the data, and a test set comprising 20% of the data. Call these subsets `train_set` and `test_set`.
- Assign each split to its own dataloader, called `train_loader` and `test_loader` respectively. Set `shuffle=True` for the train loader.

In [4]:
# Define the sizes of the train and test set
total_size = len(mnist_dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

# Use the `random_split()` method to create a `train` and `test` dataset
train_data, test_data = random_split(mnist_dataset, [train_size, test_size])

# Create the training dataloader
train_loader = data.DataLoader(train_data, shuffle=True, pin_memory=True)

# Create the test dataloader
train_loader = data.DataLoader(test_data)

To get everything ready for training, we need to initialise the model, an optimiser and a criterion. In the code block below, initialise an instance of your model class, as well as an optimiser for Stochastic Gradient Descent (SGD), and an appropriate loss criterion.

In [5]:
# Initialize the model and optimizer

model = SimpleClassifier()
optimiser = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.functional.cross_entropy


Create the training loop inside a function called `train`.

In [6]:
# Train the model

def train(model: torch.nn.Module, train_loader: data.DataLoader, optimiser: optim.Optimizer, criterion) -> None:
    for epoch in range(10):
        running_loss = 0.0
        for images, labels in train_loader:
            model.zero_grad()
            predictions = model(images)
            
            loss = criterion(predictions, labels)
            loss.backward()
            running_loss += loss.item()
            
            optimiser.step()

        print(f'Epoch [{epoch + 1}/10], Loss: {running_loss / len(train_loader)}')

    print('Finished Training')

train(model, train_loader, optimiser, criterion)

  input = module(input)


Epoch [1/10], Loss: 1.8761663240393003
Epoch [2/10], Loss: 1.689560981084903
Epoch [3/10], Loss: 1.656089256733656
Epoch [4/10], Loss: 1.6406358544031778
Epoch [5/10], Loss: 1.6157760947346687
Epoch [6/10], Loss: 1.5386700637340545
Epoch [7/10], Loss: 1.5301534321208794
Epoch [8/10], Loss: 1.522450298746427
Epoch [9/10], Loss: 1.5194228985011577
Epoch [10/10], Loss: 1.513732946028312
Finished Training


Now let's see how the model performs on an example from the testing set. 

In [19]:
test_example, test_label = next(iter(test_data))
model.eval()

prediction = model(test_example)
probabilities: numpy.ndarray = torch.nn.functional.softmax(prediction).detach().numpy()

predicted_label = probabilities.argmax()
print(f"""
Predicted label: {predicted_label}
Label: {test_label}
""")

# TODO - Get a single example from the test dataset
# TODO - Set model to eval()
# TODO - Pass the example to the model to get the prediction logits.
# TODO - Take the softmax of the logits
# TODO - Print the class label for the prediction of highest likelihood.
# TODO - Print the real target label for the example.


Predicted label: 1
Label: 1



  probabilities: numpy.ndarray = torch.nn.functional.softmax(prediction).detach().numpy()


## Train on GPU

In [13]:
def train_gpu(model: torch.nn.Module, train_loader: data.DataLoader, optimiser: optim.Optimizer, criterion) -> None:
    for epoch in range(10):
        print(f"Epoch [{epoch + 1}/10], ", end='')
        running_loss = 0.0
        for images, labels in train_loader:
            torch.cuda.synchronize()
            images = images.to("cuda")
            labels = labels.to("cuda")

            model.zero_grad()
            predictions = model(images)
            
            loss = criterion(predictions, labels)
            loss.backward()
            running_loss += loss.item()
            
            optimiser.step()

        print(f'Loss: {running_loss / len(train_loader)}')

    print('Finished Training')


model = SimpleClassifier().to("cuda")
optimiser = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.functional.cross_entropy
train_gpu(model, train_loader, optimiser, criterion)

Epoch [1/10], 

  input = module(input)


KeyboardInterrupt: 