In [1]:
# import packages
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split

In [2]:
# Load and preprocess the MNIST dataset

# Tunable settings for this cell
BATCH_SIZE = 128   # mini-batch size shared by all three data loaders
VAL_SIZE = 10000   # number of training images held out for validation
SPLIT_SEED = 42    # fixes the train/validation split across kernel restarts

# Define a transformation pipeline that converts the images to tensor format
# and normalizes the pixel values to the range [-1, 1] via (x - 0.5) / 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Download (if not already present under ./data) and load the training and test datasets
train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
    download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False,
    download=True, transform=transform)

# Split the training dataset into a training set and a validation set.
# A dedicated, seeded generator makes the split identical on every run, so
# validation metrics are comparable between runs; the training size is derived
# from the dataset length instead of being hard-coded.
train_set, val_set = random_split(
    train_dataset,
    [len(train_dataset) - VAL_SIZE, VAL_SIZE],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create data loaders for the training, validation, and test sets.
# A DataLoader in PyTorch is an object that simplifies and automates
# batching, shuffling, and loading data for model training and evaluation.
# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Define CNN architecture
class CNN(nn.Module):
    """
    A Convolutional Neural Network (CNN) for classifying MNIST images.

    The network consists of two convolutional layers, each followed by a sigmoid
    activation and a shared max pooling layer, and two fully connected layers
    for classification.

    For a 1 x 28 x 28 input the spatial size evolves as
    28 -> 28 (conv1) -> 15 (pool) -> 15 (conv2) -> 8 (pool), because the pooling
    layer pads by 1 before taking 2 x 2 windows with stride 2. This is why the
    first fully connected layer expects 16 * 8 * 8 inputs.

    Attributes:
    conv1 : torch.nn.Conv2d
        The first convolutional layer (1 -> 8 channels, 3x3 kernel, padding 1)
    conv2 : torch.nn.Conv2d
        The second convolutional layer (8 -> 16 channels, 3x3 kernel, padding 1)
    activation : torch.nn.Sigmoid
        The activation function applied after each convolution and after fc1
    pool : torch.nn.MaxPool2d
        The max pooling layer, reused after both convolutions
    fc1 : torch.nn.Linear
        The first fully connected layer (16 * 8 * 8 -> 128)
    fc2 : torch.nn.Linear
        The second fully connected layer (128 -> 10 class scores)

    Methods:
    __init__(self):
        This function creates an instance of this class.
    forward(self, x):
        Performs a forward pass for an input x.
    """

    def __init__(self):
        """
        Initialize the CNN model by defining its layers.
        """
        super().__init__()
        # Define the first convolutional layer
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        # Define the second convolutional layer
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        # Define the activation function
        self.activation = nn.Sigmoid()
        # Define a pooling layer; it has no parameters, so one instance is shared
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        # Define a fully connected hidden layer with 128 nodes
        # Inputs are num_channels in last layer x image height x image width
        self.fc1 = nn.Linear(16 * 8 * 8, 128)
        # Define the output layer with 10 nodes
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """
        Define the forward pass of the CNN.

        Parameters:
        x : torch.Tensor
            The input tensor containing the image batch.

        Returns:
        torch.Tensor
            The output tensor containing the class scores for each image
            (raw scores; no softmax is applied here).

        Raises:
        RuntimeError
            If the flattened feature size per image does not match what fc1
            expects (for example, when the input images are not 28 x 28).
        """
        # Pass the input through the first convolutional layer, then apply activation
        x = self.activation(self.conv1(x))
        # Pass the input through the pooling layer
        x = self.pool(x)
        # Pass the input through the second convolutional layer, then apply activation
        x = self.activation(self.conv2(x))
        # Pass the input through the pooling layer again
        x = self.pool(x)
        # Flatten everything except the batch dimension. Keeping the batch
        # dimension fixed means a wrongly sized input fails loudly in fc1
        # instead of being silently reshaped into a different batch size.
        x = x.flatten(start_dim=1)
        # Pass the input through the fully connected hidden layer, then apply activation
        x = self.activation(self.fc1(x))
        # Pass the input through the last layer
        x = self.fc2(x)
        return x

In [None]:
# Define training pipeline including validation after each epoch
def _run_epoch(model, loader, criterion, optimizer=None):
    """
    Run one full pass over a data loader.

    Parameters:
    model : torch.nn.Module
        The model to run.
    loader : iterable with a length
        Yields (images, labels) batches; len(loader) is the number of batches.
    criterion : callable
        The loss function, called as criterion(outputs, labels).
    optimizer : torch.optim.Optimizer or None
        If given, the model is put in training mode and updated after every
        batch. If None, the model is put in evaluation mode and no gradients
        are computed.

    Returns:
    tuple
        (mean of the per-batch losses, accuracy in percent) for this pass.

    Raises:
    ZeroDivisionError
        If the loader yields no batches.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()
    loss_sum = 0.0
    correct, total = 0, 0

    # Gradients are only needed when an optimizer is going to consume them
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            if training:
                optimizer.zero_grad()  # Drop gradients left over from the previous batch
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            predicted = outputs.argmax(dim=1)  # Class with the highest score per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return loss_sum / len(loader), 100 * correct / total


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=3):
    """
    Train the CNN model, validating after every epoch.

    Training accuracy and loss are accumulated batch by batch while the
    parameters are being updated, so they describe the model as it evolves
    during the epoch; validation metrics are measured after the epoch ends.
    The model is left in evaluation mode when at least one epoch has run.

    Parameters:
    model : torch.nn.Module
        The CNN model to be trained.
    train_loader : torch.utils.data.DataLoader
        The data loader for the training set.
    val_loader : torch.utils.data.DataLoader
        The data loader for the validation set.
    criterion : torch.nn.modules.loss._Loss
        The loss function to be used.
    optimizer : torch.optim.Optimizer
        The optimizer to be used.
    epochs : int
        The number of epochs for training.

    Returns:
    tuple
        A tuple containing lists of training loss, validation loss, training
        accuracy, and validation accuracy, each with one entry per epoch.
        Losses are means of the per-batch losses; accuracies are percentages.
    """
    # Per-epoch history of training and validation loss and accuracy
    train_loss, val_loss = [], []
    train_acc, val_acc = [], []

    for epoch in range(epochs):
        # One optimisation pass over the training data
        epoch_train_loss, epoch_train_acc = _run_epoch(model, train_loader, criterion, optimizer)
        # One evaluation-only pass over the validation data
        epoch_val_loss, epoch_val_acc = _run_epoch(model, val_loader, criterion)

        train_loss.append(epoch_train_loss)
        train_acc.append(epoch_train_acc)
        val_loss.append(epoch_val_loss)
        val_acc.append(epoch_val_acc)

        # Print the results for the current epoch, including training and validation loss and accuracy
        print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {epoch_train_loss:.4f}, '
              f'Validation Loss: {epoch_val_loss:.4f}, '
              f'Train Acc: {epoch_train_acc:.2f}%, Val Acc: {epoch_val_acc:.2f}%')

    return train_loss, val_loss, train_acc, val_acc

In [None]:
# Train the model, then plot training & validation accuracy/loss

# The history lists plotted below are the return value of train_model, and no
# other cell builds or trains a model, so that step has to happen here for the
# notebook to run from a fresh kernel.
torch.manual_seed(0)  # Make weight initialisation and batch shuffling repeatable
model = CNN()
criterion = nn.CrossEntropyLoss()  # Takes the raw class scores returned by the model
optimizer = optim.Adam(model.parameters(), lr=1e-3)
train_loss, val_loss, train_acc, val_acc = train_model(
    model, train_loader, val_loader, criterion, optimizer
)

# Number epochs from 1 so the x-axis matches the "Epoch [k/N]" lines printed during training
epochs_axis = range(1, len(train_acc) + 1)

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: accuracy per epoch
ax_acc.plot(epochs_axis, train_acc, label='Training Accuracy')
ax_acc.plot(epochs_axis, val_acc, label='Validation Accuracy')
ax_acc.set_xticks(epochs_axis)
ax_acc.legend()
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training and Validation Accuracy')

# Right panel: loss per epoch
ax_loss.plot(epochs_axis, train_loss, label='Training Loss')
ax_loss.plot(epochs_axis, val_loss, label='Validation Loss')
ax_loss.set_xticks(epochs_axis)
ax_loss.legend()
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and Validation Loss')

plt.show()

In [None]:
# Evaluate the model on test set
# Requires `model` and `test_loader` to have been defined by earlier cells.
model.eval()  # Switch the model to evaluation mode
test_correct = 0  # Number of test images classified correctly so far
test_total = 0    # Number of test images seen so far

# No gradients are needed for evaluation
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        # The predicted class is the index of the highest score for each image
        batch_predictions = model(batch_images).argmax(dim=1)
        test_total += batch_labels.size(0)
        test_correct += (batch_predictions == batch_labels).sum().item()

# Accuracy as a percentage of all test images
test_acc = 100 * test_correct / test_total
print(f'Test Accuracy: {test_acc:.2f}%')