# Training a digit-classification ML model on the MNIST dataset

## Importing all necessary libraries:

In [17]:
import os

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

## Selecting NVIDIA GPU as device:

In [18]:
# Select the GPU only when CUDA is actually usable. Note the call parentheses:
# the bare function object `torch.cuda.is_available` is always truthy, which
# would pick "cuda" even on a CPU-only machine.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if cuda_available else "cpu")
print(device) # check whether a GPU was found

print(cuda_available)
print(torch.cuda.device_count())
if cuda_available:
    # These queries need a usable CUDA device, so skip them on CPU-only machines.
    print(torch.cuda.current_device())
    print(torch.cuda.get_device_name(torch.cuda.current_device()))

cuda
True
1
0
NVIDIA GeForce RTX 4070 SUPER


## Defining Hyperparameters

In [19]:
# Number of samples per mini-batch; passed to the training and test DataLoaders.
batch_size = 100

## Data Handling:

##### DataSet Object

In [20]:
class MyDataset(Dataset):
    """Dataset backed by a file holding an ``(inputs, labels)`` pair of tensors.

    The inputs are divided by 255 and the integer labels are one-hot encoded
    into 10 classes when the dataset is constructed.
    """

    def __init__(self, filepath):
        """Load the tensor pair stored at ``filepath`` and pre-process it."""
        raw_inputs, raw_labels = torch.load(filepath)

        # Scale the stored values by 1/255 (maps 0..255 pixel values to [0, 1]).
        self.x = raw_inputs / 255.

        # The class labels are nominal, so store them one-hot encoded
        # (converted with Python `float`, i.e. a float64 tensor).
        encoded = F.one_hot(raw_labels, num_classes=10)
        self.y = encoded.to(float)

    def __len__(self):
        """Return the number of samples (size of the first input dimension)."""
        return self.x.size(0)

    def __getitem__(self, i):
        """Return the ``(input, one_hot_label)`` pair at index ``i``."""
        sample, target = self.x[i], self.y[i]
        return sample, target

#### Loading Training Set and Test Set

In [21]:
# Build one dataset per split from the pre-processed tensor files on disk.
# MyDataset scales the inputs by 1/255 and one-hot encodes the labels on load.
training_set = MyDataset('MNIST/processed/training.pt')
test_set = MyDataset('MNIST/processed/test.pt')

In [22]:
# Sanity check: number of samples in the training and test split, one per line.
print(len(training_set), len(test_set), sep='\n')

60000
10000


#### DataLoader

In [23]:
# Shuffle the training samples so each epoch draws its mini-batches in a new
# order; without it every epoch replays the exact same sequence of batches.
training_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True)

## Loss Function

In [24]:
# Cross-entropy loss; it is applied to raw (un-normalised) network outputs.
# NOTE(review): train_model builds its own nn.CrossEntropyLoss instance, so `L`
# does not appear to be used by the visible training code - confirm before relying on it.
L = nn.CrossEntropyLoss()

## Neural Network Architecture

In [25]:
class NeuralNet(nn.Module):
    """Fully connected network with a single hidden layer and ReLU activation.

    ``forward`` returns raw class scores (logits); no softmax is applied.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: number of input features per sample.
            hidden_size: number of units in the hidden layer.
            num_classes: number of output scores per sample.
        """
        super().__init__()
        self.input_size = input_size
        # Attribute names are kept as-is: they define the state_dict keys.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch of inputs to un-normalised class scores."""
        hidden = self.relu(self.l1(x))
        # Deliberately no activation / softmax on the output layer.
        return self.l2(hidden)

In [26]:
# 784 input features per sample (the loops below flatten each image to 28*28
# values), one hidden layer of 500 units, and 10 output classes.
model = NeuralNet(input_size=784, hidden_size=500, num_classes=10)
model = model.to(device)  # move the parameters to the selected device

## Building Training Loop

In [27]:
def train_model(train_loader, model, num_epochs, learning_rate,
                save_path='trained_models/nn.pth'):
    """Train ``model`` with Adam and cross-entropy loss, then save its weights.

    Each batch of images is flattened to ``28*28`` features. Labels are
    reduced to class indices with ``argmax`` over dimension 1, so the loader
    is expected to yield one-hot (or score-like) label rows.

    Args:
        train_loader: iterable of ``(images, labels)`` batches that also
            supports ``len()`` (e.g. a ``DataLoader``).
        model: the ``nn.Module`` to train; batches are moved to the device
            its parameters live on.
        num_epochs: number of passes over ``train_loader``.
        learning_rate: learning rate handed to the Adam optimizer.
        save_path: file the final ``state_dict`` is written to. Missing parent
            directories are created. Pass ``None`` to skip saving.

    Returns:
        dict with the keys ``'loss'``, ``'precision'``, ``'recall'`` and
        ``'f1'``; each maps to a list with one value per epoch (mean batch
        loss and macro-averaged training metrics).
    """
    # Use the device the model already lives on rather than a notebook global,
    # so inputs and parameters can never end up on different devices.
    model_device = next(model.parameters()).device

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Per-epoch loss and performance metrics, returned to the caller
    history = {'loss': [], 'precision': [], 'recall': [], 'f1': []}

    for epoch in range(num_epochs):
        model.train()
        all_labels = []
        all_predictions = []
        running_loss = 0.0

        for images, labels in train_loader:
            images = images.reshape(-1, 28*28).to(model_device)
            # Recover class indices from the label rows once per batch; they
            # feed both the loss and the metric bookkeeping.
            targets = torch.argmax(labels.to(model_device), dim=1)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, targets)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # detach() keeps the metric bookkeeping out of the autograd graph
            predicted = outputs.detach().argmax(dim=1)
            all_labels.extend(targets.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

        # Average batch loss for the epoch
        epoch_loss = running_loss / len(train_loader)

        # Macro-averaged performance metrics on the training data of this epoch
        precision = precision_score(all_labels, all_predictions, average='macro')
        recall = recall_score(all_labels, all_predictions, average='macro')
        f1 = f1_score(all_labels, all_predictions, average='macro')

        history['loss'].append(epoch_loss)
        history['precision'].append(precision)
        history['recall'].append(recall)
        history['f1'].append(f1)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}')

    if save_path is not None:
        # Create the target directory first so a missing folder cannot throw
        # away a finished training run.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        torch.save(model.state_dict(), save_path)

    # Report the batch size the loader actually used, not a notebook global.
    loader_batch_size = getattr(train_loader, 'batch_size', None)

    print('Finished Training')
    print()
    print('Hyperparameters used for training:')
    print(f'Number of Epochs: {num_epochs}, Batch Size: {loader_batch_size}, learning rate: {learning_rate}')
    print()

    return history


## Training Model

In [40]:
# NOTE(review): the recorded output below already starts at a loss of 0.0004 in
# epoch 1 and the execution counter jumps ahead, which suggests this cell was
# re-run on an already-trained `model`. Re-create the model first if a training
# curve from scratch is wanted.
# The trailing ';' keeps the cell output limited to the printed training log.
train_model(
    training_loader,
    model,
    num_epochs=12,
    learning_rate=0.001,
);

Epoch [1/12], Loss: 0.0004, Precision: 0.9999, Recall: 0.9999, F1-Score: 0.9999
Epoch [2/12], Loss: 0.0001, Precision: 0.9999, Recall: 0.9999, F1-Score: 0.9999
Epoch [3/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Epoch [4/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Epoch [5/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Epoch [6/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Epoch [7/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Epoch [8/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Epoch [9/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Epoch [10/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Epoch [11/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Epoch [12/12], Loss: 0.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Finished Training

Hyperparameters us

## Testing Model

In [41]:
# Evaluate the trained network on the test split.
test_loader = DataLoader(test_set, batch_size=batch_size)

# Switch the module to evaluation mode before running inference.
model.eval()

all_labels = []
all_predictions = []

# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten each image to a 28*28 feature vector, as done for training.
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest output score per sample.
        predicted = outputs.max(dim=1).indices
        # Labels are one-hot rows; argmax recovers the class index.
        all_labels.extend(labels.argmax(dim=1).cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

# Accuracy in percent: share of samples whose prediction equals the label.
matches = np.array(all_predictions) == np.array(all_labels)
accuracy = 100 * matches.sum() / len(all_labels)

# Macro-averaged metrics: every class contributes with equal weight.
precision = precision_score(all_labels, all_predictions, average='macro')
recall = recall_score(all_labels, all_predictions, average='macro')
f1 = f1_score(all_labels, all_predictions, average='macro')

print(f'Accuracy of the network on the test images: {accuracy:.2f}%')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-Score: {f1:.4f}')

# Confusion matrix built from (true labels, predicted labels).
cm = confusion_matrix(all_labels, all_predictions)
print("Confusion Matrix:")
print(cm)

Accuracy of the network on the test images: 98.54%
Precision: 0.9853
Recall: 0.9853
F1-Score: 0.9853
Confusion Matrix:
[[ 974    1    1    0    0    1    1    1    1    0]
 [   0 1128    2    1    0    0    1    1    2    0]
 [   1    1 1014    3    3    0    2    2    6    0]
 [   1    0    4  998    0    3    0    1    2    1]
 [   1    0    2    0  966    0    4    0    0    9]
 [   2    0    1    8    1  876    2    1    1    0]
 [   3    2    0    1    3    2  946    0    1    0]
 [   1    2    5    1    0    0    0 1013    2    4]
 [   3    0    3    3    2    2    1    3  953    4]
 [   0    2    0    3    9    3    0    3    3  986]]
