In [None]:
# Hyperparameters
# This is the only cell meant to be edited; adjust the values freely.

hidden_sizes = [128]     # one entry per hidden layer: that layer's size
batch_size = 32          # how many samples go into each batch
learning_rate = 1e-2     # step size used by gradient descent
epochs = 10              # how many times training iterates over the dataset

In [None]:
# Training
# This cell trains the model with the set hyperparameters
# Uses MLP class from model.py module
# Uses cross-entropy loss to compute the gradients for backpropagation
# Includes an option to save weights at training completion

import sys
sys.path.append('../src')
import datetime
import math
import numpy as np
from utils import process_data
from model import MLP

# Load the training set. Samples are indexed along axis 1 (columns) and
# features along axis 0, as the shape lookups and slicing below rely on.
x, y, n_of_classes = process_data('../data/mnist-in-csv/mnist_train.csv')

input_size, output_size = x.shape[0], n_of_classes

model = MLP(input_size, hidden_sizes, output_size)

# Neither value changes between epochs, so compute them once up front.
set_size = x.shape[1]
iterations = math.ceil(set_size / batch_size)

for i in range(epochs):
    # Draw a fresh sample order each epoch and apply it to inputs and labels alike.
    order = np.random.permutation(set_size)
    x_epoch, y_epoch = x[:, order], y[:, order]

    # Running totals used for the per-epoch report.
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    # Step through the shuffled data one batch at a time; slicing clips at the
    # end of the array, so the final batch may hold fewer than batch_size samples.
    for lo in range(0, set_size, batch_size):
        hi = lo + batch_size
        batch_x = x_epoch[:, lo:hi]
        batch_y = y_epoch[:, lo:hi]

        # Forward pass, backward pass, then one parameter update.
        activations, pre_activations = model.forward(batch_x)
        gradients = model.backward(activations, pre_activations, batch_y)
        model.update_parameters(gradients, learning_rate)

        # Activations of the last layer, i.e. the outputs produced by the
        # forward pass above (before this batch's parameter update).
        output = activations[f'A{model.connections}']

        loss_sum += model.loss(output, batch_y)

        # Compare the highest-scoring row per column against the labels
        # (presumably one-hot encoded -- confirm against process_data).
        prediction = np.argmax(output, axis=0)
        correct = np.argmax(batch_y, axis=0)

        n_correct += (prediction == correct).sum()
        n_seen += batch_y.shape[1]

    # Mean of the per-batch loss values, and the fraction of correct predictions.
    avg_loss = loss_sum / iterations
    accuracy = n_correct / n_seen

    print(f'Epoch {i + 1} - Loss: {avg_loss:.5f}, Accuracy: {accuracy:.5f}')

# Ask whether to persist the trained weights; only an answer of "y"
# (case-insensitive, surrounding whitespace ignored) triggers a save.
answer = input('Training successful. Save weights? (y/n)')

if answer.strip().lower() == 'y':
    # The file name records the timestamp and the hyperparameters used.
    name = f"Weights_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}_hidden-{'-'.join(map(str, hidden_sizes))}_batch-{batch_size}_rate-{learning_rate}"
    path = f'../models/{name}'
    model.save(path)
    print(f'Model weights saved to {path}.')


Epoch 1 - Loss: 7.13863, Accuracy: 0.84197
Epoch 2 - Loss: 3.73297, Accuracy: 0.90793
Epoch 3 - Loss: 3.12604, Accuracy: 0.92038
Epoch 4 - Loss: 2.79205, Accuracy: 0.92818
Epoch 5 - Loss: 2.55511, Accuracy: 0.93522
Epoch 6 - Loss: 2.37462, Accuracy: 0.93978
Epoch 7 - Loss: 2.21327, Accuracy: 0.94395
Epoch 8 - Loss: 2.07586, Accuracy: 0.94820
Epoch 9 - Loss: 1.95707, Accuracy: 0.95132
Epoch 10 - Loss: 1.85250, Accuracy: 0.95408
Model weights saved to ../models/Weights_20250718_0109_hidden-128_batch-32_rate-0.01.
