In [None]:
# ** (Colab only) Run this cell once at session start **
# Fetches a fresh copy of the project and prepares the environment:
#   1. delete any previous checkout in the current directory,
#   2. clone the repository,
#   3. make the notebooks directory the working directory (the other cells use
#      paths such as ../src, ../data and ../models relative to it),
#   4. install the dependencies listed in requirements.txt into the kernel.
# All paths here are relative to the current working directory, so running the
# cell a second time from inside the checkout would clone a nested copy.
# Restart the session before running it again.
!rm -rf digit-recognition
!git clone https://github.com/dev079144/digit-recognition.git
%cd digit-recognition/notebooks
%pip install -r ../requirements.txt

In [None]:
# Hyperparameters
# Edit these values freely; this is the only cell that needs to be modified.
# The training cell reads all four names, so run this cell before training.

hidden_sizes = [128]    # Sizes of the hidden layers, one list entry per layer
batch_size = 32         # Number of samples per mini-batch
learning_rate = 0.01    # Gradient descent step size
epochs = 10             # Number of passes over the training set

In [None]:
# Training
# This cell trains the model with the hyperparameters set in the cell above.
# It uses the MLP class from the model.py module.
# Cross-entropy loss is used to compute the gradients for backpropagation.
# When training completes, the cell offers to save the weights.

import sys
import os
sys.path.append(os.path.abspath('../src'))
import datetime
import math
import numpy as np
from utils import process_data, download_data
from model import MLP

# Fail early with a clear message: a batch size below 1 would otherwise only
# surface as a ZeroDivisionError further down.
if batch_size < 1:
    raise ValueError(f'batch_size must be at least 1, got {batch_size}.')

download_data('train')

# The code below indexes samples along axis 1 of x and y, and decodes labels
# with argmax over axis 0 of y.
x, y, n_of_classes = process_data('../data/mnist_train.csv')

input_size = x.shape[0]
output_size = n_of_classes

model = MLP(input_size, hidden_sizes, output_size)

# Loop-invariant values: the data set size and the number of mini-batches per
# epoch do not change between epochs. The last batch may be smaller.
set_size = x.shape[1]
iterations = math.ceil(set_size / batch_size)

for i in range(epochs):
    # Draw a fresh sample order every epoch; x and y are permuted with the
    # same index array so each sample stays paired with its label.
    shuffled = np.random.permutation(set_size)
    shuffled_x = x[:, shuffled]
    shuffled_y = y[:, shuffled]

    epoch_loss = 0
    right = 0
    total = 0

    for j in range(iterations):
        start = j * batch_size
        end = min((j + 1) * batch_size, set_size)

        batch_x = shuffled_x[:, start:end]
        batch_y = shuffled_y[:, start:end]

        activations, pre_activations = model.forward(batch_x)

        gradients = model.backward(activations, pre_activations, batch_y)

        model.update_parameters(gradients, learning_rate)

        # Activations of the last layer, taken from the forward pass above
        # (i.e. computed before this batch's parameter update).
        output = activations[f'A{model.connections}']

        epoch_loss += model.loss(output, batch_y)

        prediction = np.argmax(output, axis=0)
        correct = np.argmax(batch_y, axis=0)

        right += np.sum(prediction == correct)
        total += batch_y.shape[1]

    # Mean of the per-batch loss values returned by model.loss.
    avg_loss = epoch_loss / iterations
    accuracy = right / total

    print(f'Epoch {i + 1} - Loss: {avg_loss:.5f}, Accuracy: {accuracy:.5f}')

save = input('Training successful. Save weights? (y/n)').strip().lower()

# Accept a spelled-out "yes" as well as "y"; anything else skips saving.
if save in ('y', 'yes'):
    # The file name records the timestamp and the hyperparameters; the
    # inference cell parses the "hidden-..." part to rebuild the architecture.
    name = f"Weights_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}_hidden-{'-'.join(map(str, hidden_sizes))}_batch-{batch_size}_rate-{learning_rate}"
    path = f'../models/{name}'
    # Make sure the target directory exists so the save cannot fail on a
    # missing folder.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # NOTE(review): path carries no extension, while the inference cell only
    # lists files ending in .npz; presumably model.save appends it. Confirm.
    model.save(path)
    print(f'Model weights saved to {path}.')


In [None]:
# Inference
# This cell runs one random test sample through the model and shows the prediction.
# If more than one weights file is available in ../models, it prompts for a selection.
# Otherwise the model loads the default weights.

import sys
import os
sys.path.append(os.path.abspath('../src'))
import re
import numpy as np
import matplotlib.pyplot as plt
from utils import process_data, download_data
from model import MLP

download_data('test')

# The code below indexes samples along axis 1 of x and y.
x, y, n_of_classes = process_data('../data/mnist_test.csv')

input_size = x.shape[0]
output_size = n_of_classes

default_weights = 'Weights_20250718_0037_hidden-128_batch-32_rate-0.01.npz'

models_directory = '../models'

saved_weights = sorted(
    f for f in os.listdir(models_directory)
    if f.endswith('.npz')
)

# Pick the default choice. Use the bundled pre-trained file when it is present.
# If it is missing but other weights exist, fall back to the last file in
# sorted order instead of failing. If nothing is found at all, keep the
# bundled name so that model.load reports the missing file.
if default_weights in saved_weights or not saved_weights:
    selected_weights = default_weights
else:
    selected_weights = saved_weights[-1]

if len(saved_weights) > 1:
    print('Available weights:')

    for i, file in enumerate(saved_weights):
        default_marker = " (default)" if file == selected_weights else ""
        print(f"[{i}] {file}{default_marker}")

    default_index = saved_weights.index(selected_weights)

    # Ask until the answer is either empty (keep the default) or a valid list
    # index. Negative numbers are rejected so they cannot silently select a
    # file from the end of the list.
    while True:
        choice = input(f"Select weights [0-{len(saved_weights)-1}] (Enter for default)").strip()

        if choice == '':
            index = default_index
            break

        try:
            index = int(choice)
        except ValueError:
            index = -1

        if 0 <= index < len(saved_weights):
            break

        print(f'Invalid selection: {choice!r}')

    selected_weights = saved_weights[index]

    print(f'Using weights: {selected_weights}')

weights_path = os.path.join(models_directory, selected_weights)

# Recover the architecture from the file name, e.g. "hidden-128-64" -> [128, 64].
# The pattern only accepts dash-separated digit groups, so a stray dash can
# never reach int().
match = re.search(r'hidden-(\d+(?:-\d+)*)', selected_weights)

if match:
    hidden_sizes_str = match.group(1)
    hidden_sizes = list(map(int, hidden_sizes_str.split('-')))
else:
    # File name without a parsable "hidden-..." part: assume one layer of 128.
    hidden_sizes = [128]

model = MLP(input_size, hidden_sizes, output_size)

model.load(weights_path)

# Pick one test sample at random and keep it as a single-column input.
random_index = np.random.randint(x.shape[1])

random_x = x[:, random_index].reshape(-1, 1)
# Label column of the chosen sample; not used below, kept for inspection.
random_y = y[:, random_index]

activations, pre_activations = model.forward(random_x)

prediction = np.argmax(activations[f'A{model.connections}'])

# The sample is displayed as a square image, so the reshape below only works
# when the number of input features is a perfect square.
image_dimension = int(np.sqrt(x.shape[0]))
reshaped_x = random_x.reshape(image_dimension, image_dimension)

plt.imshow(reshaped_x, cmap='gray')
plt.title(f'The model thought this was the number {prediction}')
plt.axis('off')
plt.show()
