In [1]:
import os
# print(os.getcwd())

import numpy as np
import matplotlib.pyplot as plt

from src import mlp
from src import mnist
from src import config

# Data preprocessing

In [2]:
mlp = mlp.MLP(num_classes = config.NUM_CLASSES, learning_rate = config.LEARNING_RATE)
mnist = mnist.MNIST(config.TRAINING_IMAGES_FILEPATH,
              config.TRAINING_LABELS_FILEPATH,
              config.TEST_IMAGES_FILEPATH,
              config.TEST_LABELS_FILEPATH)

MNIST Data loaded and processed:
  x_train shape: (60000, 784), dtype: float32
  y_train shape: (60000, 10), dtype: float64
  x_test shape: (10000, 784), dtype: float32
  y_test shape: (10000, 10), dtype: float64


In [3]:
# Set random seed and initialize weights and biases
np.random.seed(42)
mlp.initialize_weights(config.INPUT_SIZE, config.HIDDEN_SIZES)

print(f"W1: {mlp.weights['W1'].shape}, W2: {mlp.weights['W2'].shape}, W3: {mlp.weights['W3'].shape}, W4: {mlp.weights['W4'].shape}")
print(f"b1: {mlp.biases['b1'].shape}, b2: {mlp.biases['b2'].shape}, b3: {mlp.biases['b3'].shape}, b4: {mlp.biases['b4'].shape}")

W1: (784, 512), W2: (512, 56), W3: (56, 128), W4: (128, 10)
b1: (1, 512), b2: (1, 56), b3: (1, 128), b4: (1, 10)


# Training

In [None]:
# Training loop
num_epochs = 20
batch_size = 32

for epoch in range(num_epochs):
    # Shuffle training data at the start of each epoch
    permutation = np.random.permutation(mnist.x_train.shape[0])
    mnist.x_train = mnist.x_train[permutation]
    mnist.y_train = mnist.y_train[permutation]

    # Iterate over mini-batches
    for i in range(0, mnist.x_train.shape[0], batch_size):
        x_batch = mnist.x_train[i:i+batch_size]  # Extract batch inputs
        y_batch = mnist.y_train[i:i+batch_size]  # Extract batch labels

        # Perform forward and backward pass
        activations = mlp.forward_pass(x_batch)
        mlp.backward_pass(x_batch, y_batch, activations)

    # Perform forward pass on training data
    activations = mlp.forward_pass(mnist.x_train)

    # Extract predictions from the output layer (softmax results)
    train_pred = activations[f"A{len(mlp.weights)}"] 

    # Calculate accuracy
    train_accuracy = np.mean(np.argmax(train_pred, axis=1) == np.argmax(mnist.y_train, axis=1))
    print(f"Epoch {epoch + 1}, training accuracy: {train_accuracy * 100:.2f}%")

Epoch 1, training accuracy: 11.24%
Epoch 2, training accuracy: 11.24%
Epoch 3, training accuracy: 11.24%
Epoch 4, training accuracy: 11.24%
Epoch 5, training accuracy: 11.24%
Epoch 6, training accuracy: 11.24%
Epoch 7, training accuracy: 21.11%
Epoch 8, training accuracy: 47.50%
Epoch 9, training accuracy: 73.66%
Epoch 10, training accuracy: 86.53%
Epoch 11, training accuracy: 90.39%
Epoch 12, training accuracy: 92.46%
Epoch 13, training accuracy: 94.16%
Epoch 14, training accuracy: 95.08%
Epoch 15, training accuracy: 96.07%
Epoch 16, training accuracy: 96.52%
Epoch 17, training accuracy: 96.77%
Epoch 18, training accuracy: 97.33%
Epoch 19, training accuracy: 97.50%
Epoch 20, training accuracy: 98.11%


In [5]:
# Evaluate on test data
test_activations = mlp.forward_pass(mnist.x_test)
test_pred = test_activations[f"A{len(mlp.weights)}"]
test_accuracy = np.mean(np.argmax(test_pred, axis=1) == np.argmax(mnist.y_test, axis=1))
print(f"Test Accuracy: {test_accuracy * 100:.2f}")

Test Accuracy: 96.66


Once the MLP is trained, its weights and biases are stored in a file.

In [6]:
np.save(os.path.join(config.WEIGHTS_AND_BIASES_PATH + r'\mlp_0\npy', 'w1.npy'), np.array(mlp.weights['W1']))
np.save(os.path.join(config.WEIGHTS_AND_BIASES_PATH + r'\mlp_0\npy', 'w2.npy'), np.array(mlp.weights['W2']))
np.save(os.path.join(config.WEIGHTS_AND_BIASES_PATH + r'\mlp_0\npy', 'w3.npy'), np.array(mlp.weights['W3']))
np.save(os.path.join(config.WEIGHTS_AND_BIASES_PATH + r'\mlp_0\npy', 'w_out.npy'), np.array(mlp.weights['W4']))

np.save(os.path.join(config.WEIGHTS_AND_BIASES_PATH + r'\mlp_0\npy', 'b1.npy'), np.array(mlp.biases['b1']))
np.save(os.path.join(config.WEIGHTS_AND_BIASES_PATH + r'\mlp_0\npy', 'b2.npy'), np.array(mlp.biases['b2']))
np.save(os.path.join(config.WEIGHTS_AND_BIASES_PATH + r'\mlp_0\npy', 'b3.npy'), np.array(mlp.biases['b3']))
np.save(os.path.join(config.WEIGHTS_AND_BIASES_PATH + r'\mlp_0\npy', 'b_out.npy'), np.array(mlp.biases['b4']))