In [1]:
%pip install numpy
%pip install torchvision

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [76]:
import numpy as np
from torchvision.datasets import MNIST
from numpy import ndarray
import time

In [77]:
def download_mnist(is_train: bool):
    """
    Load one split of MNIST through torchvision (requested with download=True,
    stored under ./data).

    is_train selects the training split when True and the test split otherwise.
    Every image is turned into a flattened (1-D) numpy array by the dataset
    transform. Returns two Python lists: (images, labels).
    """
    def flatten_image(picture):
        return np.array(picture).flatten()

    split = MNIST(root='./data', train=is_train, download=True,
                  transform=flatten_image)

    # Walk the dataset once, then separate the (image, label) pairs.
    samples = [(image, label) for image, label in split]
    flattened_images = [image for image, _ in samples]
    labels = [label for _, label in samples]

    return flattened_images, labels

In [78]:
# Load both MNIST splits as (images, labels) pairs of Python lists.
train_x, train_y = download_mnist(is_train=True)
test_x, test_y = download_mnist(is_train=False)

In [79]:
def transform_to_one_hot_encoding(arr, num_classes=None):
    """
    Used to transform the labels into one hot encoding.
    E.g. from 0 -> [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], for each element in the array

    Parameters:
        arr: 1-D sequence of non-negative integer class labels.
        num_classes: width of the encoding. When omitted it is inferred as
            max(arr) + 1. Pass it explicitly when a split might not contain
            the highest class, so that every split gets the same width.

    Returns:
        A float array of shape (len(arr), num_classes) with a single 1 per row.
        An empty input yields an array with zero rows.

    Raises:
        ValueError: if a label is negative or does not fit in num_classes.
    """
    np_arr = np.asarray(arr)

    if np_arr.size == 0:
        # max() of an empty array raises, so the empty case is handled up front.
        return np.zeros((0, 0 if num_classes is None else num_classes))

    smallest, largest = np_arr.min(), np_arr.max()
    if num_classes is None:
        num_classes = int(largest) + 1

    # Negative indices would silently wrap around to the last columns.
    if smallest < 0 or largest >= num_classes:
        raise ValueError(
            f"labels must lie in [0, {num_classes - 1}], got range [{smallest}, {largest}]"
        )

    labels = np.zeros((np_arr.size, num_classes))
    labels[np.arange(np_arr.size), np_arr] = 1

    return labels

In [80]:
# Transform the Python lists of images into numpy arrays and normalize
# them by dividing by 255.0
training_data = np.array(train_x) / 255.0
test_data = np.array(test_x) / 255.0

# One-hot encode the label lists
training_labels = transform_to_one_hot_encoding(train_y)
test_labels = transform_to_one_hot_encoding(test_y)

In [81]:
# How the NN (neural network) will look
# Layer 1 | Layer 2 | Layer 3
#   784   |   100   |   10     (neurons on each layer)

# Layer 1 holds the input data
# Layer 2 is the one and only hidden layer
# Layer 3 is the output

In [124]:
def init_nn(layer_sizes=(784, 100, 10)) -> tuple[list[ndarray], list[ndarray]]:
    """
    Create randomly initialized parameters for a fully connected network.

    Parameters:
        layer_sizes: number of neurons in each layer, input layer first.
            The default (784, 100, 10) reproduces the fixed layout this
            function originally had.

    Returns:
        (weights, biases): two lists with one entry per pair of consecutive
        layers. weights[i] has shape (layer_sizes[i], layer_sizes[i + 1]) and
        biases[i] has shape (1, layer_sizes[i + 1]). All values are drawn with
        np.random.randn, i.e. from NumPy's global random state.

    Raises:
        ValueError: if fewer than two layer sizes are given.
    """
    if len(layer_sizes) < 2:
        raise ValueError("layer_sizes needs at least an input and an output layer")

    connections = list(zip(layer_sizes[:-1], layer_sizes[1:]))

    # All weight matrices are drawn before any bias, which keeps the order of
    # random draws the same as in the fixed two-layer version.
    weights = [np.random.randn(n_in, n_out) for n_in, n_out in connections]
    biases = [np.random.randn(1, n_out) for _, n_out in connections]

    return weights, biases

In [115]:
def shuffle_data_and_create_batches(training_data, training_labels, batch_size=128):
    """
    Shuffle samples and labels with one shared random permutation, then cut
    them into consecutive mini-batches.

    Returns (batches_data, batches_labels): two lists of equal length whose
    i-th entries belong together. Every batch holds batch_size rows, except
    possibly the last one, which holds whatever remains.
    """
    order = np.random.permutation(len(training_data))
    shuffled_data = training_data[order]
    shuffled_labels = training_labels[order]

    batch_starts = range(0, len(shuffled_data), batch_size)
    batches_data = [shuffled_data[start:start + batch_size] for start in batch_starts]
    batches_labels = [shuffled_labels[start:start + batch_size] for start in batch_starts]

    return batches_data, batches_labels


In [116]:
def sigmoid(arr):
    """
    Element-wise logistic function 1 / (1 + exp(-x)).

    Only exp(-|x|) is ever evaluated, which lies in (0, 1], so large negative
    inputs no longer overflow inside np.exp:
        x >= 0:  1 / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))
    For non-negative inputs this is the same expression as the direct formula.

    Accepts scalars and array-likes; returns a result of the same shape.
    """
    x = np.asarray(arr)
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1.0, decay) / (1.0 + decay)
    # Indexing with () leaves arrays as arrays and unwraps a 0-d result into a scalar.
    return result[()]

In [117]:
def sigmoid_prime(arr):
    """
    Element-wise derivative of the sigmoid: sigmoid(x) * (1 - sigmoid(x)).
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    activation = sigmoid(arr)
    return activation * (1 - activation)

In [149]:
def softmax(arr):
    """
    Row-wise softmax of a 2-D array.

    Each row's maximum is subtracted before exponentiating, so the largest
    exponent in every row is 0. Every row of the result sums to 1.
    """
    row_peaks = arr.max(axis=1, keepdims=True)
    exponentials = np.exp(arr - row_peaks)
    row_totals = exponentials.sum(axis=1, keepdims=True)
    return exponentials / row_totals

In [119]:
def forward_propagation(batch_data, weights, biases):
    """
    Run a batch through the network and return the output-layer activations.

    weights[0] / biases[0] feed the hidden layer (sigmoid activation);
    weights[1] / biases[1] feed the output layer (softmax activation).
    """
    # Hidden layer: affine map followed by sigmoid
    hidden_output = sigmoid(batch_data @ weights[0] + biases[0])

    # Output layer: affine map followed by softmax
    output_scores = hidden_output @ weights[1] + biases[1]
    return softmax(output_scores)

In [190]:
def backward_propagation(training_data, training_labels, weights, biases, learning_rate):
    """
    Compute one mini-batch update step for all weights and biases.

    A forward pass (sigmoid hidden layer, softmax output layer) is followed by
    back-propagation of the output error (labels - predictions).

    Parameters:
        training_data: batch of inputs, one sample per row.
        training_labels: targets for the batch, same row count as the output layer.
        weights, biases: two-element lists [hidden layer, output layer].
        learning_rate: scale applied to every update.

    Returns:
        ([weights_update_2, weights_update_3], [biases_update_2, biases_update_3]).
        These are already scaled by learning_rate and averaged over the batch,
        and are meant to be ADDED to the current parameters, because the error
        is taken as labels - predictions.
    """
    m = training_data.shape[0]

    # Forward pass; the hidden activation is kept for the backward pass.
    hidden_activation = sigmoid(training_data @ weights[0] + biases[0])
    predictions = softmax(hidden_activation @ weights[1] + biases[1])

    # Output-layer error. NOTE(review): the loss is not defined in this
    # notebook; labels - softmax output is the descent direction for a
    # cross-entropy loss -- confirm that is the intended loss.
    output_error = training_labels - predictions
    weights_update_3 = hidden_activation.T @ output_error * learning_rate / m
    biases_update_3 = np.sum(output_error, axis=0) * learning_rate / m

    # The sigmoid derivative is s * (1 - s); reuse the activation from the
    # forward pass instead of evaluating the sigmoid again.
    hidden_error = (output_error @ weights[1].T) * (hidden_activation * (1 - hidden_activation))

    weights_update_2 = training_data.T @ hidden_error * learning_rate / m
    biases_update_2 = np.sum(hidden_error, axis=0) * learning_rate / m

    return ([weights_update_2, weights_update_3],
            [biases_update_2, biases_update_3])

In [191]:
def train(training_data, training_labels,
          weights, biases, learning_rate=0.01, epochs=50, batch_size=128):
    """
    Train the network with mini-batch updates and report timing per epoch.

    Each epoch reshuffles the data, splits it into batches of batch_size and
    adds the update returned by backward_propagation after every batch.

    Parameters:
        training_data, training_labels: full training set and its targets.
        weights, biases: initial parameters; the lists passed in are not
            modified, new lists are built and returned.
        learning_rate: step size forwarded to backward_propagation.
        epochs: number of passes over the training set.
        batch_size: number of samples per mini-batch (default 128).

    Returns:
        (weights, biases) after the last epoch.
    """
    total_training_time = 0.0

    for epoch_idx in range(epochs):
        # perf_counter is monotonic, so durations are not distorted by
        # adjustments of the system clock.
        start_time_epoch = time.perf_counter()

        batches_data, batches_labels = shuffle_data_and_create_batches(
            training_data, training_labels, batch_size)
        for batch_data, batch_labels in zip(batches_data, batches_labels):
            update_weights, update_biases = backward_propagation(
                batch_data, batch_labels, weights, biases, learning_rate)

            # The updates are steps to add (see backward_propagation).
            weights = [layer + step for layer, step in zip(weights, update_weights)]
            biases = [layer + step for layer, step in zip(biases, update_biases)]

        duration = time.perf_counter() - start_time_epoch
        print(f"Epoch {epoch_idx + 1} finished in {duration}")
        total_training_time += duration

    print(f"Duration training: {total_training_time}")

    return weights, biases

In [181]:
def calculate_accuracy(test_data, test_labels, weights, biases):
    """
    Percentage of samples whose predicted class matches the labelled class.

    The predicted class is the argmax of the network output for each row; the
    labelled class is the argmax of the corresponding row of test_labels.
    """
    network_output = forward_propagation(test_data, weights, biases)
    guessed_classes = np.argmax(network_output, axis=1)
    actual_classes = np.argmax(test_labels, axis=1)

    hits = (guessed_classes == actual_classes).sum()
    fraction_correct = hits / len(test_data)
    return fraction_correct * 100

In [203]:
# Draw the initial weights and biases for the network.
# NOTE(review): no random seed is set in the visible cells, so the starting
# parameters (and therefore the training result) differ from run to run.
weights, biases = init_nn()

In [204]:
# Train starting from the freshly initialized parameters.
weights_after_training, biases_after_training = train(
    training_data,
    training_labels,
    weights,
    biases,
    learning_rate=0.1,
    epochs=160,
)

Epoch 1 finished in 0.36101484298706055
Epoch 2 finished in 0.4740335941314697
Epoch 3 finished in 0.3663315773010254
Epoch 4 finished in 0.42629575729370117
Epoch 5 finished in 0.5887162685394287
Epoch 6 finished in 0.6911945343017578
Epoch 7 finished in 0.9767932891845703
Epoch 8 finished in 0.8059229850769043
Epoch 9 finished in 0.45921850204467773
Epoch 10 finished in 0.7308199405670166
Epoch 11 finished in 0.34475278854370117
Epoch 12 finished in 0.5754756927490234
Epoch 13 finished in 0.7255203723907471
Epoch 14 finished in 0.9077844619750977
Epoch 15 finished in 0.4134204387664795
Epoch 16 finished in 0.4041252136230469
Epoch 17 finished in 0.465381383895874
Epoch 18 finished in 0.41702866554260254
Epoch 19 finished in 0.5123035907745361
Epoch 20 finished in 0.3730912208557129
Epoch 21 finished in 0.6529583930969238
Epoch 22 finished in 0.639235258102417
Epoch 23 finished in 0.48391151428222656
Epoch 24 finished in 0.45989561080932617
Epoch 25 finished in 0.4666624069213867
Epoc

In [205]:
# Accuracy, in percent, of the trained parameters on the test split.
calculate_accuracy(test_data, test_labels, weights_after_training, biases_after_training)

np.float64(94.8)