In [1]:
%pip install numpy
%pip install torchvision

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [275]:
import numpy as np
from torchvision.datasets import MNIST
from numpy import ndarray
import time

In [276]:
def download_mnist(is_train: bool):
    dataset = MNIST(root='./data', transform=lambda x: np.array(x).flatten(),
                    download=True,
                    train=is_train)

    mnist_data = []
    mnist_labels = []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)

    return mnist_data, mnist_labels

In [277]:
train_x, train_y = download_mnist(True)
test_x, test_y = download_mnist(False)

In [278]:
def transform_to_one_hot_encoding(arr):
    """
    Used to transform the labels into one hot encoding.
    E.g. from 0 -> [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], for each element in the array
    """
    np_arr = np.array(arr)
    
    dim_arr = len(np_arr)
    labels = np.zeros((dim_arr, np_arr.max() + 1))
    labels[np.arange(np_arr.size), np_arr] = 1
    
    return labels

In [279]:
# Trasform python lists into numpy arrays
training_data = np.array(train_x)
training_labels = transform_to_one_hot_encoding(train_y)

test_data = np.array(test_x)
test_labels = transform_to_one_hot_encoding(test_y)

# Normalize data
training_data = training_data / 255.0
test_data = test_data / 255.0

In [280]:
# How NN(neural network) will look
# Layer 1 | Layer 2 | Layer 3
#   784   |   100   |   10     (neurons on each layer)

# Layer 1 will be with the input data
# Layer 2 will be the one and only hidden layer
# Layer 3 will be the ouput

In [281]:
def init_nn() -> (list[ndarray], list[ndarray]):
    interval_1 = np.sqrt(6 / (784 + 100))
    interval_2 = np.sqrt(6 / (100 + 10))
    
    weights_2 = np.random.uniform(-interval_1, interval_1, (784, 100))
    weights_3 = np.random.uniform(-interval_2, interval_2, (100, 10))

    biases_2 = np.random.uniform(-interval_1, interval_1, (1, 100))
    biases_3 = np.random.uniform(-interval_2, interval_2, (1, 10))

    return [weights_2, weights_3], [biases_2, biases_3]

In [282]:
def shuffle_data_and_create_batches(training_data, training_labels, batch_size=128):
    p = np.random.permutation(len(training_data))
    training_data, training_labels = training_data[p], training_labels[p]
        
    batches_data = []
    batches_labels = []

    for i in range(0, len(training_data), batch_size):
        batches_data.append(training_data[i:i+batch_size])
        batches_labels.append(training_labels[i:i+batch_size])

    return batches_data, batches_labels


In [283]:
def sigmoid(arr):
    return 1.0 / (1.0 + np.exp(-arr))

In [284]:
def sigmoid_prime(arr):
    return sigmoid(arr)*(1-sigmoid(arr))

In [285]:
def softmax(arr):
    # max_arr = arr.max()
    max_arr = arr.max(axis=1, keepdims=True)
    exp_arr = np.exp(arr - max_arr)
    return exp_arr / exp_arr.sum(axis=1, keepdims=True)

In [286]:
def forward_propagation(batch_data, weights, biases, keep_prob=1.0, training=False, dropout_mask=None):
    # Apply sigmoid for first hidden layer
    z2 = batch_data @ weights[0] + biases[0]
    y2 = sigmoid(z2)

    dropout_mask = None
    if training:
        dropout_mask = (np.random.rand(*y2.shape) < keep_prob).astype(float)
        y2 *= dropout_mask
        y2 /= keep_prob

    # Apply softmax for last layer
    z3 = y2 @ weights[1] + biases[1]
    y3 = softmax(z3)
    
    return z2, y2, z3, y3, dropout_mask

In [287]:
def backward_propagation(training_data, training_labels, weights, biases, learning_rate, keep_prob=1.0):
    m = training_data.shape[0]

    z2, y2, z3, y3, dropout_mask = forward_propagation(training_data, weights, biases, keep_prob, training=True)
    
    gradient_3 = training_labels - y3
    weights_update_3 = y2.T @ gradient_3 * learning_rate / m
    biases_update_3 = np.sum(gradient_3, axis=0) * learning_rate / m

    gradient_2 = (gradient_3 @ weights[1].T) * sigmoid_prime(z2)
    gradient_2 *= dropout_mask

    weights_update_2 = training_data.T @ gradient_2 * learning_rate / m
    biases_update_2 = np.sum(gradient_2, axis=0) * learning_rate / m

    return ([weights_update_2, weights_update_3],
            [biases_update_2, biases_update_3])

In [288]:
def train(training_data, training_labels, 
          weights, biases, keep_prob=1.0, learning_rate=0.01, epochs=50):

    total_training_time = 0

    for epoch_idx in range(epochs):
        start_time_epoch = time.time()

        batches_data, batches_labels = shuffle_data_and_create_batches(training_data, training_labels)
        for batch_data, batch_labels in zip(batches_data, batches_labels):
            update_weights, update_biases = backward_propagation(batch_data, batch_labels, weights, biases, learning_rate, keep_prob)

            weights = [weights[0] + update_weights[0], weights[1] + update_weights[1]]
            biases = [biases[0] + update_biases[0], biases[1] + update_biases[1]]

        duration = time.time() - start_time_epoch
        print(f"Epoch {epoch_idx + 1} finished in {duration}")
        total_training_time += duration
        
    print(f"Duration training: {total_training_time}")

    return weights, biases

In [289]:
def calculate_accuracy(test_data, test_labels, weights, biases):
    _, _, _, y_pred, _ = forward_propagation(test_data, weights, biases)
    predicted_labels = np.argmax(y_pred, axis=1)
    true_labels = np.argmax(test_labels, axis=1)
    correct_results = np.sum(predicted_labels == true_labels)
    return (correct_results / len(test_data)) * 100

In [290]:
weights, biases = init_nn()

In [291]:
weights_after_training, biases_after_training = train(training_data, training_labels, weights, biases, learning_rate=0.1, epochs=160, keep_prob=0.8)

Epoch 1 finished in 0.4319443702697754
Epoch 2 finished in 0.44324326515197754
Epoch 3 finished in 0.49606919288635254
Epoch 4 finished in 0.6483631134033203
Epoch 5 finished in 0.5829238891601562
Epoch 6 finished in 0.6610839366912842
Epoch 7 finished in 0.5333311557769775
Epoch 8 finished in 0.5315380096435547
Epoch 9 finished in 0.4037201404571533
Epoch 10 finished in 0.45422935485839844
Epoch 11 finished in 0.4047837257385254
Epoch 12 finished in 0.40921807289123535
Epoch 13 finished in 0.4452836513519287
Epoch 14 finished in 0.4076821804046631
Epoch 15 finished in 0.4140491485595703
Epoch 16 finished in 0.43315935134887695
Epoch 17 finished in 0.6729543209075928
Epoch 18 finished in 0.5263488292694092
Epoch 19 finished in 0.5854573249816895
Epoch 20 finished in 1.3881092071533203
Epoch 21 finished in 1.4902067184448242
Epoch 22 finished in 0.6575241088867188
Epoch 23 finished in 1.0880048274993896
Epoch 24 finished in 0.66575026512146
Epoch 25 finished in 0.4997708797454834
Epoch 

In [292]:
calculate_accuracy(test_data, test_labels, weights_after_training, biases_after_training)

np.float64(97.54)