## Neural Network Practice on the MNIST data set

### Setup

In [None]:
import numpy as np
import struct
from array import array


class MnistDataloader(object):
    """Read MNIST-style IDX image and label files into Python containers.

    The four file paths are stored on construction; nothing is read until
    ``load_data`` (or ``read_images_labels``) is called.
    """

    def __init__(
        self,
        training_images_filepath,
        training_labels_filepath,
        test_images_filepath,
        test_labels_filepath,
    ):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one image file and its matching label file.

        Args:
            images_filepath: Path to an IDX image file (magic number 2051).
            labels_filepath: Path to an IDX label file (magic number 2049).

        Returns:
            A tuple ``(images, labels)``. ``images`` is a list with one entry
            per image; each entry is a list of 1-D numpy arrays, one array
            per pixel row. ``labels`` is an ``array('B')`` of label bytes.

        Raises:
            ValueError: If either file starts with an unexpected magic
                number, or if the pixel data cannot be reshaped to the
                dimensions announced in the image header.
        """
        with open(labels_filepath, "rb") as file:
            # Header: two big-endian uint32 values (magic number, item count).
            # The count is not needed; every remaining byte is one label.
            magic, _ = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError(
                    "Magic number mismatch, expected 2049, got {}".format(magic)
                )
            labels = array("B", file.read())

        with open(images_filepath, "rb") as file:
            # Header: magic number, image count, rows and columns per image.
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError(
                    "Magic number mismatch, expected 2051, got {}".format(magic)
                )
            image_data = array("B", file.read())

        pixels_per_image = rows * cols
        images = []
        for i in range(size):
            start = i * pixels_per_image
            img = np.array(image_data[start : start + pixels_per_image])
            # Reshape with the dimensions from the header instead of assuming
            # 28x28, so files with other image sizes load correctly.
            img = img.reshape(rows, cols)
            # Store each image as a list of row arrays.
            images.append(list(img))

        return images, labels

    def load_data(self):
        """Load both data sets.

        Returns:
            ``((x_train, y_train), (x_test, y_test))`` where each pair is the
            result of ``read_images_labels`` for the corresponding files.
        """
        x_train, y_train = self.read_images_labels(
            self.training_images_filepath, self.training_labels_filepath
        )
        x_test, y_test = self.read_images_labels(
            self.test_images_filepath, self.test_labels_filepath
        )
        return (x_train, y_train), (x_test, y_test)


In [None]:
import random
import matplotlib.pyplot as plt
import os

# Root directory of the unpacked MNIST archives; each archive directory
# contains one file with the same name as the directory.
data_base_path = '../../../../data/mnist'
(
    training_images_filepath,
    training_labels_filepath,
    test_images_filepath,
    test_labels_filepath,
) = (
    os.path.join(data_base_path, relative_path)
    for relative_path in (
        'train-images-idx3-ubyte/train-images-idx3-ubyte',
        'train-labels-idx1-ubyte/train-labels-idx1-ubyte',
        't10k-images-idx3-ubyte/t10k-images-idx3-ubyte',
        't10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte',
    )
)

def show_images(images, title_texts):
    """Draw the images in a five-column grid, each with an optional title.

    Args:
        images: Sequence of 2-D image data accepted by ``plt.imshow``.
        title_texts: Titles paired with ``images`` by position; an empty
            string leaves the corresponding subplot untitled.
    """
    cols = 5
    # Ceiling division gives exactly enough rows for all images, with no
    # empty trailing row when the count is a multiple of ``cols``.
    rows = max(1, (len(images) + cols - 1) // cols)
    plt.figure(figsize=(30, 20))
    # Subplot positions are 1-based.
    for index, (image, title_text) in enumerate(zip(images, title_texts), start=1):
        plt.subplot(rows, cols, index)
        plt.imshow(image, cmap=plt.cm.gray)
        if title_text != '':
            plt.title(title_text, fontsize=15)

# Load the training and test sets from the IDX files configured above.
mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath)
(x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

# Pick 10 random training images and 5 random test images to preview.
# random.randrange(n) yields a valid index in [0, n); random.randint is
# inclusive at both ends and could produce an out-of-range index.
images_2_show = []
titles_2_show = []
for _ in range(10):
    r = random.randrange(len(x_train))
    images_2_show.append(x_train[r])
    titles_2_show.append('training image [' + str(r) + '] = ' + str(y_train[r]))

for _ in range(5):
    r = random.randrange(len(x_test))
    images_2_show.append(x_test[r])
    titles_2_show.append('test image [' + str(r) + '] = ' + str(y_test[r]))

show_images(images_2_show, titles_2_show)

#### Identifying the shape of the data

In [None]:
import pandas as pd

# Summary table: one row per variable, with its Python type and a short
# description, built column by column.
df = pd.DataFrame(
    {
        "variable": ["x_train", "x_train[0]", "y_train"],
        "type": [type(x_train), type(x_train[0]), type(y_train)],
        "description": [
            "Raw image data",
            "2D array representing pixel values for a single image",
            "Labels",
        ],
    }
)
df


In [None]:
(
    len(x_train),  # number of training images
    len(x_train[0]),  # rows in the first image
    len(x_train[0][0]),  # pixels in the first row
    type(x_train[0][0]),  # container type of a single row
    type(x_train[0][0][0]),  # type of a single pixel value
)

### Construct input and output layers

The next step is to construct a random neural network.  I'll start by constructing the input layer and output layer.

First, define some utilities:

1. need a way to flatten a 28x28 image into a single list
2. need a method to normalize a list of numbers

In [None]:
import itertools
from typing import Iterable, Sequence, TypeVar

T = TypeVar("T")

def flatten_2d_data(data: Iterable[Iterable[T]]) -> list[T]:
    """Return all elements of a 2-D structure as a single flat list.

    Rows are consumed in order and their elements appended one after
    another, so the result is in row-major order.
    """
    flat: list[T] = []
    for row in data:
        flat.extend(row)
    return flat

def normalize(a: np.ndarray) -> np.ndarray:
    """Scale ``a`` so that its elements sum to 1.

    Every element is divided by the sum of all elements. There is no guard
    against a zero sum.
    """
    total = np.sum(a)
    return a / total

In [None]:
# Output layer: ten values, initialised to zero.
output_layer = np.zeros(10)
# Input layer: one value per pixel (pixels per row times rows per image).
input_layer_len = len(x_train[0][0]) * len(x_train[0])
print(f"{input_layer_len=}")
print(f"{output_layer=}")

### Construct random weights matrix

`np.random.randn` returns values from the standard normal distribution, a common initialization
method for neural networks.

The matrix must be of size `len(output)` x `len(input)`, which makes sense as a linear transformation
from dimensionality 784 to dimensionality 10.

Here I scale the initial weights by 0.01, a relatively arbitrary value.  The values should be small because, in a network with many layers, large initial weights can lead to large outputs from neurons. When those outputs are passed through an activation function (like a sigmoid or a tanh), they might end up in the saturation region of the function. This can lead to vanishing gradients during backpropagation, making the network harder to train.

Note that techniques like Xavier initialization and He initialization improve upon this random method.

In [None]:
# Draw standard-normal weights, one row per output neuron and one column per
# input pixel, then shrink them by a factor of 0.01.
weights = 0.01 * np.random.randn(output_layer.size, input_layer_len)
weights

In [None]:
# (number of output neurons, number of input pixels)
np.shape(weights)

#### Predicting one image

Now find the output of applying the weights to the first image.

In [None]:
# Take the first training example: its label, and its pixels flattened into
# one list (raw values, not normalized).
label1 = y_train[0]
image1 = flatten_2d_data(x_train[0])
print(f"{len(image1)=} {label1=}")

In [None]:
# Matrix-vector product: one raw activation per output neuron.
output = weights.dot(image1)
output

In [None]:
# Rescale the raw activations by dividing by their sum, so the values add up
# to approximately 1.
# NOTE(review): the weights are drawn from a signed distribution, so individual
# activations can be negative and the result is not guaranteed to lie in
# [0, 1] - confirm this is the intended normalization.
output = normalize(output)
output

#### Loss function

Now define the *loss function*; I'll use the common **squared error loss**.

In [None]:
def loss(label: int, output: np.ndarray) -> float:
    """Return the squared-error loss of ``output`` against a one-hot target.

    The target is 1 at index ``label`` and 0 everywhere else; the loss is the
    sum of squared differences between ``output`` and that target.

    Args:
        label: Index of the correct class.
        output: Network output, expected to sum to 1.

    Returns:
        The summed squared error as a float.

    Raises:
        ValueError: If ``output`` does not sum to 1 within floating-point
            tolerance.
    """
    # Compare with a tolerance: a normalized float vector rarely sums to
    # exactly 1.0, so an exact equality test rejects valid input.
    if not np.isclose(np.sum(output), 1.0):
        raise ValueError("output must be normalized")
    return float(
        sum(
            (val - 1) ** 2 if i == label else val**2
            for i, val in enumerate(output)
        )
    )


In [None]:
# Squared-error loss of the (re-)normalized prediction for the first image.
loss(label=label1, output=normalize(output))

In [None]:
# Sum of the squared output values.
sum(output * output)