In [11]:
%matplotlib inline

import numpy as np
import struct
import matplotlib.pyplot as plt
import random
from typing import Tuple, List
from numpy.typing import NDArray

In [13]:
class MnistData:
    """In-memory MNIST dataset loaded from an IDX image file and an IDX label file.

    Attributes set by the loaders:
        size:      number of samples.
        img_rows:  image height in pixels (from the image file header).
        img_cols:  image width in pixels (from the image file header).
        images:    float32 array of shape (size, img_rows, img_cols), scaled
                   from raw bytes into [0, 1] by dividing by 255.
        labels:    uint8 array with one entry per image.

    Indexing and iteration yield ``(image, label)`` pairs. When
    ``flatten_image`` is true the image is returned as a column vector of
    shape ``(img_rows * img_cols, 1)``.
    """

    # Number of digit classes.
    DIGIT_COUNT = 10

    def __init__(self, images_path: str, labels_path: str, flatten_image: bool = True) -> None:
        """Load images and labels from disk.

        Args:
            images_path: path to the image file (magic number 2051).
            labels_path: path to the label file (magic number 2049).
            flatten_image: whether ``__getitem__`` returns column vectors
                instead of 2-D images.

        Raises:
            ValueError: if a magic number is wrong, the image and label counts
                disagree, or a payload does not match its header.
        """
        # Images must be read first: _read_labels validates against self.size.
        self._read_images(images_path)
        self._read_labels(labels_path)
        self.flatten_image = flatten_image

    def split(self, validation_percent: float = 0.2, seed: int = 42) -> Tuple['MnistData', 'MnistData']:
        """Randomly partition the dataset into training and validation subsets.

        Args:
            validation_percent: fraction (not a percentage) of samples placed
                in the validation subset; must lie strictly between 0 and 1.
            seed: seed for the permutation, so the split is reproducible.

        Returns:
            ``(training, validation)``; the first
            ``int(size * (1 - validation_percent))`` shuffled samples go to
            training and the rest to validation.

        Raises:
            ValueError: if ``validation_percent`` is not in the open interval (0, 1).
        """
        if not 0.0 < validation_percent < 1.0:
            raise ValueError("validation_percent must be between 0 and 1 (exclusive).")

        rng = np.random.default_rng(seed)
        indices = rng.permutation(self.size)
        split = int(self.size * (1 - validation_percent))
        training_idx, validation_idx = indices[:split].tolist(), indices[split:].tolist()

        training = self.__subset(training_idx)
        validation = self.__subset(validation_idx)
        return training, validation

    def show_image(self, idx: int) -> None:
        """Plot the (unflattened) image at ``idx`` in grayscale with its label as the title."""
        image = self.images[idx]
        label = self.labels[idx]

        plt.imshow(image, cmap=plt.cm.gray)
        plt.title(f"Label: {label}")
        plt.axis('off')
        plt.show()

    def _read_images(self, path: str) -> None:
        """Read the image file: a 16-byte big-endian header followed by raw uint8 pixels.

        Sets ``size``, ``img_rows``, ``img_cols`` and ``images``.

        Raises:
            ValueError: if the magic number is not 2051 or the pixel payload
                length does not match the header dimensions.
        """
        with open(path, 'rb') as f:
            magic_number, size, rows, cols = struct.unpack('>IIII', f.read(16))

            if magic_number != 2051:
                raise ValueError(f'Magic number mismatch, expected 2051, got {magic_number}')

            raw_images = f.read()

        images = np.frombuffer(raw_images, dtype=np.uint8)
        expected_bytes = size * rows * cols
        # Fail with a clear message instead of an opaque reshape error.
        if images.size != expected_bytes:
            raise ValueError(
                f'Image payload size mismatch: header declares {size} images of '
                f'{rows}x{cols} ({expected_bytes} bytes) but the file contains {images.size} bytes.'
            )

        self.size, self.img_rows, self.img_cols = size, rows, cols
        # astype() copies, so the result is writable and independent of the file buffer.
        self.images = images.astype(np.float32).reshape(size, rows, cols) / 255.0

    def _read_labels(self, path: str) -> None:
        """Read the label file: an 8-byte big-endian header followed by one uint8 per label.

        Must be called after ``_read_images`` because the declared label count
        is checked against ``self.size``. Sets ``labels``.

        Raises:
            ValueError: if the magic number is not 2049, the declared label
                count differs from the image count, or the payload length does
                not match the declared count.
        """
        with open(path, 'rb') as f:
            header = struct.unpack('>II', f.read(8))
            magic_number, total_labels = header

            if magic_number != 2049:
                raise ValueError(f'Magic number mismatch, expected 2049, got {magic_number}')
            if self.size != total_labels:
                raise ValueError(f'Number of images does not match number of labels. There are {self.size} images and {total_labels} labels.')

            raw_labels = f.read()

        labels = np.frombuffer(raw_labels, dtype=np.uint8)
        # A truncated or padded file would otherwise only surface later, at index time.
        if labels.size != total_labels:
            raise ValueError(
                f'Label payload size mismatch: header declares {total_labels} labels '
                f'but the file contains {labels.size}.'
            )
        self.labels = labels

    def __len__(self) -> int:
        """Return the number of samples."""
        return self.size

    def __getitem__(self, idx: int) -> Tuple[np.ndarray, np.uint8]:
        """Return the ``(image, label)`` pair at ``idx``.

        Negative indices are not supported.

        Raises:
            ValueError: if ``idx`` is outside ``[0, size)``. (ValueError rather
                than IndexError is kept so existing callers are unaffected.)
        """
        if not 0 <= idx < self.size:
            raise ValueError(f'Index {idx} is out of range for a dataset of size {self.size}.')

        image = self.images[idx]
        if self.flatten_image:
            # flatten() copies, so callers cannot mutate the stored image through the result.
            image = image.flatten().reshape(-1, 1)

        label = self.labels[idx]

        return image, label

    def __iter__(self):
        """Yield every ``(image, label)`` pair in index order."""
        for idx in range(self.size):
            yield self[idx]

    def __subset(self, indices: List[int]) -> 'MnistData':
        """Build a new dataset holding copies of the samples at ``indices``.

        The instance is created without calling ``__init__`` so no files are
        read; image dimensions and the flatten setting are inherited.
        """
        data = object.__new__(MnistData)

        data.flatten_image = self.flatten_image
        data.size = len(indices)
        data.img_rows = self.img_rows
        data.img_cols = self.img_cols
        # Indexing with a list of ints copies the selected rows.
        data.images = self.images[indices]
        data.labels = self.labels[indices]

        return data

In [15]:
class NeuralNetwork:
    """Fully connected network with randomly initialised weights and biases.

    ``sizes[i]`` is the width of layer ``i``; ``weights[i]`` has shape
    ``(sizes[i + 1], sizes[i])`` and ``biases[i]`` has shape ``(sizes[i + 1], 1)``.
    Training is not implemented yet: the SGD / back-propagation methods are
    placeholders that do nothing.
    """

    def __init__(self, sizes: List[int], rng: 'np.random.Generator | None' = None) -> None:
        """Create the network and draw every parameter from a standard normal.

        Args:
            sizes: layer widths, input layer first.
            rng: optional NumPy ``Generator`` used for initialisation, for
                reproducible networks. When omitted, the global ``np.random``
                stream is used (biases are drawn first, then weights).
        """
        # TODO CHAPTER 1:
        self.layer_count = len(sizes)
        self.sizes = sizes
        if rng is None:
            # Global stream, so np.random.seed(...) still controls the result.
            self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
            self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        else:
            self.biases = [rng.standard_normal((y, 1)) for y in sizes[1:]]
            self.weights = [rng.standard_normal((y, x)) for x, y in zip(sizes[:-1], sizes[1:])]

    def feed_forward(self, a):
        """Propagate column vector ``a`` through every layer and return the predicted class.

        Returns:
            Index of the largest entry of the final layer's output.
        """
        # TODO CHAPTER 1:
        # NOTE(review): no activation function is applied between layers, so the
        # stacked layers compose into a single affine map. Confirm whether a
        # non-linearity (e.g. sigmoid) is meant to be added here.
        for weights, biases in zip(self.weights, self.biases):
            a = weights @ a + biases

        return a.argmax()

    def evaluate(self, test_data):
        """Return the fraction of samples whose predicted class equals the label.

        Args:
            test_data: any iterable of ``(image, label)`` pairs (an ``MnistData``
                instance, a list, a generator, ...).

        Returns:
            Accuracy in [0, 1]; 0.0 when ``test_data`` yields no samples.
        """
        # TODO CHAPTER 1:
        correct = 0
        total = 0
        # Count while iterating rather than relying on a ``.size`` attribute,
        # which plain lists lack and which means "element count" on ndarrays.
        for image, label in test_data:
            total += 1
            if self.feed_forward(image) == label:
                correct += 1

        if total == 0:
            return 0.0
        return correct / total

    def stochastic_gradient_descent(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Placeholder: not implemented yet; currently does nothing and returns None."""
        pass

    def update_mini_batch(self, mini_batch, eta):
        """Placeholder: not implemented yet; currently does nothing and returns None."""
        pass

    def back_propagation(self, x, y):
        """Placeholder: not implemented yet; currently does nothing and returns None."""
        pass

    def cost_derivative(self, output_activations, y):
        """Placeholder: not implemented yet; currently does nothing and returns None."""
        pass

In [None]:
def main(trials: int = 10000, data_dir: str = 'mnist-data') -> None:
    """Plot the test-accuracy distribution of untrained, randomly initialised networks.

    Loads the MNIST training and test files from ``data_dir``, builds
    ``trials`` fresh networks with two hidden layers of 15 units, evaluates
    each on the full test set, and shows a histogram of the accuracies.

    Args:
        trials: number of random networks to build and evaluate. Every trial
            runs a full pass over the test set, so the default is slow; pass a
            smaller value for a quick look.
        data_dir: directory containing the four MNIST ``*-ubyte`` files.
    """
    # Only the training half is used here (for the image dimensions); the
    # validation half is kept for when training is implemented.
    training, _validation = MnistData(
        f'{data_dir}/train-images.idx3-ubyte',
        f'{data_dir}/train-labels.idx1-ubyte',
    ).split()
    testing = MnistData(f'{data_dir}/t10k-images.idx3-ubyte', f'{data_dir}/t10k-labels.idx1-ubyte')

    # Same architecture for every trial: input pixels -> 15 -> 15 -> one output per digit.
    layer_sizes = [training.img_rows * training.img_cols, 15, 15, training.DIGIT_COUNT]

    accuracy = []
    for _ in range(trials):
        digit_classifier = NeuralNetwork(list(layer_sizes))
        accuracy.append(digit_classifier.evaluate(testing))

    # Create the histogram
    plt.hist(accuracy, bins=100, edgecolor='black')

    # Add labels and title
    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.title('Histogram of Floating Values (0 to 1)')

    # Show the plot
    plt.show()

# Run the experiment when this cell executes. NOTE: with its defaults main()
# builds and evaluates 10000 networks over the whole test set, so expect a
# long runtime.
main()