In [1]:
#
# This is a sample Notebook to demonstrate how to read "MNIST Dataset"
#
import numpy as np # linear algebra
import struct
from array import array
from os.path  import join

#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    """Loads MNIST images and labels from IDX-format binary files.

    The four file paths given to the constructor are stored unchanged and
    are only opened when `load_data` is called.
    """

    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        """Remember where the training and test image/label files live.

        Parameters:
            training_images_filepath: Path of the training images file.
            training_labels_filepath: Path of the training labels file.
            test_images_filepath: Path of the test images file.
            test_labels_filepath: Path of the test labels file.
        """
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one images file and its matching labels file.

        Both headers are parsed as big-endian unsigned 32-bit integers. The
        labels header is (magic, count) and the images header is
        (magic, count, rows, cols).

        Parameters:
            images_filepath: Path of the images file (magic number 2051).
            labels_filepath: Path of the labels file (magic number 2049).

        Returns:
            (images, labels) where `images` is a list with one entry per
            image, each entry being a list of `rows` one-dimensional NumPy
            arrays of length `cols`, and `labels` is an `array('B')` holding
            every byte that follows the labels header.

        Raises:
            ValueError: If either magic number does not match, or if the
                images file holds fewer pixels than its header announces
                (raised by the reshape).
        """
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            # From here on `size` is the image count announced by the images header.
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())

        pixels_per_image = rows * cols
        images = []
        for i in range(size):
            start = i * pixels_per_image
            img = np.array(image_data[start:start + pixels_per_image])
            # Use the dimensions from the header rather than assuming 28x28.
            img = img.reshape(rows, cols)
            # Each image is stored as a list of its row arrays.
            images.append(list(img))

        return images, labels

    def load_data(self):
        """Read the training and test sets from the stored file paths.

        Returns:
            ((x_train, y_train), (x_test, y_test)), each pair being the
            (images, labels) result of `read_images_labels`.
        """
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)

In [2]:
#
# Verify Reading Dataset via MnistDataloader class
#
%matplotlib inline
import random
import matplotlib.pyplot as plt

#
# Set file paths based on added MNIST Datasets
#
# Directory that holds the four MNIST files; each path below is input_path joined with a file name.
input_path = './data'
(
    training_images_filepath,
    training_labels_filepath,
    test_images_filepath,
    test_labels_filepath,
) = (
    join(input_path, filename)
    for filename in (
        'train-images.idx3-ubyte',
        'train-labels.idx1-ubyte',
        't10k-images.idx3-ubyte',
        't10k-labels.idx1-ubyte',
    )
)

#
# Helper function to show a list of images with their relating titles
#
def show_images(images, title_texts):
    """Draw images in a 5-column grid on a new figure, with optional titles.

    Parameters:
        images: Sequence of images accepted by `plt.imshow`.
        title_texts: Titles paired with `images` by position; an empty
            string means "no title". Pairing stops at the shorter of the
            two sequences.
    """
    cols = 5
    # Ceiling division so a count that is a multiple of `cols` does not get
    # an extra empty row; at least one row is kept for an empty input.
    rows = max(1, -(-len(images) // cols))
    plt.figure(figsize=(30,20))
    # Subplot positions are 1-based.
    for index, (image, title_text) in enumerate(zip(images, title_texts), start=1):
        plt.subplot(rows, cols, index)
        plt.imshow(image, cmap=plt.cm.gray)
        if title_text != '':
            plt.title(title_text, fontsize = 15)

#
# Load MNIST dataset
#
mnist_dataloader = MnistDataloader(
    training_images_filepath=training_images_filepath,
    training_labels_filepath=training_labels_filepath,
    test_images_filepath=test_images_filepath,
    test_labels_filepath=test_labels_filepath,
)
# load_data() returns ((training images, training labels), (test images, test labels)).
(x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

#
# Show some random training and test images 
#
# images_2_show = []
# titles_2_show = []
# for i in range(0, 10):
#     r = random.randint(1, 60000)
#     images_2_show.append(x_train[r])
#     titles_2_show.append('training image [' + str(r) + '] = ' + str(y_train[r]))    

# for i in range(0, 5):
#     r = random.randint(1, 10000)
#     images_2_show.append(x_test[r])        
#     titles_2_show.append('test image [' + str(r) + '] = ' + str(y_test[r]))    

# show_images(images_2_show, titles_2_show)

In [54]:
class NeuralNetwork:
    """Fully connected feed-forward network built on NumPy arrays.

    `weights[i]` has shape (units_out, units_in) and `biases[i]` has shape
    (units_out, 1), so a layer is evaluated as
    `activate(weights[i] @ column + biases[i])` on a column vector. The same
    activation is applied to every layer, including the output layer.
    """

    def __init__(self, input_dim, hidden_layers, output_dim, initialization="xavier", activation="sigmoid"):
        """
        Initialize a neural network with the given dimensions and initialization method.

        Parameters:
            input_dim (int): Number of input features.
            hidden_layers (list of int): List containing the number of units in each hidden layer.
                May be empty, in which case the input is connected directly to the output layer.
            output_dim (int): Number of output units.
            initialization (str): Initialization method ('xavier', 'he', 'lecun', 'uniform_xavier', 'uniform_he', or 'uniform_lecun').
            activation (str): Activation applied after every layer. Only 'sigmoid' is implemented;
                any other value makes `activate` / `activation_derivative` raise ValueError.

        Raises:
            ValueError: If `initialization` is not one of the supported names.
        """
        self.input_dim = input_dim
        self.hidden_layers = hidden_layers
        self.output_dim = output_dim
        self.initialization = initialization
        self.activation = activation

        # Initialize weights and biases
        self.weights = []
        self.biases = []

        # Walk consecutive layer sizes: input -> hidden... -> output.
        layer_sizes = [input_dim, *hidden_layers, output_dim]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            self.weights.append(self._init_weight((fan_out, fan_in), fan_in, fan_out))
            self.biases.append(np.zeros((fan_out, 1)))

    def _init_weight(self, shape, fan_in, fan_out):
        """Draw a random weight matrix of `shape`, scaled per `self.initialization`."""
        if self.initialization == "xavier":
            return np.random.randn(*shape) * np.sqrt(2 / (fan_in + fan_out))
        elif self.initialization == "he":
            return np.random.randn(*shape) * np.sqrt(2 / fan_in)
        elif self.initialization == "lecun":
            return np.random.randn(*shape) * np.sqrt(1 / fan_in)
        elif self.initialization == "uniform_xavier":
            limit = np.sqrt(6 / (fan_in + fan_out))
            return np.random.uniform(-limit, limit, size=shape)
        elif self.initialization == "uniform_he":
            limit = np.sqrt(6 / fan_in)
            return np.random.uniform(-limit, limit, size=shape)
        elif self.initialization == "uniform_lecun":
            limit = np.sqrt(3 / fan_in)
            return np.random.uniform(-limit, limit, size=shape)
        else:
            raise ValueError("Unsupported initialization method. Choose 'xavier', 'he', 'lecun', 'uniform_xavier', 'uniform_he', or 'uniform_lecun'.")

    def activate(self, x):
        """Apply the configured activation element-wise.

        Raises:
            ValueError: If `self.activation` is not 'sigmoid'.
        """
        if self.activation == "sigmoid":
            # exp(-|x|) never overflows. For x >= 0 this is 1 / (1 + e^-x);
            # for x < 0 it is the equivalent form e^x / (1 + e^x).
            z = np.exp(-np.abs(x))
            return np.where(x >= 0, 1.0, z) / (1.0 + z)
        raise ValueError("Unsupported activation function. Choose 'sigmoid'.")

    def activation_derivative(self, x):
        """Derivative of the configured activation, evaluated element-wise at `x`.

        Raises:
            ValueError: If `self.activation` is not 'sigmoid'.
        """
        if self.activation == "sigmoid":
            # e^-x / (1 + e^-x)^2 is symmetric in x, so it is evaluated at
            # |x| to avoid the inf / inf = nan that large negative x produces.
            z = np.exp(-np.abs(x))
            return z / ((1 + z)**2)
        raise ValueError("Unsupported activation function. Choose 'sigmoid'.")

    def compute_output(self, input):
        """Propagate a column vector through every layer and return the final activation."""
        output = input
        for weight, bias in zip(self.weights, self.biases):
            output = self.activate((weight @ output) + bias)
        return output

    def get_input_layer(self, input):
        """Flatten a 2-D image (rows of pixel values) into an (n, 1) column vector.

        The rows must all have the same length. A new array is returned; the
        argument is not modified.
        """
        return np.array(input).reshape(-1, 1)

    def cost(self, input, label):
        """Squared-error cost of one network output against a one-hot target.

        Parameters:
            input: Network output as a column vector with `output_dim` rows
                (`total_cost` passes the result of `compute_output` here).
            label (int): Index of the correct class, used to build the one-hot target.

        Returns:
            Sum over the rows of the squared differences; a length-1 array for a column-vector input.
        """
        target = np.zeros(self.output_dim)
        target[label] = 1
        target = target.reshape(-1, 1)
        return np.sum(np.square(input - target), axis=0)

    def total_cost(self, input_data, input_labels):
        """Sum `cost` over every image in `input_data`, using `input_labels[i]` for image i.

        Returns 0 when `input_data` is empty.
        """
        total = 0
        for i, image in enumerate(input_data):
            output_layer = self.compute_output(self.get_input_layer(image))
            total += self.cost(output_layer, input_labels[i])
        return total

    def forward_pass(self, input):
        """Propagate a column vector through the network, keeping every intermediate value.

        Returns:
            (activations, weighted_sums): `activations[0]` is `input` itself and
            `activations[i + 1]` is the output of layer i; `weighted_sums[i]` is the
            pre-activation value `weights[i] @ activations[i] + biases[i]`.
        """
        activations = [input]
        weighted_sums = []

        act = input
        for weight, bias in zip(self.weights, self.biases):
            ws = (weight @ act) + bias
            act = self.activate(ws)

            weighted_sums.append(ws)
            activations.append(act)

        return activations, weighted_sums

    # TODO: compute_gradient(self, input, label) is not implemented yet.

    def summarize(self):
        """Print a summary of the network's dimensions and parameter shapes."""
        print("Neural Network Summary:")
        print(f"Input dimension: {self.input_dim}")
        print(f"Hidden layers: {self.hidden_layers}")
        print(f"Output dimension: {self.output_dim}")
        print(f"Initialization method: {self.initialization}\n")
        for idx, (w, b) in enumerate(zip(self.weights, self.biases)):
            print(f"Layer {idx + 1} weights shape: {w.shape}")
            print(f"Layer {idx + 1} biases shape: {b.shape}")


In [55]:
# 784 inputs, two hidden layers of 16 units each, 10 outputs.
nn = NeuralNetwork(
    input_dim=784,
    hidden_layers=[16, 16],
    output_dim=10,
    initialization="xavier",
    activation="sigmoid",
)

In [56]:
# Run a forward pass on the first training image. As the last expression in
# the cell, the returned (activations, weighted_sums) tuple is displayed in
# full, starting with the flattened input column.
nn.forward_pass(nn.get_input_layer(x_train[0]))

([array([[  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
         [  0],
        