In [2]:
%matplotlib inline

import numpy as np
import random
from solutions.mnistdata import MnistData

In [3]:
class NeuralNetwork:
    """
    Fully connected feed-forward neural network with sigmoid activations,
    trained by mini-batch stochastic gradient descent on the quadratic cost.

    Conventions used throughout the class:
    - Activations are column vectors of shape (n, 1).
    - weights[i] has shape (sizes[i + 1], sizes[i]); biases[i] has shape (sizes[i + 1], 1).
    - An expected output `y` may be a class index (scalar) or a vector with one
      entry per output neuron (e.g. one-hot); both forms are accepted.
    """

    def __init__(self, sizes, seed=None):
        """
        Initialize the neural network with the given layer sizes.

        Parameters:
        - sizes: A list of integers where each integer represents the number of neurons in that layer.
               The first element is the input layer size, the last is the output layer size,
               and any in between are hidden layers.
        - seed: Optional seed for weight initialization and mini-batch shuffling.
               Pass an integer to make training runs reproducible.

        Initialize:
        - layer_count: Total number of layers
        - sizes: Store the layer sizes
        - biases: Random initial biases for each layer except the input layer
        - weights: Random initial weights between each pair of adjacent layers

        Raises:
        - ValueError: If fewer than two layers are given or a layer size is not positive
        """
        sizes = [int(size) for size in sizes]
        if len(sizes) < 2:
            raise ValueError('A network needs at least an input and an output layer')
        if any(size <= 0 for size in sizes):
            raise ValueError('Every layer must contain at least one neuron')

        self.layer_count = len(sizes)
        self.sizes = sizes

        # Separate generators so that seeding the network never touches global random state
        self._rng = np.random.default_rng(seed)
        self._shuffler = random.Random(seed)

        # The input layer has no biases; every other layer gets one bias per neuron
        self.biases = [self._rng.standard_normal((size, 1)) for size in sizes[1:]]
        self.weights = [
            self._rng.standard_normal((size_out, size_in))
            for size_in, size_out in zip(sizes[:-1], sizes[1:])
        ]

    def _as_column(self, a):
        """
        Coerce a network input to a float array the layers can consume.

        An array that already has one row per input neuron is returned unchanged
        (a single column, or several columns evaluated side by side). Anything
        else (flat vector, row vector, 2-D image) is flattened into one column.
        """
        a = np.asarray(a, dtype=float)
        if a.ndim == 2 and a.shape[0] == self.sizes[0]:
            return a
        return a.reshape(-1, 1)

    @staticmethod
    def _label_of(y):
        """Return the class index encoded by `y` (a scalar label or a one-hot style vector)."""
        y = np.asarray(y)
        if y.size > 1:
            return int(np.argmax(y))
        return int(y.reshape(-1)[0])

    def _target_of(self, y):
        """
        Return `y` as a column vector with one entry per output neuron.

        A vector of the right length is reshaped; a scalar class index is
        expanded to a one-hot column.
        """
        y = np.asarray(y, dtype=float)
        output_size = self.sizes[-1]
        if y.size == output_size:
            return y.reshape(-1, 1)
        if y.size == 1:
            target = np.zeros((output_size, 1))
            target[int(y.reshape(-1)[0])] = 1.0
            return target
        raise ValueError(
            f'Expected output has {y.size} values but the output layer has {output_size} neurons'
        )

    def activation_function(self, z):
        """
        Implement the sigmoid activation function.

        Parameters:
        - z: Input value or numpy array

        Returns:
        - Output of sigmoid function: 1 / (1 + e^(-z))
        """
        z = np.asarray(z, dtype=float)
        # exp(-z) overflows to inf for very negative z; the quotient is then
        # correctly 0, so only the warning needs to be silenced.
        with np.errstate(over='ignore'):
            return 1.0 / (1.0 + np.exp(-z))

    def activation_function_derivative(self, z):
        """
        Implement the derivative of the sigmoid activation function.

        Parameters:
        - z: Input value or numpy array

        Returns:
        - Derivative of sigmoid function: sigmoid(z) * (1 - sigmoid(z))
        """
        sigmoid = self.activation_function(z)
        return sigmoid * (1.0 - sigmoid)

    def feed_forward(self, a):
        """
        Compute the output of the network given an input.

        Parameters:
        - a: Input to the network (first layer activations)

        Returns:
        - The output activations of the final layer after passing through each layer
          and applying the activation function
        """
        a = self._as_column(a)
        for bias, weight in zip(self.biases, self.weights):
            a = self.activation_function(np.dot(weight, a) + bias)
        return a

    def evaluate(self, test_data):
        """
        Evaluate the network's performance on test data.

        The predicted class is the index of the most active output neuron.

        Parameters:
        - test_data: List of tuples (x, y) where x is input and y is expected output

        Returns:
        - Count of test samples correctly classified by the network
        """
        return sum(
            int(np.argmax(self.feed_forward(x)) == self._label_of(y))
            for x, y in test_data
        )

    def cost_derivative(self, output_activations, y):
        """
        Calculate the derivative of the cost function with respect to the output activations.
        For quadratic cost, this is simply the difference between output and expected output.

        Parameters:
        - output_activations: The actual output from the network
        - y: The expected/target output

        Returns:
        - The gradient of the cost function with respect to the output activations
        """
        return output_activations - y

    def stochastic_gradient_descent(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """
        Train the neural network using mini-batch stochastic gradient descent.

        Parameters:
        - training_data: List of tuples (x, y) where x is input and y is expected output
        - epochs: Number of training epochs (iterations over the entire training data)
        - mini_batch_size: Size of each mini-batch for gradient descent
        - eta: Learning rate
        - test_data: Optional test data to evaluate performance after each epoch

        For each epoch the training data is shuffled, split into mini-batches and
        every mini-batch is passed to update_mini_batch. If test_data is given,
        the number of correctly classified test samples is printed after each epoch.

        Raises:
        - ValueError: If mini_batch_size is smaller than 1
        """
        if mini_batch_size < 1:
            raise ValueError('mini_batch_size must be at least 1')

        # Shuffle a private copy so the caller's list keeps its order
        samples = list(training_data)
        if test_data is not None:
            test_data = list(test_data)

        for epoch in range(epochs):
            self._shuffler.shuffle(samples)
            for start in range(0, len(samples), mini_batch_size):
                self.update_mini_batch(samples[start:start + mini_batch_size], eta)

            if test_data is not None:
                print(f'Epoch {epoch + 1}/{epochs}: {self.evaluate(test_data)} / {len(test_data)}')

    def update_mini_batch(self, mini_batch, eta):
        """
        Update the network's weights and biases by applying gradient descent
        using backpropagation to a single mini batch.

        Parameters:
        - mini_batch: List of tuples (x, y) where x is input and y is expected output
        - eta: Learning rate

        The gradients returned by back_propagation are summed over the mini-batch;
        weights and biases then move against the averaged gradient scaled by eta.
        An empty mini-batch leaves the network unchanged.
        """
        batch_size = len(mini_batch)
        if batch_size == 0:
            return

        nabla_b = [np.zeros(bias.shape) for bias in self.biases]
        nabla_w = [np.zeros(weight.shape) for weight in self.weights]

        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.back_propagation(x, y)
            nabla_b = [total + delta for total, delta in zip(nabla_b, delta_nabla_b)]
            nabla_w = [total + delta for total, delta in zip(nabla_w, delta_nabla_w)]

        step = eta / batch_size
        self.weights = [weight - step * grad for weight, grad in zip(self.weights, nabla_w)]
        self.biases = [bias - step * grad for bias, grad in zip(self.biases, nabla_b)]

    def back_propagation(self, x, y):
        """
        Compute the gradient of the cost function with respect to weights and biases
        using the backpropagation algorithm.

        Parameters:
        - x: Input to the network (a single training example)
        - y: Expected output

        Returns:
        - nabla_b: Gradient of the cost function with respect to biases
        - nabla_w: Gradient of the cost function with respect to weights

        Both are lists with one array per layer, shaped like biases and weights.
        """
        nabla_b = [np.zeros(bias.shape) for bias in self.biases]
        nabla_w = [np.zeros(weight.shape) for weight in self.weights]

        # Forward pass, remembering every weighted input z and every activation
        activation = np.asarray(x, dtype=float).reshape(-1, 1)
        target = self._target_of(y)
        activations = [activation]
        zs = []
        for bias, weight in zip(self.biases, self.weights):
            z = np.dot(weight, activation) + bias
            zs.append(z)
            activation = self.activation_function(z)
            activations.append(activation)

        # Error of the output layer
        delta = self.cost_derivative(activations[-1], target) * self.activation_function_derivative(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].T)

        # Walk backwards: layer == 2 is the last hidden layer, and so on.
        # Negative indices let the same expression address every layer.
        for layer in range(2, self.layer_count):
            delta = np.dot(self.weights[-layer + 1].T, delta) * self.activation_function_derivative(zs[-layer])
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(delta, activations[-layer - 1].T)

        return nabla_b, nabla_w

In [4]:
def main():
    """
    Train a digit classifier on the MNIST training set and report how many
    test images it classifies correctly.

    The image and label files are read from the `mnist-data/` directory via
    MnistData. The network has one input neuron per image pixel
    (img_rows * img_cols), two hidden layers and DIGIT_COUNT output neurons.
    """
    # Hyperparameters
    hidden_layer_sizes = [15, 15]
    epochs = 10
    mini_batch_size = 30
    learning_rate = 3

    # Raw Data
    raw_training = MnistData('mnist-data/train-images.idx3-ubyte', 'mnist-data/train-labels.idx1-ubyte')
    raw_testing = MnistData('mnist-data/t10k-images.idx3-ubyte', 'mnist-data/t10k-labels.idx1-ubyte')

    # Processed Data
    training = raw_training.get_data()
    testing = raw_testing.get_data()

    input_size = raw_training.img_rows * raw_training.img_cols
    digit_classifier = NeuralNetwork([input_size, *hidden_layer_sizes, raw_training.DIGIT_COUNT])

    digit_classifier.stochastic_gradient_descent(training, epochs, mini_batch_size, learning_rate, testing)

    accuracy = digit_classifier.evaluate(testing)
    # Skip the report only when no result was produced (None); zero correct
    # answers is still a result worth printing.
    if accuracy is not None:
        print(f'Final Accuracy: {accuracy} / {len(testing)}')

# Run the full train-and-evaluate pipeline when this cell is executed.
main()