In [3]:
###################################################
# A basic feed forward neural network
# Neural networks for people who get confused easily
# James McCammon
# May 2024
###################################################

###
# Define classes for our neural network
###

import numpy as np
import random
import string
from keras.datasets import mnist

class NNManager:
    def __init__(self, n_features, layers):
        self.network = []
        p_l = n_features
        for n_nodes in layers:   
            self.network.append(Dense_layer(p_l, n_nodes))
            p_l = n_nodes

    def batch_manager(self, inputs, true_labels, learning_rate, batch_size):
        batched_inputs = self.batch_splitter(inputs, batch_size)
        batched_true_labels = self.batch_splitter(true_labels, batch_size)

        for batch_number in range(len(batched_inputs)):
            self.optimizer(batched_inputs[batch_number], batched_true_labels[batch_number], learning_rate)

    def batch_splitter(self, inputs, batch_size):
        batches = [inputs[k:k + batch_size] for k in range(0, len(inputs), batch_size)]
        if(len(batches[-1]) != batch_size):
            batches.pop(-1)
        return np.array(batches)

    def optimizer(self, inputs, true_labels, learning_rate):
        predicted_probs = self.nn_forward_pass(inputs)
        self.calculate_loss(predicted_probs, true_labels)
        self.nn_backward_pass(inputs, predicted_probs, true_labels, learning_rate)
    
    def nn_forward_pass(self, inputs):
        previous_layer_output = inputs

        #Iterate over each layer in the network
        for layer in self.network:
            layer.forward(previous_layer_output)

            if (layer == self.network[-1]): # Check if layer is last layer
                layer.post_activation_output = Activation.softmax(layer.pre_activation_output)
                predicted_probs = layer.post_activation_output 
            else:
                layer.post_activation_output = Activation.relu(layer.pre_activation_output)            
            
            previous_layer_output = layer.post_activation_output

        return predicted_probs
    
    def calculate_loss(self, predicted_probs, true_labels):
        loss = Loss.cross_entropy(true_labels, predicted_probs)
        return loss
            
    def nn_backward_pass(self, inputs, predicted_probs, true_labels, learning_rate, clip_value = 1.0):
        cumulative_derivative = predicted_probs - true_labels
        backward_network = np.flip(self.network)

        for idx, layer in enumerate(backward_network):
            if idx == len(backward_network) - 1:
                current_layer_post_activation = inputs.T
                
            else:
                previous_layer_pre_activation = backward_network[idx + 1].pre_activation_output
                current_layer_post_activation = backward_network[idx + 1].post_activation_output.T
            
            weight_gradient = current_layer_post_activation @ cumulative_derivative
            bias_gradient = np.sum(cumulative_derivative, axis=0, keepdims=True)

            # Clip gradients to prevent exploding gradients
            weight_gradient = np.clip(weight_gradient, -clip_value, clip_value)
            bias_gradient = np.clip(bias_gradient, -clip_value, clip_value)

            layer.weights -= learning_rate * weight_gradient.T
            layer.biases -= learning_rate * bias_gradient.T
            
            if idx != len(backward_network) - 1:
                cumulative_derivative = (cumulative_derivative @ layer.weights) * (previous_layer_pre_activation > 0)


class Predictor:
    @staticmethod
    def predict(NNManager, input_to_predict, true_label):
        prediction = np.argmax(NNManager.nn_forward_pass(input_to_predict))
        print(f"Class prediction is {prediction}")
        print(f"Actual class is {np.argmax(true_label)}")
        return prediction

class Dense_layer:
    def __init__(self, n_inputs, n_nodes):
        self.name = ''.join(random.choices(string.ascii_uppercase, k=5))
        self.n_nodes = n_nodes
        self.weights = self.initialize_weights(n_nodes, n_inputs)
        self.biases = self.initialize_biases(n_nodes)
        self.pre_activation_output = None
        self.post_activation_output = None

    # Use the so-called "He initalization"
    def initialize_weights(self, n_nodes, n_inputs):
        mu = 0
        sigma = np.sqrt(2 / n_inputs)
        return np.random.normal(mu, sigma, (n_nodes, n_inputs))
    
    def initialize_biases(self, n_nodes):
        return np.zeros((n_nodes, 1))
    
    def forward(self, input):
        self.pre_activation_output = input @ self.weights.T + self.biases.T # Transpose biases so broadcasting works as expected
        return self.pre_activation_output
    
class Activation:
    @staticmethod
    def relu(layer_input):
        post_activation_output = np.maximum(0, layer_input)
        return post_activation_output
    
    @staticmethod
    def softmax(logits):
        exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        predicted_probs = exp_logits / exp_logits.sum(axis=1, keepdims=True)
        return predicted_probs

class Loss:
    @staticmethod
    def cross_entropy(true_labels, predicted_probs):
        batch_size = true_labels.shape[0]
        predicted_probs_clipped = np.clip(predicted_probs, 1e-7, 1 - 1e-7)
        log_probs = np.log(predicted_probs_clipped)
        loss = -1 * np.multiply(true_labels, log_probs).sum() / batch_size
        return loss


In [4]:
###
# Uses the classes on the well-known MNIST dataset
###

# Load the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the images
train_images = train_images / 255.0
test_images = test_images / 255.0

# Flatten the images
train_images = train_images.reshape(train_images.shape[0], -1)
test_images = test_images.reshape(test_images.shape[0], -1)

# Convert labels to one-hot encoding
def one_hot_encode(labels, num_classes):
    one_hot = np.zeros((labels.size, num_classes))
    one_hot[np.arange(labels.size), labels] = 1
    return one_hot

train_labels = one_hot_encode(train_labels, 10)
test_labels = one_hot_encode(test_labels, 10)

# Define the neural network parameters
input_size = 784  # 28x28
hidden_size = 64
output_size = 10
batch_size = 32
learning_rate = 0.01
epochs = 10

# Initialize and train the neural network
nn_manager = NNManager(input_size, [hidden_size, output_size])

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    permutation = np.random.permutation(train_images.shape[0])
    train_images_shuffled = train_images[permutation]
    train_labels_shuffled = train_labels[permutation]
    
    nn_manager.batch_manager(train_images_shuffled, train_labels_shuffled, learning_rate, batch_size)

# Evaluate the neural network on test data
predicted_probs = nn_manager.nn_forward_pass(test_images)
test_loss = nn_manager.calculate_loss(predicted_probs, test_labels)
print(f"Test Loss: {test_loss:.4f}")

# Calculate accuracy
predictions = np.argmax(predicted_probs, axis=1)
labels = np.argmax(test_labels, axis=1)
accuracy = np.mean(predictions == labels)
print(f"Test Accuracy: {accuracy:.4f}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 1.7810
Test Accuracy: 0.8638


In [35]:
# Optionally we can predict the class of a specific image and compare it against the actual class label
image_number = 403
image_to_predict = test_images[image_number]
true_label = test_labels[image_number]
Predictor.predict(nn_manager, image_to_predict, true_label)

Class prediction is 8
Actual class is 8


8