In [1]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# scipy special for the sigmoid function expit()
import scipy.special as special
import time

# Neural network class definition
class NeuralNetwork:
    """Three-layer (input, hidden, output) network with sigmoid activations.

    Attributes:
        inodes, hnodes, onodes: number of nodes in each layer.
        wih: weight matrix input -> hidden, shape (hnodes, inodes).
        who: weight matrix hidden -> output, shape (onodes, hnodes).
        lr: learning rate used to scale every weight update.
        activation_function: callable applied element-wise to layer inputs.
    """

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """Create the network and draw random initial weights.

        Args:
            input_nodes: number of input nodes.
            hidden_nodes: number of hidden nodes.
            output_nodes: number of output nodes.
            learning_rate: step size applied to each weight update.
        """
        # Set number of nodes in each input, hidden, output layer
        self.inodes = input_nodes
        self.hnodes = hidden_nodes
        self.onodes = output_nodes

        # Link weight matrices, wih and who.
        # Row j, column i holds the weight of the link from node i in one
        # layer to node j in the next layer.
        # Weights are drawn from a zero-centred normal distribution whose
        # standard deviation is 1/sqrt(number of incoming links) of the
        # receiving layer: inodes for wih, hnodes for who.
        self.wih = np.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # Learning rate
        self.lr = learning_rate

        # Activation function is the sigmoid function
        self.activation_function = special.expit

    def _forward(self, inputs):
        """Propagate column-oriented inputs through both layers.

        Returns:
            Tuple (hidden_outputs, final_outputs).
        """
        # Signals into, then out of, the hidden layer
        hidden_outputs = self.activation_function(np.dot(self.wih, inputs))
        # Signals into, then out of, the final output layer
        final_outputs = self.activation_function(np.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    # Train the neural network
    def train(self, inputs_list, targets_list):
        """Run one forward pass and update both weight matrices in place.

        Args:
            inputs_list: input values for one record.
            targets_list: desired output values for that record.
        """
        # Convert inputs and targets to 2d column arrays
        inputs = np.array(inputs_list, ndmin=2).T
        targets = np.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._forward(inputs)

        # Error is the (target - actual)
        output_errors = targets - final_outputs

        # Hidden layer error is the output_errors, split by weights and
        # recombined at the hidden nodes. This must use the weights from
        # before the update below.
        hidden_errors = np.dot(self.who.T, output_errors)

        # Update the weights for the links between the hidden and output layers
        self.who += self.lr * np.dot((output_errors * final_outputs * (1.0 - final_outputs)),
                                     np.transpose(hidden_outputs))

        # Update the weights for the links between the input and hidden layers
        self.wih += self.lr * np.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)),
                                     np.transpose(inputs))

    def query(self, inputs_list):
        """Return the network's output for the given inputs.

        Args:
            inputs_list: input values for one record.

        Returns:
            Array of output-layer activations, one row per output node.
        """
        # Convert input list to 2d column array
        inputs = np.array(inputs_list, ndmin=2).T

        _, final_outputs = self._forward(inputs)
        return final_outputs


### Default data

In [2]:
# Number of input and output nodes.
# Each record supplies 784 pixel values (784 = 28 x 28) after its label,
# and there is one output node per digit label.
input_nodes = 784
output_nodes = 10

# Load the mnist training data CSV file into a list of lines.
# The context manager closes the file even if reading fails.
with open("mnist_dataset/mnist_train.csv", 'r') as training_data_file:
    training_data_list = training_data_file.readlines()

# Load the mnist test data CSV file into a list of lines
with open("mnist_dataset/mnist_test.csv", 'r') as test_data_file:
    test_data_list = test_data_file.readlines()

### Performance tester

In [3]:
def test_performance(hidden_nodes, learning_rate, epochs):
    # Create instance of neural network
    n = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)
    for epoch in range(epochs):
        # Go through all records in the training data set
        # Train the neural network
        for record in training_data_list:
            # Split the record by the ',' commas
            all_values = record.split(',')
            # Scale and shift the inputs
            inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
            # Create the target output values (all 0.01, except the desired label which is 0.99)
            targets = np.zeros(output_nodes) + 0.01
            # all_values[0] is the target label for this record
            targets[int(all_values[0])] = 0.99
            n.train(inputs, targets)

    # Scorecard for how well the network performs, initially empty
    scorecard = []

    # Go through all the records in the test data set
    for record in test_data_list:
        # Split the record by the ',' commas
        all_values = record.split(',')
        # Correct answer is first value
        correct_label = int(all_values[0])
        # Scale and shift the inputs
        inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
        # Query the network
        outputs = n.query(inputs)
        # The index of the highest value corresponds to the label
        label = np.argmax(outputs)
        # Append correct or incorrect to list
        if(label == correct_label):
            scorecard.append(1)
        else:
            # Network's answer doesn't match correct answer, add 0 to scorecard
            scorecard.append(0)

    # Calculate the performance score, the fraction of correct answers
    scorecard_array = np.asarray(scorecard)
    return scorecard_array.sum() / scorecard_array.size

### Plotters

In [4]:
def plot_hidden_per(h_list, learning_rate, epochs):
    """Plot the measured performance for each entry of `h_list`.

    Args:
        h_list: values placed on the x axis and passed to the performance test.
        learning_rate: forwarded unchanged to the performance test.
        epochs: forwarded unchanged to the performance test.
    """
    # NOTE(review): test_hidden_nodes_performance is not defined in the
    # visible cells - confirm it exists before calling this plotter.
    scores = test_hidden_nodes_performance(h_list, learning_rate, epochs)

    # One figure with a single axes; only the axes is needed afterwards
    _, axes = plt.subplots()
    axes.plot(h_list, scores, 'o-')

    # NOTE(review): labels say "Hidden layers" although the helper name
    # suggests hidden-node counts - confirm the intended wording.
    labels = {
        "xlabel": "Hidden layers",
        "ylabel": "Performance",
        "title": "Network performance test (Hidden layers)",
    }
    axes.set(**labels)
    axes.grid()


def plot_learning_rate_perf(hidden_nodes, lr_list, epochs):
    """Plot the measured performance for each learning rate in `lr_list`.

    Args:
        hidden_nodes: forwarded to the performance test and shown in the title.
        lr_list: learning rates placed on the x axis.
        epochs: forwarded unchanged to the performance test.
    """
    # NOTE(review): test_learning_rate_performance is not defined in the
    # visible cells - confirm it exists before calling this plotter.
    scores = test_learning_rate_performance(hidden_nodes, lr_list, epochs)

    # One figure with a single axes; only the axes is needed afterwards
    _, axes = plt.subplots()
    axes.plot(lr_list, scores, 'o-')

    heading = "Network performance test (Learning rate) with {0} hidden nodes".format(hidden_nodes)
    axes.set(xlabel="Learning rate", ylabel="Performance", title=heading)
    axes.grid()

    
def plot_epochs_perf(hidden_nodes, learning_rate, epochs_list):
    """Plot the measured performance for each epoch count in `epochs_list`.

    Args:
        hidden_nodes: forwarded to the performance test and shown in the title.
        learning_rate: forwarded to the performance test and shown in the title.
        epochs_list: epoch counts placed on the x axis.
    """
    # NOTE(review): test_epochs_performance is not defined in the visible
    # cells - confirm it exists before calling this plotter.
    scores = test_epochs_performance(hidden_nodes, learning_rate, epochs_list)

    # One figure with a single axes; only the axes is needed afterwards
    _, axes = plt.subplots()
    # Epoch counts on the x axis, their measured performance on the y axis
    axes.plot(epochs_list, scores, 'o-')

    heading = "Network performance test (Epochs) with {0} hidden nodes and {1} learning rate".format(
        hidden_nodes, learning_rate)
    axes.set(xlabel="Epochs", ylabel="Performance", title=heading)
    axes.grid()

### Main Tester

In [None]:
def tester(hidden_nodes_list, learning_rate_list, epochs_list):
    h_perf = {}
    lr_perf = {}
    # Overall epoch performance result
    oe_perf = {"epochs": []}
    for hidden_nodes in hidden_nodes_list:
        print("[*] Testing network performance with {0} hidden nodes.".format(hidden_nodes))
        for learning_rate in learning_rate_list:
            print("    [*] Testing network performance with {0} as learning rate.".format(learning_rate))
            e_perf = []
            for epochs in epochs_list:
                print("        [*] Testing network performance (epochs).")
                e_perf.append(test_performance(hidden_nodes, learning_rate, epochs))
            
            oe_perf["epochs"].append(e_perf)
            lr_perf[str(learning_rate)] = e_perf
        h_perf[str(hidden_nodes)] = lr_perf
    
    return h_perf, lr_perf, oe_perf
            

In [None]:
# Parameter grids swept by the tester:
# hidden-node counts 100..500 in steps of 100
hidden_list = np.arange(100, 600, 100)
# six evenly spaced learning rates from 0.1 to 0.99 inclusive
lr_list = np.linspace(0.1, 0.99, num=6)
# epoch counts 1..10
epochs_list = np.arange(1, 11)

In [None]:
# Run the full sweep; each combination trains a new network, so this cell is slow
h, lr, e = tester(
    hidden_nodes_list=hidden_list,
    learning_rate_list=lr_list,
    epochs_list=epochs_list,
)

[*] Testing network performance with [100 200 300 400 500] hidden nodes.
    [*] Testing network performance with 0.1 as learning rate.
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
    [*] Testing network performance with 0.278 as learning rate.
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).
        [*] Testing network performance (epochs).