# Assignment 02 Part 2: Neural Net Template

Allan Calderwood - 202077625

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import random
import time

from sklearn import metrics
from sklearn.utils import shuffle

np.random.seed(0)

## Neural Net Class

In [2]:
class ANN:

    #==========================================#
    # The init method is called when an object #
    # is created. It can be used to initialize #
    # the attributes of the class.             #
    #==========================================#
    def __init__(self, no_inputs, no_hidden_layers=1, hidden_layer_size=16, output_layer_size=1,
                 max_iterations=5, activation="sigmoid", learning_rate=0.1):

        self.no_inputs = no_inputs
        self.no_hidden_layers = no_hidden_layers
        self.hidden_layer_size = hidden_layer_size
        self.output_layer_size = output_layer_size
        self.activation = activation
        self.max_iterations = max_iterations
        self.learning_rate = learning_rate
        
        # Draw initial random weights from a normal distribution, 
        # with a mean of 0 and sd of 2/No inputs for each layer, using He Initialisation,
        # as it works well with ReLU which will be added later to try and mitigate the vanishing 
        # gradient problem
        self.layers = []

        for l in range(no_hidden_layers+2):
            # if input layer use no_inputs to initialise, else use previous layer output size
            if l==0: 
                # define the shape of the weights, the no. of inputs to the layer and the no. of neurons
                weights = np.ones(shape=(self.hidden_layer_size, self.no_inputs+1))
                inputs = no_inputs
                n_neurons = hidden_layer_size
            # if output layer
            elif l==no_hidden_layers+1:
                weights = np.ones(shape=(self.output_layer_size, self.hidden_layer_size+1))
                inputs = hidden_layer_size
                n_neurons = output_layer_size
            # if hidden layer
            else: 
                weights = np.ones(shape=(self.hidden_layer_size, self.hidden_layer_size+1))
                inputs = hidden_layer_size
                n_neurons = hidden_layer_size
            # now create the weights using He initialisation
            for x in range(n_neurons):
                weights[x] = np.random.normal(0, (2/inputs), inputs+1)
                
            # add weights as a layer   
            self.layers.insert(l, weights)
            
     #=======================================#
    # Prints the details of the neural net.  #
    #=======================================#
    def print_details(self):
        print("Model Summary")
        print("----------------------------")
        print("No. inputs: " + str(self.no_inputs))
        print("Hidden Layers: (" + str(self.no_hidden_layers) 
              +","+str(self.hidden_layer_size)+")")
        print("Max iterations:\t" + str(self.max_iterations))
        print("Learning rate:\t" + str(self.learning_rate))
        print("Activation function: " + str(self.activation))
        print("----------------------------")

    #===================================#
    # Performs the activation function. #
    # Expects an array of values of     #
    # shape (1,N) where N is the number #
    # of nodes in the layer.            #
    #===================================#
    def activate(self, z):
        if (self.activation=="relu"):
            return z * (z > 0)
        elif (self.activation=="sigmoid"):
            return 1 / (1 + np.exp(-z))
        elif (self.activation=="linear"):
            return z
        
    #===================================#
    # Returns the derivatives of the    #
    # activation function chosen        #
    #===================================#    
    def derivative(self, z):
        if (self.activation=="relu"):
            return 1.*(z > 0)
        elif (self.activation=="sigmoid"):
            return z*(1.0-z)
        elif (self.activation=="linear"):
            return 1
    
    #===================================#
    # Loss function of the neural net   #
    #===================================#    
    def loss(self, output, actual):
        return np.array(output-actual)
    
    #===================================#
    # Performs the forward phase of the #
    # Neural network                    #
    #===================================#  
    def forward_phase(self, inputs):
        instance =  inputs
        # add 1 to training data for use with bias
        instance = np.hstack(([1], instance))
        # get output of first layer and store in 2d array--
        self.outputs = []
        self.outputs.insert(0, self.activate(
            np.dot(self.layers[0], instance.T)))

        # loop for hidden and output layers calculating the output and passing to next layer
        for layer in range(1, self.no_hidden_layers+2):
            # get the weighted sum of inputs for each layer and pass to activation function
            # then store as output of the layer
            self.outputs.insert(layer, self.activate(
                np.dot(self.layers[layer], np.hstack(([1], self.outputs[layer-1])))
            ))       
    
        # return the final output from the network, i.e. the prediction
        return self.outputs[-1]
    
    #===================================#
    # Performs the backprop phase of    #
    # the Neural network, returns the   #
    # Partial derivatives.              #
    #===================================#  
    def back_propagation(self, inputs, x, y, i, predicted):
        partial_derivatives = [0] * (self.no_hidden_layers+2)
        error_terms = [0] * (self.no_hidden_layers+2)
        inputs_w_bias = np.hstack(([1], inputs))
        
        # for layer in layers reversed
        for l in range(self.no_hidden_layers+1, -1, -1):
            # if output layer
            if l == self.no_hidden_layers+1:
                error_terms.insert(l, self.loss(predicted, y.iloc[i])*self.derivative(predicted))
                
            # if input layer or hidden layer
            else:
                # for neuron in next layer get weight of this neuron * error * derivative of activation function
                layer_errors = []
                for j in range(len(self.layers[l])):
                    error = 0.0
                    for k in range(len(self.layers[l+1])):
                        error += self.layers[l+1][k][j] *error_terms[l+1][k] * self.derivative(self.outputs[l][j])
                    layer_errors.insert(j, error)
                
                # store the errors for this layer
                error_terms.insert(l, np.array(layer_errors))
                
            # calculate partial derivative for this layer,
            # if input layer use x, else use previous layers output
            if l == 0:
                partial_derivatives[l] = np.dot(error_terms[l][np.newaxis,:].T, inputs_w_bias[np.newaxis,:])
            else:
                inputs_t_layer = np.hstack(([1], self.outputs[l-1]))[np.newaxis,:]
                partial_derivatives[l] = np.dot(error_terms[l][np.newaxis,:].T, inputs_t_layer) 
                
        # return the partial derivatives       
        return partial_derivatives

    #===============================#
    # Trains the net using labelled #
    # training data.                #
    #===============================#
    def train(self, training_data, labels, target):
        # time how long it takes to train
        start_time = time.time()
        # predicting parameter represents which digit we will be 
        # predicting in a OneVsRest style
        self.target = target
        
        # make sure the length of the x and y sets
        # are the same, i.e. no missing data
        assert len(training_data) == len(labels)
        
        # group all Y values that are not equal to
        # the class we are trying to predict and set
        # them to 0 and the target to 1
        labels = np.where(labels == self.target, 1, 0)
        
        # perform the training
        print("Training model...")
        for _ in range(self.max_iterations):
            
            # shuffle both x and y together to perform stochastic gradient descent
            # this is using a reindexing method to ensure they maintain 
            # the correct pairs of predictors and labels
            idx = np.random.permutation(training_data.index)
            x = training_data.reindex(idx).reset_index(drop=True)
            y = pd.DataFrame(labels).reindex(idx).reset_index(drop=True)
            
            for i in range(len(y)):
                # get the forward phase prediction for the instance of training data (x)
                inputs = x.iloc[i]
                predicted = self.forward_phase(inputs)
                
                # backprop to find partial derivatives
                partial_derivatives = self.back_propagation(inputs, x, y, i, predicted)

                # now update the weights for each layer
                for layer in range(self.no_hidden_layers+2):
                    self.layers[layer] = self.layers[layer] - (np.array(partial_derivatives[layer])*self.learning_rate)
                    
            print("Epoch {}/{} completed.".format(_+1, self.max_iterations))
        print("Model trained.")
        print ('Time elapsed: {:.4f} seconds'.format(time.time()-start_time))

    #=========================================#
    # Function to report on test results      #
    #=========================================#
    def class_report(self, actuals, preds):
        # init a confusion matrix
        confusion_matrix = {"tn":0, "tp":0, "fp":0, "fn":0}
        
        # for each pred check result vs actual to construct the matrix
        for actual, pred in zip(actuals, preds):
            if (pred == actual):
                if pred == 1:
                    confusion_matrix["tp"] +=1
                else:
                    confusion_matrix["tn"] +=1
            else:
                if pred == 1:
                    confusion_matrix["fp"] +=1
                else:
                    confusion_matrix["fn"] +=1
                    
        # use try except blocks to catch any possible divisions by 0 in calculations
        try:
            # calculate accuracy, recall, precision and f1 based on confusion matrix
            accuracy = ((confusion_matrix["tp"] + confusion_matrix["tn"])/
                       (confusion_matrix["tp"] + confusion_matrix["tn"] + confusion_matrix["fp"] + confusion_matrix["fn"]))*100

            precision = confusion_matrix["tp"] / (confusion_matrix["tp"] + confusion_matrix["fp"])*100

            recall =  confusion_matrix["tp"] / (confusion_matrix["tp"] + confusion_matrix["fn"])*100

            f1 = 2*((precision*recall)/(precision+recall))
            
        except ZeroDivisionError:
            print("Error occured, tried to divide by 0. Model predicted either no Tp or no Tn")
        
        try:
            # output results
            print("Testing Results")
            print("----------------------------")
            print("Accuracy: {:.2f}%".format(accuracy))
            print("Precision: {:.2f}%".format(precision))
            print("Recall: {:.2f}%".format(recall))
            print("F1 Score: {:.2f}%".format(f1))
            print("Confusion Matrix: {}".format(
                    [[confusion_matrix["tp"], confusion_matrix["fp"]],
                    [confusion_matrix["fn"], confusion_matrix["tn"]]]
                ))
            print("----------------------------")
        except UnboundLocalError:
            pass
        
    #=========================================#
    # Tests the prediction on each element of #
    # the testing data. Prints the precision, #
    # recall, and accuracy.                   #
    #=========================================#
    def test(self, testing_data, labels):
        # make sure the length of the x and y sets
        # are the same, i.e. no missing data
        assert len(testing_data) == len(labels)
        
        # group all Y values that are not equal to
        # the class we are trying to predict and set
        # them to 0 and the target to 1
        labels = np.where(labels == self.target, 1, 0)
        
        preds = np.ones(shape=len(labels))
        
        for i in range(len(labels)):
            instance = testing_data.iloc[i]
            # add 1 to training data for use with bias
            # get prediction and append to the preds list
            preds[i] = self.forward_phase(instance)
        
        # use threshold of 0.5
        preds = np.where(preds >= 0.5, 1, 0)
        
        # output the results
        self.class_report(labels, preds)  

# Main method

## Prepare Data

In [3]:
# load the MNIST data
train_data = pd.read_csv("./mnist_train.csv", header=None)
test_data = pd.read_csv("./mnist_test.csv", header=None)

In [4]:
# seperate into X and Y for train and test
# divide by 255 to scale the data between 0 and 1
X_train = train_data.drop(train_data.columns[0], axis=1) /255
Y_train = train_data[train_data.columns[0]].copy()

X_test = test_data.drop(train_data.columns[0], axis=1) /255
Y_test = test_data[train_data.columns[0]].copy()

## 2.1 Implementation of Main Method

In [5]:
# Initialise a neural net and view the details, 
nn = ANN(28*28, max_iterations=1)
nn.print_details()

Model Summary
----------------------------
No. inputs: 784
Hidden Layers: (1,16)
Max iterations:	1
Learning rate:	0.1
Activation function: sigmoid
----------------------------


In [6]:
nn.train(X_train, Y_train, target = 1)

Training model...
Epoch 1/1 completed.
Model trained.
Time elapsed: 46.8488 seconds


In [7]:
nn.test(X_test, Y_test)

Testing Results
----------------------------
Accuracy: 99.38%
Precision: 97.94%
Recall: 96.56%
F1 Score: 97.25%
Confusion Matrix: [[1096, 23], [39, 8842]]
----------------------------


## 2.2 Complete initilisation 

In [10]:
# Initialise a neural net and view the details, 
nn2 = ANN(28*28, max_iterations=3, learning_rate = 0.2, no_hidden_layers=2, hidden_layer_size=8, output_layer_size=1)
nn2.print_details()

Model Summary
----------------------------
No. inputs: 784
Hidden Layers: (2,8)
Max iterations:	3
Learning rate:	0.2
Activation function: sigmoid
----------------------------


In [11]:
nn2.train(X_train, Y_train, target = 1)

Training model...
Epoch 1/3 completed.
Epoch 2/3 completed.
Epoch 3/3 completed.
Model trained.
Time elapsed: 110.9935 seconds


In [12]:
nn2.test(X_test, Y_test)

Testing Results
----------------------------
Accuracy: 99.58%
Precision: 98.58%
Recall: 97.71%
F1 Score: 98.14%
Confusion Matrix: [[1109, 16], [26, 8849]]
----------------------------
