# Assignment Part 2: Neural Net Template

This file contains the template code for the Neural Net with hidden layers.

### Artificial Neural Net Class

#### 2.1 Complete the initialisation of the neural net. <br>
#### 2.3. Complete the training implementation.<br>
#### 2.4. Complete the testing implementation <br>

In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
np.random.seed(42)

In [18]:
class ANN:

    #==========================================#
    # The init method is called when an object #
    # is created. It can be used to initialize #
    # the attributes of the class.             #
    #==========================================#
    def __init__(self, no_inputs, no_outputs, hidden_layers=[3,4,5], max_iterations=20, learning_rate=0.1):

        self.no_inputs = no_inputs #input layer size
        self.no_outputs = no_outputs #output layer size
        self.no_hidden_layers = len(hidden_layers) #number of hidden layers in the network
        self.hidden_layers = hidden_layers #parameter
        self.weights = [] #weights for each layer
        self.biases = []  #biases for each layer                     

        for layer in range(self.no_hidden_layers+1): #initialise weights and biases
            if layer == 0: #first layer
                no_nodes = self.hidden_layers[0] 
                no_inputs_to_layer = no_inputs
            elif layer == self.no_hidden_layers:
                no_nodes = self.no_outputs
                no_inputs_to_layer = self.hidden_layers[-1] #output layer
            else:
                no_nodes = self.hidden_layers[layer] #nodes same number as previous layer
                no_inputs_to_layer = self.hidden_layers[layer-1]
                                 
            # initialise weight matrix of shape: (no_nodes, no_inputs_to_layer)
            weights = np.random.randn(no_nodes, no_inputs_to_layer)
            self.weights.append(weights) #add the updated weight matrix to the network
            # adding the bias vector 
            biases = np.zeros((1, no_nodes))
            self.biases.append(biases) #add to the updated biases to the network
        
        self.max_iterations = max_iterations
        self.learning_rate = learning_rate

    #===================================#
    # Performs the activation function. #
    # Expects an array of values of     #
    # shape (1,N) where N is the number #
    # of nodes in the layer.            #
    #===================================#
    def activate(self, a): #Sigmoid
        return 1 / (1 + np.exp(-a))
    
    

    #===============================#
    # Trains the net using labelled #
    # training data.                #
    #===============================#
    def train(self, training_data, labels, batch_size=10):
        n = len(training_data)
        for i in range(0, n, batch_size):
            batch_inputs = training_data[i:i+batch_size] #appending batch inputs to batch size parameter
            batch_labels = labels[i:i+batch_size] #appending batch labels to corresponding imput
        
            # feed forward propogation
            activation = batch_inputs.T
            outputs = [activation]
            for j in range(self.no_hidden_layers+1):
                z = np.dot(self.weights[j], activation) + self.biases[j].T #weighted sum of the previous layer matrices and the bias term for current layer
                activation = self.activate(z) #weights then sum the activation for that layer
                outputs.append(activation) #append the activation value for current layer
            error = outputs[-1] - batch_labels.T # calculate output layer error

            # backpropagation
            partial_derivatives = [np.dot(error, outputs[-2].T)] #calculated by the actual and predicted outcomes
            for j in range(self.no_hidden_layers, 0, -1):
                error = np.dot(self.weights[j].T, error) * outputs[j] * (1 - outputs[j]) #backpropgating the error signal by multiplying the transpose of the weight matrix
                partial_derivatives.append(np.dot(error, outputs[j-1].T)) #partial derviatives appeneded at each layer
            partial_derivatives.reverse()

            # update weights and biases
            for j in range(self.no_hidden_layers+1):
                self.biases[j] -= self.learning_rate * np.sum(error, axis=1, keepdims=True) # updating biases 
                self.biases[j] = self.biases[j].T #biases transposed back to match the dimensions of orginal bias vector
    
    def test(self, testing_data, labels):
        assert len(testing_data) == len(labels)
        testing_data = np.concatenate((testing_data, np.ones((len(testing_data), 1))), axis=1)
        true_positives = 0
        true_negatives = 0
        false_positives = 0
        false_negatives = 0
        for x, y in zip(testing_data, labels): 
            output = self.predict(x)
            if np.argmax(output) == np.argmax(y):
                if np.argmax(output) == 1:
                    true_positives += 1
                else:
                    true_negatives += 1
            else:
                if np.argmax(output) == 1:
                    false_positives += 1
                else:
                    false_negatives += 1

        accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives)
        precision = true_positives / (true_positives + false_positives)
        recall = true_positives / (true_positives + false_negatives)

        print("Accuracy:\t"+str(accuracy))
        print("Precision:\t"+str(precision))
        print("Recall:\t"+str(recall))


#### 2.1. Complete the implementation of the main method.

In [19]:
# Load training data and test dataset

image_size = 28 # width and length
no_of_different_labels = 10 #  i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size

train_data = np.loadtxt("fashion_mnist_train.csv", delimiter=",")
test_data = np.loadtxt("fashion_mnist_test.csv", delimiter=",")


In [20]:
#Multiplying each pixel by 0.99 / 255 and adding 0.1 to the result to avoid 0 values as imputs, which prevent weight updates. 
fac = 0.99 / 255
train_imgs = np.asfarray(train_data[:, 1:]) * fac + 0.01
test_imgs = np.asfarray(test_data[:, 1:]) * fac + 0.01

train_labels = np.asfarray(train_data[:, :1])
test_labels = np.asfarray(test_data[:, :1])

#One hot encoding for labels 
lr = np.arange(10)

#Changing labelled images into one-hot representations. 
train_labels_one_hot = (lr==train_labels).astype(float)
test_labels_one_hot = (lr==test_labels).astype(float)

train_labels_one_hot[train_labels_one_hot==0] = 0.01
train_labels_one_hot[train_labels_one_hot==1] = 0.99
test_labels_one_hot[test_labels_one_hot==0] = 0.01
test_labels_one_hot[test_labels_one_hot==1] = 0.99


In [21]:
annette = ANN(no_inputs=image_pixels,
                        no_outputs=10,
                        hidden_layers=[100],
                        learning_rate=0.001)

# Training the neural network
print("Training...")
training_data = np.array([np.append([1], d[1:]) for d in train_imgs])
labels = train_labels_one_hot
annette.train(training_data, labels)
print("Complete.")

# Testing the neural network
print("Testing...")
testing_data = np.array([np.append([1], d[1:]) for d in test_imgs])
labels = test_labels_one_hot
annette.test(testing_data, labels)
print("Complete.")

Training...
(784, 10)


ValueError: non-broadcastable output operand with shape (1,100) doesn't match the broadcast shape (100,100)

#### 2.5 - Implement the rectifier activation function. 

In [None]:
class ReLU_ANN:

    #==========================================#
    # The init method is called when an object #
    # is created. It can be used to initialize #
    # the attributes of the class.             #
    #==========================================#
    def __init__(self, no_inputs, no_outputs, hidden_layers=[3,4,5], max_iterations=20, learning_rate=0.1):

        self.no_inputs = no_inputs 
        self.no_outputs = no_outputs #number of nodes in the output layer
        self.no_hidden_layers = len(hidden_layers) #number of hidden layers in the network
        self.hidden_layers = hidden_layers #parameter
        self.weights = [] #weights for each layer
        self.biases = []  #biases for each layer                     

        for layer in range(self.no_hidden_layers+1): #initialise weights and biases
            if layer == 0: #first layer
                no_nodes = self.hidden_layers[0] 
                no_inputs_to_layer = no_inputs
            elif layer == self.no_hidden_layers:
                no_nodes = self.no_outputs
                no_inputs_to_layer = self.hidden_layers[-1] #output layer
            else:
                no_nodes = self.hidden_layers[layer] #nodes same number as previous layer
                no_inputs_to_layer = self.hidden_layers[layer-1]
                                 
            # initialise weight matrix of shape: (no_nodes, no_inputs_to_layer)
            weights = np.random.randn(no_nodes, no_inputs_to_layer)
            self.weights.append(weights)
            
            biases = np.zeros((1, no_nodes))
            self.biases.append(biases)
        
        self.max_iterations = max_iterations
        self.learning_rate = learning_rate

    #===================================#
    # Performs the activation function. #
    # Expects an array of values of     #
    # shape (1,N) where N is the number #
    # of nodes in the layer.            #
    #===================================#
    def activate(self, a): #ReLU
        return np.maximum(0,a)

    #===============================#
    # Trains the net using labelled #
    # training data.                #
    #===============================#
    def train(self, training_data, labels, batch_size=10):
        n = len(training_data)
        for i in range(0, n, batch_size):
            batch_inputs = training_data[i:i+batch_size]
            batch_labels = labels[i:i+batch_size]
        
            # feed forward
            activation = batch_inputs.T
            outputs = [activation]
            print(activation.shape)
            for j in range(self.no_hidden_layers+1):
                z = np.dot(self.weights[j], activation) + self.biases[j].T #weighted sum of the previous layers
                activation = self.activate(z)
                outputs.append(activation)
                
            # calculate output layer error
            error = outputs[-1] - batch_labels.T

            # backpropagation
            partial_derivatives = [np.dot(error, outputs[-2].T)]
            for j in range(self.no_hidden_layers, 0, -1):
                error = np.dot(self.weights[j].T, error) * (outputs[j] > 0)
                partial_derivatives.append(np.dot(error, outputs[j-1].T))
            partial_derivatives.reverse()

            # update weights and biases
            for j in range(self.no_hidden_layers+1):
                self.biases[j] -= self.learning_rate * np.sum(error, axis=1, keepdims=True)
                self.biases[j]

In [None]:
ReLU = ReLU_ANN(no_inputs=image_pixels,
                        no_outputs=10,
                        hidden_layers=[100],
                        learning_rate=0.001)

# Training the neural network
print("Training...")
training_data = np.array([np.append([1], d[1:]) for d in train_imgs])
labels = train_labels_one_hot
ReLU.train(training_data, labels)
print("Complete.")

# Testing the neural network
print("Testing...")
testing_data = np.array([np.append([1], d[1:]) for d in test_imgs])
labels = test_labels_one_hot
ReLU.test(testing_data, labels)