## Assignment - 1 : Implementing Backpropagation Algorithm

**Date: 6th June 2022**

In [4]:
#Importing Libraries
import time
from csv import reader
from math import exp
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

#### Loading and cleaning file

In [5]:
def read_csv_file(file):
    """
    This function accepts the filename agrument which is the csv file that is to be read
    and outputs the dataset
    """
    df = [] #empty list to store the dataset row wise
    with open(file, 'r') as f: #file opened in read mode only
        file_read = reader(f)
        for row in file_read:
            if not row:
                #if the file has reached the end or the row is blank
                continue
            df.append(row)
    return df

# Type casts string column to float as normalized pixel values are expected to be float
def preprocessing_file(df, col):
    """
    This function accepts the dataset returned by load_csv function and column as argument to strip
    any blank space at the end of each cell and casts it into float
    """
    for row in df:
        row[col] = float(row[col].strip())

In [6]:
filename = r'train_data.csv'
x = read_csv_file(filename) #loading the data file as csv into list
for i in range(len(x[0])): #as range is indexed from 0 
    preprocessing_file(x, i) # strip and type cast string to float
    
filename_2 = r'train_labels.csv'
y = read_csv_file(filename_2) #loading the data labels file as csv into list
for i in range(len(y[0])):
    preprocessing_file(y, i) 

In [7]:
#to handle 'U32 safe' error 
x = np.array(x, dtype=np.float32) 
y = np.array(y, dtype=np.float32)

In [8]:
print(f'Length of train_data is {len(x)} train_labels is {len(y)}')

Length of train_data is 24754 train_labels is 24754


In [9]:
#using the train_test_split function from scikit learn to split our dataset into training and validation
#data set in the ratio of 80:20 
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.20, random_state=42)
print(f'Length of Training dataset is : {len(x_train)} , {len(y_train)}')
print(f'Length of Training dataset is : {len(x_val)} , {len(y_val)}')

Length of Training dataset is : 19803 , 19803
Length of Training dataset is : 4951 , 4951


In [10]:
class NeuralNetwork():
    """
    class consists of methods for forward pass , backpropagation , activation function(sigmoid) 
    and other required methods. We pass dfault hyperparameters of our choice to initialize the methods. 
    """
    def __init__(self, shape_list, ep=15, learning_rate=0.1):
        self.shapes = shape_list # list that specifices the input vector size, number of neurons in hidden layer & output size  
        self.epoch = ep
        self.learning_rate = learning_rate

        # to store all parameters in the NN
        self.parameters = self.network_initialization()

    def sigmoid(self, x, d=False):
        if d:
            return (np.exp(-x))/((np.exp(-x)+1)**2)
        return 1/(1 + np.exp(-x))

    def softmax(self, x, d=False):
        # Numerically stable with large exponentials
        exps = np.exp(x - x.max())
        if d:
            return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))
        return exps / np.sum(exps, axis=0)

    def network_initialization(self):
        # number of neurons in each layer from sizes parameter passed
        ip_l=self.shapes[0]
        hd_1=self.shapes[1]
        op_l=self.shapes[2]

        parameters = {
            'w1':np.random.randn(hd_1, ip_l) * np.sqrt(1. / hd_1),
            'b1':np.random.randn(hd_1),
            'w2':np.random.randn(op_l, hd_1) * np.sqrt(1. / op_l),
            'b2':np.random.randn(op_l)
        }

        return parameters

    def forwardpass(self, x_train):
        parameters = self.parameters

        # input vector
        parameters['a0'] = x_train

        # from input layer to hidden layer
        parameters['z1'] = np.dot(parameters["w1"], parameters['a0']) + parameters['b1']
        parameters['a1'] = self.sigmoid(parameters['z1'])

        # from hidden layer to output layer
        parameters['z2'] = np.dot(parameters["w2"], parameters['a1']) + parameters['b2']
        parameters['a2'] = self.softmax(parameters['z2'])

        return parameters['a2']

    def backpass(self, y_train, output):
        '''
            this function implememts backpropagation algorithm for calculating the parameter updates
            using chaining to update parameters.
        '''
        parameters = self.parameters
        #delta 
        delta_w = {}

        # Calculate the W2 update
        error = 2 * (output - y_train) / output.shape[0] * self.softmax(parameters['z2'], d=True) #default derivative parameter is set to False
        delta_w['w2'] = np.outer(error, parameters['a1'])

        # Calculate W1 update
        error = np.dot(parameters['w2'].T, error) * self.sigmoid(parameters['z1'], d=True) #default derivative parameter is set to False
        delta_w['w1'] = np.outer(error, parameters['a0'])

        return delta_w

    def update(self, delta_in_w):
        '''
            Updating network parameters based on update method of Stochastic Gradient Descent.
        '''
        
        for key, value in delta_in_w.items():
            self.parameters[key] -= self.learning_rate * value
            
    def predict(self, x_val):
        prediction = []
        
        for x in x_val:
            output = self.forwardpass(x)
            pred = np.argmax(output)
            #one_hot = pd.get_dummies(pred)
            prediction.append(pred)
        
        return pd.get_dummies(prediction).values

    def accuracy(self, x_val, y_val):
        '''
        this function clculates accuracy by using the highest index value returned compared to true label
        '''
        predictions = []
        
        #using the validation dataset for calculating accuracy
        for x, y in zip(x_val, y_val):
            output = self.forwardpass(x)
            pred = np.argmax(output)
            predictions.append(pred == np.argmax(y))
        
        return np.mean(predictions)
    
    #Method to call to train the network
    def training(self, x_train, y_train, x_val, y_val):
        start_time = time.time()
        for i in range(self.epoch):
            for x,y in zip(x_train, y_train):
                output = self.forwardpass(x)
                delta_in_w = self.backpass(y, output)
                self.update(delta_in_w)
            
            accuracy = self.accuracy(x_val, y_val)
            #y_pred = self.predict(x_val)
            #print(f'{y_pred}')
            '''
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
                i+1, time.time() - start_time, accuracy * 100
            ))'''

In [11]:
nn = NeuralNetwork(shape_list=[784, 128, 4])
nn.training(x_train, y_train, x_val, y_val)

learning_rate_list = [0.001, 0.01, 0.1, 1]
acc_list = []
for l in learning_rate_list:
    nn = NeuralNetwork(shape_list=[784, 128, 4], learning_rate = l, ep = 10)
    nn.training(x_train, y_train, x_val, y_val)
    acc= nn.accuracy(x_val, y_val)
    acc_list.append(acc)
print(acc_list)

from matplotlib import pyplot as plt
import matplotlib
%matplotlib inline

x = learning_rate_list
y = acc_list 
plt.plot(x,y)
plt.xlabel('Learning Rate')
plt.ylabel('Accuracy')
plt.title("Learning rate vs Accuracy plot")

In [12]:
#result = nn.predict(x_val)
#print(result)
#result.values

In [13]:
# save the model to disk
filename = 'NeuralNetwork_model.sav'
pickle.dump(nn, open(filename, 'wb'))

In [14]:
# load the model from disk
#saved_nn = pickle.load(open(filename, 'rb'))
#result = saved_nn.predict(x_val)
#print(result)

In [15]:
def test_mlp(data_file):
    #test = pd.read_csv(data_file)
    filename = r'train_data.csv'
    test = read_csv_file(filename) #loading the data file as csv into list
    for i in range(len(test[0])-1): #as range is indexed from 0 
        preprocessing_file(test, i) # strip and type cast string to float
    #to handle U32 'safe' error due to large matrix multiplication
    test = np.array(test, dtype=np.float32)
    # Load your network
    # START
    filename_model = r'NeuralNetwork_model.sav'
    saved_nn = pickle.load(open(filename_model, 'rb'))
    # END
    
    # Predict test set - one-hot encoded
    y_pred = saved_nn.predict(test)

    return y_pred

In [16]:
#from sklearn.metrics import accuracy_score

y_pred = test_mlp(r'train_data.csv')
filename = r'train_labels.csv'
test_labels = read_csv_file(filename) #loading the data file as csv into list
for i in range(len(test_labels[0])-1): #as range is indexed from 0 
    preprocessing_file(test_labels, i) # strip and type cast string to float
#test_labels = pd.read_csv(r'train_labels.csv')
test_labels = np.array(test_labels)
test_labels = np.array(test_labels, dtype=np.float32)
#test_accuracy = accuracy_score(test_labels, y_pred)*100
#print(test_accuracy)

#Accuracy with increased number of neurons in hidden layer 
#nn = NeuralNetwork(sizes=[784, 256, 4])
#nn.train(x_train, y_train, x_val, y_val)

In [18]:
import numpy as numpy

def accuracy(y_true, y_pred):
    if not (len(y_true) == len(y_pred)):
        print('Size of predicted and true labels not equal.')
        return 0.0

    corr = 0
    for i in range(0,len(y_true)):
        corr += 1 if (y_true[i] == y_pred[i]).all() else 0

    return corr/len(y_true)

In [19]:
y_pred = test_mlp('train_data.csv')

test_accuracy = accuracy(test_labels, y_pred)*100
print(f'Accuracy is {test_accuracy}')

Accuracy is 99.54350811989981


In [20]:
#training accuracy
print(f'Training accuracy is : {nn.accuracy(x_train, y_train)}')

Training accuracy is : 0.9975761248295713


### References

[1]. Retreived from URL: https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/

[2]. Retreived from URL: https://github.com/casperbh96/Neural-Network-From-Scratch

[3]. Retreived from URL: https://towardsdatascience.com/implementing-backpropagation-with-style-in-python-da4c2f49adb4

[4] Retreived from URL: https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/




In [21]:
#How we will test your code:
from nn import read_csv_file, preprocessing_file
from test_mlp import test_mlp, STUDENT_NAME, STUDENT_ID
from acc_calc import accuracy 
import numpy as np

#just change the file name to be tested
filename = r'train_labels.csv'
test_labels = read_csv_file(filename) #loading the data labels file as csv into list
for i in range(len(test_labels[0])-1):
    preprocessing_file(test_labels, i) 

#just change the file name to be tested
y_pred = test_mlp(r'train_data.csv')

test_labels = np.array(test_labels, dtype=np.float32)

test_accuracy = accuracy(test_labels, y_pred)*100
print(f'Accuracy is : {test_accuracy}')

Length of train_data is 24754 train_labels is 24754
Accuracy is : 99.24456653470146
