## Data Science Lab Assignment - 8
##### Name: Boganadham Venkata Sai Manoj
##### Section: A
##### Roll no: 197121

#### Implementing Artificial Neural Network from scratch

-----------------------------------------
##### First we need to import required libraries

In [12]:
# Importing the required libraries
#importing libraries and functions
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


-------------------------------------------------------------
From here, we will start building our neural network.
We will create helper functions which can work as building blocks of the model

##### Defining the activation functions

In [13]:
def ReLU(z):
    # ReLU, on input x, returns 0 if x is non positive, and x itself if x is positive
    a = np.maximum(0, z)
    return a

##### We also need to initialise the parameters of each of the layers

In [25]:
def initParams(layer_sizes):
    # Create a dictionary that will store parameters associated with its names (like w1, b1, etc.)
    params = {}

    # Creating the parameters for each layer
    for i in range(1, len(layer_sizes)):
        params['W' + str(i)] = np.random.randn(layer_sizes[i], layer_sizes[i-1])*0.01
        params['B' + str(i)] = np.random.randn(layer_sizes[i],1)*0.01
    return params

#### Forward Propagation

In [15]:
def frwdProp(X_train, parameters):
    layers = len(parameters)//2
    vals = {}
    # Calculating the activations layer by layer
    for i in range(1, layers+1):
        # First hidden layer
        if i==1:
            vals['Z' + str(i)] = np.dot(parameters['W' + str(i)], X_train) + parameters['B' + str(i)]
            vals['A' + str(i)] = ReLU(vals['Z' + str(i)])
        else:
            vals['Z' + str(i)] = np.dot(parameters['W' + str(i)], vals['A' + str(i-1)]) + parameters['B' + str(i)]
            if i==layers:
                vals['A' + str(i)] = vals['Z' + str(i)]
            else:
                vals['A' + str(i)] = ReLU(vals['Z' + str(i)])
    return vals

#### Computing the cost

In [16]:
def getCost(vals, Y_train):
    layers = len(vals)//2
    Y_pred = vals['A' + str(layers)]
    cost = 1/(2*len(Y_train)) * np.sum(np.square(Y_pred - Y_train))
    return cost

#### Backward Propagation


In [17]:
def backProp(parameters, vals, X_train, Y_train):
    layers = len(parameters)//2
    m = len(Y_train)
    grads = {}
    for i in range(layers,0,-1):
        # Calculating the gradients for the last layer
        if i==layers:
            dA = 1/m * (vals['A' + str(i)] - Y_train)
            dZ = dA
        # Calculating the gradients for the hidden layers
        else:
            dA = np.dot(parameters['W' + str(i+1)].T, dZ)
            dZ = np.multiply(dA, np.where(vals['A' + str(i)]>=0, 1, 0))
        # Calculating the gradients for the parameters
        if i==1:
            grads['W' + str(i)] = 1/m * np.dot(dZ, X_train.T)
            grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
        else:
            grads['W' + str(i)] = 1/m * np.dot(dZ,vals['A' + str(i-1)].T)
            grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
    return grads

#### Updating the parameters

In [18]:
def updParams(params, gradients, alpha):
    layers = len(params)//2
    params_updated = {}
    # Updating the parameters in layer by layer
    for i in range(1,layers+1):
        params_updated['W' + str(i)] = params['W' + str(i)] - alpha * gradients['W' + str(i)]
        params_updated['B' + str(i)] = params['B' + str(i)] - alpha * gradients['B' + str(i)]
    return params_updated

#### Creating the model

In [19]:
def model(X_train, Y_train, layer_sizes, num_iters, learning_rate):
    params = initParams(layer_sizes)
    for i in range(num_iters):
        values = frwdProp(X_train.T, params)
        cost = getCost(values, Y_train.T)
        grads = backProp(params, values,X_train.T, Y_train.T)
        params = updParams(params, grads, learning_rate)
        print('Cost at iteration ' + str(i+1) + ' = ' + str(cost) + '\n')
    return params

#### Accuracy calculation (using mean squared error)

In [20]:
def compute_accuracy(X_train, X_test, Y_train, Y_test, params):
    # Calculating the predictions
    trainVals = frwdProp(X_train.T, params)
    testVals = frwdProp(X_test.T, params)
    # Calculating the accuracy of the model
    trainAcc = np.sqrt(mean_squared_error(Y_train, trainVals['A' + str(len(layer_sizes)-1)].T))
    testAcc = np.sqrt(mean_squared_error(Y_test, testVals['A' + str(len(layer_sizes)-1)].T))
    return trainAcc, testAcc

#### Making predictions

In [21]:
def predict(X, params):
    # Making predictions using forward propagation
    vals = frwdProp(X.T, params)
    preds = vals['A' + str(len(vals)//2)].T
    return preds

--------------------------------------------------------------------------------------------
#### **1. Making predictions to the boston housing dataset using the Neural network built from scratch**

In [22]:
# Initialising hyper parameters
layer_sizes = [13, 10, 5, 1]                                                       #set layer sizes, do not change the size of the first and last layer 
num_iters = 100                                                                 #set number of iterations over the training set(also known as epochs in batch gradient descent context)
learning_rate = 0.05

Import the dataset and train the model

In [26]:
boston_dataset =  pd.read_csv('boston.csv')

n=boston_dataset.shape[1]
m=boston_dataset.shape[0]

# Splitting the dataset into the Training set and Test set
x_train=np.array(boston_dataset.iloc[:int(0.8*m),:n-1])
y_train=np.array(boston_dataset.iloc[:int(0.8*m),-1])  
x_test=np.array(boston_dataset.iloc[int(0.8*m):,:n-1])
y_test=np.array(boston_dataset.iloc[int(0.8*m):,-1])

# Creating the model and training it
params = model(x_train, y_train, layer_sizes, num_iters, learning_rate)

# Predicting the data
y_predict = predict(x_test,params)

# calculating the accuracy of the model using the Root Mean Squared Error
import math
error = math.sqrt(np.mean((y_test - y_predict)**2))

print("Root Mean Squared error of the model: ", error)

Cost at iteration 1 = 335.05775066067906

Cost at iteration 2 = 334.983427783438

Cost at iteration 3 = 334.9086853606264

Cost at iteration 4 = 334.83370631342547

Cost at iteration 5 = 334.75852404468225

Cost at iteration 6 = 334.6830234183852

Cost at iteration 7 = 334.6072866360732

Cost at iteration 8 = 334.5313435295578

Cost at iteration 9 = 334.45491927373735

Cost at iteration 10 = 334.37800091906917

Cost at iteration 11 = 334.30060254319386

Cost at iteration 12 = 334.2226990719891

Cost at iteration 13 = 334.14421908227763

Cost at iteration 14 = 334.06511161432184

Cost at iteration 15 = 333.98529672552235

Cost at iteration 16 = 333.90465515291123

Cost at iteration 17 = 333.823055310305

Cost at iteration 18 = 333.74033155236657

Cost at iteration 19 = 333.6562825565041

Cost at iteration 20 = 333.57065320087065

Cost at iteration 21 = 333.4830592781778

Cost at iteration 22 = 333.39309612732706

Cost at iteration 23 = 333.30042958227375

Cost at iteration 24 = 333.2044

--------------------------------------------------------------------------------------------
#### **2. Making predictions to the seeds dataset using the Neural network built from scratch**

In [31]:
# Initialise the hyperparameters
num_iters = 1000                                                                  #set number of iterations over the training set(also known as epochs in batch gradient descent context)
learning_rate = 0.03
layer_sizes=[7,5,5,3]

df =  pd.read_csv('seeds.csv')
n=df.shape[1]
m=df.shape[0]

x_train=np.array(df.iloc[:int(0.9*m),:n-1])      # we take 90% of rows in training data
y_train=np.array(df.iloc[:int(0.9*m),-1])        # only the last column is in y_train  
x_test=np.array(df.iloc[int(0.9*m):,:n-1])       # only 10 % of rows is in testing dataset
y_test=np.array(df.iloc[int(0.9*m):,-1])         # splitting into training and testing dataset 
params = model(x_train, y_train, layer_sizes, num_iters, learning_rate)
y_predict = predict(x_test,params)

# for i in range(0, len(y_predict)):
#     if(y_predict[i]>=0.5):
#         y_predict[i]=1
#     else:
#         y_predict[i]=0

print(y_predict)

Cost at iteration 1 = 6.346144179581963

Cost at iteration 2 = 6.344413631083732

Cost at iteration 3 = 6.342683635135125

Cost at iteration 4 = 6.3409541915871275

Cost at iteration 5 = 6.339225300302825

Cost at iteration 6 = 6.337496961036965

Cost at iteration 7 = 6.335769173612687

Cost at iteration 8 = 6.334041937887157

Cost at iteration 9 = 6.332315253659583

Cost at iteration 10 = 6.330589120743215

Cost at iteration 11 = 6.328863538961421

Cost at iteration 12 = 6.327138508137628

Cost at iteration 13 = 6.325414028095316

Cost at iteration 14 = 6.3236900986580284

Cost at iteration 15 = 6.32196671964936

Cost at iteration 16 = 6.320243890892969

Cost at iteration 17 = 6.318521612281032

Cost at iteration 18 = 6.316799883596457

Cost at iteration 19 = 6.315078704635222

Cost at iteration 20 = 6.313358075308924

Cost at iteration 21 = 6.311637995448645

Cost at iteration 22 = 6.309918464783063

Cost at iteration 23 = 6.308199483136238

Cost at iteration 24 = 6.306481050332283

