#### This notebook follows the article "Coding a 2 layer Neural Network from scratch" and goes through the essentials of making a neural network

This is with the intention of utilising this base code in my third year individual project. 
https://towardsdatascience.com/coding-a-2-layer-neural-network-from-scratch-in-python-4dd022d19fd2

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.metrics import confusion_matrix
import itertools

#### Setting up the neural net

Setting up the net means allocating the containers that store the values computed during the forward pass, because back-propagation needs to read them again later. 
There is still some functionality and Python syntax which I do not fully understand; however, working through it is helpful for understanding the overall concepts of the neural net. 


In [2]:
class myNet:
    """Two-layer neural network for binary classification.

    Architecture: input -> 15-unit ReLU hidden layer -> 1 sigmoid output,
    trained by full-batch gradient descent on the cross-entropy loss.

    Data layout (fixed by the matrix products below):
        X is (features, samples); Y and the prediction Yh are (1, samples).
    """

    def __init__(self, inputVal, results):
        """Store the data and allocate the containers used during training.

        Args:
            inputVal: feature matrix of shape (features, samples).
            results: label matrix of shape (1, samples) holding 0/1 targets.
        """
        self.X = inputVal
        self.Y = results
        self.Yh = np.zeros((1, self.Y.shape[1]))

        self.numLayers = 2
        # [input width, hidden width, output width]. The input width follows
        # the data so the net is not tied to exactly 9 features.
        self.dimensions = [self.X.shape[0], 15, 1]

        # These dictionaries store the weights/biases, the forward-pass
        # intermediates needed by back-propagation, and gradients.
        self.param = {}
        self.cache = {}
        self.gradients = {}

        self.loss = []
        self.learningRate = 0.003
        self.trainingSamples = self.Y.shape[1]

    # Kept so existing code that calls ``net.init(X, Y)`` explicitly still works.
    init = __init__

    def paramsInit(self):
        """Randomly initialise the weights (reproducibly) and zero the biases.

        Weights are drawn from a standard normal and scaled by
        1/sqrt(fan_in); the global NumPy RNG is seeded with 1 first.
        """
        np.random.seed(1)
        nIn, nHidden, nOut = self.dimensions

        self.param['W1'] = np.random.randn(nHidden, nIn) / np.sqrt(nIn)
        # np.zeros takes the shape as a single tuple; a second positional
        # argument would be interpreted as the dtype.
        self.param['b1'] = np.zeros((nHidden, 1))

        self.param['W2'] = np.random.randn(nOut, nHidden) / np.sqrt(nHidden)
        self.param['b2'] = np.zeros((nOut, 1))

    # ----- activation functions used in the forward pass -----

    @staticmethod
    def sigmoid(Z):
        """Element-wise logistic function 1 / (1 + exp(-Z))."""
        return 1 / (1 + np.exp(-Z))

    @staticmethod
    def reLu(Z):
        """Element-wise rectified linear unit max(0, Z)."""
        return np.maximum(0, Z)

    def forwardPass(self):
        """Run the network on ``self.X`` and compute the current loss.

        Caches Z1, A1, Z2 and A2 for back-propagation and stores the
        prediction in ``self.Yh``.

        Returns:
            (Yh, loss): the (1, samples) prediction and the cross-entropy loss.
        """
        Z1 = self.param['W1'].dot(self.X) + self.param['b1']
        A1 = self.reLu(Z1)
        self.cache['Z1'], self.cache['A1'] = Z1, A1

        Z2 = self.param['W2'].dot(A1) + self.param['b2']
        A2 = self.sigmoid(Z2)
        self.cache['Z2'], self.cache['A2'] = Z2, A2

        self.Yh = A2
        loss = self.currentClassificationLoss(A2)

        return self.Yh, loss

    def currentClassificationLoss(self, Yh):
        """Return the mean binary cross-entropy between ``self.Y`` and ``Yh``.

        The result is a (1, 1) array because it is computed with dot products.
        """
        loss = (1. / self.trainingSamples) * (
            -np.dot(self.Y, np.log(Yh).T) - np.dot(1 - self.Y, np.log(1 - Yh).T)
        )
        return loss

    # ----- derivatives used by back-propagation -----

    @staticmethod
    def derivativeRelu(x):
        """Return the ReLU derivative: 1 where x > 0, otherwise 0.

        A new array is returned; ``x`` is left untouched so the cached
        pre-activations are not overwritten during back-propagation.
        """
        return (x > 0).astype(float)

    @staticmethod
    def derivativeSigmoid(Z):
        """Return the sigmoid derivative s * (1 - s), with s = sigmoid(Z)."""
        s = 1 / (1 + np.exp(-Z))
        return s * (1 - s)

    def backwards(self):
        """Back-propagate the loss and take one gradient-descent step.

        Applies the chain rule from the loss back to W2, b2, W1 and b1 using
        the values cached by the last ``forwardPass`` call, then updates the
        parameters in ``self.param`` with step size ``self.learningRate``.
        """
        Z1, A1, Z2 = self.cache['Z1'], self.cache['A1'], self.cache['Z2']
        samples = self.X.shape[1]

        # Output layer.
        derivativeLossYh = -(np.divide(self.Y, self.Yh) - np.divide(1 - self.Y, 1 - self.Yh))
        derivativeLossZ2 = derivativeLossYh * self.derivativeSigmoid(Z2)
        derivativeLossA1 = np.dot(self.param["W2"].T, derivativeLossZ2)
        derivativeLossW2 = 1. / A1.shape[1] * np.dot(derivativeLossZ2, A1.T)
        # Multiplying by a column of ones sums the per-sample gradients.
        derivativeLossB2 = 1. / A1.shape[1] * np.dot(
            derivativeLossZ2, np.ones([derivativeLossZ2.shape[1], 1])
        )

        # Hidden layer.
        derivativeLossZ1 = derivativeLossA1 * self.derivativeRelu(Z1)
        derivativeLossW1 = 1. / samples * np.dot(derivativeLossZ1, self.X.T)
        derivativeLossB1 = 1. / samples * np.dot(
            derivativeLossZ1, np.ones([derivativeLossZ1.shape[1], 1])
        )

        # If the derivative is negative, increasing the weight decreases the loss;
        # if it is positive, decreasing the weight decreases the loss.
        self.param["W1"] = self.param["W1"] - self.learningRate * derivativeLossW1
        self.param["b1"] = self.param["b1"] - self.learningRate * derivativeLossB1
        self.param["W2"] = self.param["W2"] - self.learningRate * derivativeLossW2
        self.param["b2"] = self.param["b2"] - self.learningRate * derivativeLossB2

    def gd(self, X, Y, iter=3000):
        """Train with full-batch gradient descent.

        Re-initialises the parameters, then alternates forward and backward
        passes. Every 500 iterations the loss is printed and appended to
        ``self.loss``.

        Args:
            X, Y: accepted for call compatibility but not used; training runs
                on the ``self.X`` / ``self.Y`` given to the constructor.
            iter: number of gradient-descent iterations.
        """
        np.random.seed(1)
        self.paramsInit()

        for i in range(iter):
            _, loss = self.forwardPass()
            self.backwards()

            if i % 500 == 0:
                # The loss is a (1, 1) array; squeeze it before %f formatting.
                print("Cost after iteration %i: %f" % (i, float(np.squeeze(loss))))
                self.loss.append(loss)
        

    
    
    
        

    
    Now we have completed the forward pass, made a prediction, calculated the loss, and updated the parameters 
    to improve the weightings so they make a better prediction. 
    

In [None]:
# Build the network and train it with gradient descent for 15000 iterations.
# NOTE(review): `x` and `y` are not defined in this section — presumably the
# feature matrix and label row prepared in an earlier cell; confirm.
nn = myNet(x, y)
nn.gd(x, y, iter = 15000)


# all this does is run through the neural net several times (iterations declared) and it will look for the minima in 
# the region which will reduce the loss. 
def gd(self, X, Y, iter=3000):
    """Train a network object with full-batch gradient descent.

    Re-initialises the parameters via ``self.paramsInit()``, then repeats a
    forward pass followed by a backward pass ``iter`` times. Every 500
    iterations the loss is printed and appended to ``self.loss``.

    Args:
        self: the network object; must provide ``paramsInit``,
            ``forwardPass``, ``backwards`` and a ``loss`` list.
        X, Y: accepted for call compatibility but not used by this function.
        iter: number of gradient-descent iterations to run.
    """
    np.random.seed(1)

    self.paramsInit()

    for i in range(iter):
        _, loss = self.forwardPass()
        self.backwards()

        if i % 500 == 0:
            # The loss may be a (1, 1) array; reduce it to a plain float first,
            # since implicit array-to-scalar conversion is deprecated in NumPy.
            print("Cost after iteration %i: %f" % (i, float(np.squeeze(loss))))
            self.loss.append(loss)

    After this point the most important work is improving the hyperparameters and improving the input data (feature engineering) 