In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Updates:
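1. Fixed a mistake in the sigmoid function (the denominator is `1 + exp(-x)`, not `1 * exp(-x)`).
2. The bias was not added.
3. Backward derivative.
4. Forward derivative.
5. We don't need a neural network until we have lots of data and features.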

In [557]:
class NeuralNetwork(object):
    
    def __init__(self):
        
        self.layer_count = 1
        
        self.parameters = {}
        self.results = {}
        
    def W_initializer(self, shape):
        
        '''
        This function will return a matrix of normally distributed random values of 0 mean and 1 std. 
        Parameters:
        shape: List, Tuple
            Shape will be a tuple of two values input shape and output shape values. 
        '''
        return np.random.randn(shape[0], shape[1])
    
    def Bias_initializer(self, shape):
        '''
        This function will return a vector of normally distributed random values of 0 mean and 1 std. 
        parameters
        shape: int
            Output Vector size
        '''
        return np.random.normal(size = shape)
    
    def sigmoid(self, x):
        '''
        This function will return sigmoid activated value. 
        parameters
        x:
            x will be the power value, a matrix multiplication of theta vector and feature vector. 
        '''
        #print(x)
        return 1 / (1 + np.exp(-x))   # 1 / (1 * np.exp(-x)) this is mot multiply
    
    def relu(self, x):
        '''
        ReLU is an activation function
        '''
        return np.maximum(x, np.zeros(x.shape))
    
    def soft_max(self, x):
        
        '''
        Soft max is an activation function vector. 
        parameters
        x: np.array
            x will be the power values
        '''
        exp_x = np.exp(x)
        return exp_x / np.sum(exp_x, axis = 1)[:, None]
    
    def add_layer(self, in_shape, out_shape, activation = 'relu'):
        
        '''
        This funtion will add a layer into the neural network. Assigning all the parameters to the neural network. 
        Also assigning the activation function into the paramters. 
        '''
        
        #self.parameters['layer: ' + str(self.layer_count)] = {'W': self.W_initializer((in_shape, out_shape)), 'b': np.ones((out_shape))}
        
        self.parameters['layer: ' + str(self.layer_count)] = {'W': self.W_initializer((in_shape, out_shape)), 'b': self.Bias_initializer((out_shape))}
        
        
        if (activation == 'relu'):
            self.parameters['layer: ' + str(self.layer_count)]['Activation'] = self.relu
        elif (activation == 'sigmoid'):
            self.parameters['layer: ' + str(self.layer_count)]['Activation'] = self.sigmoid
        elif (activation == 'soft_max'):
            self.parameters['layer: ' + str(self.layer_count)]['Activation'] = self.soft_max
            
        self.layer_count += 1
    
    def dense_layer(self, x, parameters):
        
        '''
        Dense Layer is the matrix multiplication of Weight matrix and feature vector then adding the bias in it. 
        Activation function will call after operation. 
        '''
        W = parameters['W']
        bias = parameters['b']
        activation = parameters['Activation']
        Z = np.matmul(x, W) + bias
        return activation(Z)
    
    
    def forward_pass(self, x):
        
        '''
        Forward Pass
        '''
        result = x
        self.results['A0'] = result
        for i in range(1, self.layer_count):
            result = self.dense_layer(result, self.parameters['layer: ' + str(i)])
            self.results['A' + str(i)] = result
        return result
          
    
    
    
    def del_forward_pass(self):
        '''
        This function will calculate the derivatives for the forward pass. 
        '''
        
        dev_forpass = {}
        
        # adding 
        bias = np.ones([1 , self.results['A' + str(0)].shape[0]])
        
        for i in range(1, self.layer_count):
            act = self.parameters['layer: ' + str(i)]['Activation'] ## Taking activation for calculating different derivatives
            result = self.results['A' + str(i - 1)]
            if (act == self.relu):
                dev_forpass['layer: ' + str(i)] = {'dW': result.T, 'db': result.T}
            
            elif (act == self.sigmoid): 
                ######### YOUR CODE ###########
                #sig = self.sigmoid(result.T)
                #dev_forpass['layer: ' + str(i)] = {'dW': ((sig * (1 - sig)) * result.T), 'db': (1 - sig)}
                
                ########### CHANGE CODE #############
                
                # results already contains output of sigmoid so you dont need to pass your data through 
                # sigmoid function again.  results = {A0 , A1=sig(A0) , A2=sig(A1) }
                
                # derivative is also wrong
                sig = self.results['A' + str(i)]
                dev_forpass['layer: ' + str(i)] = {'dW': np.matmul(result.T , (sig * (1 - sig))), 'db': np.matmul( bias , (sig * (1 - sig)))}
                 
        return dev_forpass
    
    def del_backward_pass(self ,  error):
        
        '''
        In backward propogation we have to calculate all the values 
        
        '''
        #dev_backpass = {}
        #theta_mul = np.ones((in_shape, 1)) * error ## Last Layer dL / dA
        
        #for i in range(self.layer_count - 1, 1, -1):
         #   dev_backpass['layer: '+ str(i)] = {'dW': theta_mul, 'db': 1} ## Storing del Activations 
          #  theta_val = self.parameters['layer: ' + str(i)] ## Taking thetas of l + 1 layer
          #  theta_mul =  theta_mul @ theta_val['W'].T ## Calculating del Activation 

        #dev_backpass['layer: '+ str(i - 1)] = {'dW': theta_mul, 'db': 1}
        #return dev_backpass
    

        dev_backpass = {}
        dev_backpass['layer: ' + str(self.layer_count-1)] = {'dW': error , 'db': error }  
                
        for i in range(self.layer_count - 3, -1, -1):   # 2 = 3 , 1 = 2 , 0 = 1
            theta_val = self.parameters['layer: ' + str(i+1)]                   ## Taking thetas of l + 1 layer
            theta_mul =  error * theta_val['W']                                 ## Calculating del Activation 
            bias_mul = error * theta_val['b']
            dev_backpass['layer: '+ str(i+1)] = {'dW': theta_mul, 'db': bias_mul} ## Storing del Activations 
        
        return dev_backpass
    
    
    
    def predict_prob(self, x):
        return self.forward_pass(x)
    
    def predict(self, x):
        return np.argmax(self.forward_pass(x))
    
    def GradientDescentOptimizer(self, x, y, alpha, epoch = 100):
        loss_dir = []
        for _ in range(epoch):
            result = self.forward_pass(x)
            del_loss  = self.biclass_cross_entropy( result , y)
            del_forward = self.del_forward_pass()
            
            #del_backward = self.del_backward_pass(x.shape[0], del_loss)
            del_backward = self.del_backward_pass( del_loss)
            
            for i in range(1, self.layer_count):
                self.parameters['layer: ' + str(i)]['W'] -= (1/x.shape[0])*(alpha * (del_forward['layer: ' + str(i)]['dW'] * del_backward['layer: ' + str(i)]['dW']))
                #self.parameters['layer: ' + str(i)]['b'] -= alpha * (del_forward['layer: ' + str(i)]['db'] * del_backward['layer: ' + str(i)]['db'])

            loss = self.Biclass_Loss(result, y)
            print ('Loss: ', loss)
            loss_dir.append(loss)
        return loss_dir
    
    def AdamOptimizer(self, x, y, alpha, epoch = 100):
        pass
    
    def AdagradeOptimizer(self, x, y, alpha, epoch = 100):
        pass
        
    def RMSpropOptimizer(self, x, y, alpha, epoch = 100):
        pass
    
    def MomentumUpdateOptimizer(self, x, y, alpha, epoch = 100):
        pass
    
    def NesterovMomentumOptimizer(self, x, y, alpha, epoch = 100):
        pass
    
    def softmax_cross_entropy(self, logits, labels):
        return (- np.mean( labels - np.log(self.soft_max(logits))))
    
    def Biclass_Loss(self, x, y):
        sig = self.sigmoid(x)
        return -np.mean( (y * np.log(sig)) + ((1 - y) * np.log(1 - sig)))
    
    def multiclass_Loss(self, x, y):
        return -np.mean(y * np.log(self.soft_max(x)))
    
    def biclass_cross_entropy(self, x, y):
        return -np.mean((np.log(self.sigmoid(x)) - y) * x)
        
    def fit(self, x, y, learning_rate, optimizer = 'grad_dst', epoch = 100):
        
        if (optimizer == 'grad_dst'):
            self.GradientDescentOptimizer(x, y, learning_rate, epoch)
        elif (optimizer == 'adam'):
            pass
        elif (optimizer == 'rmsprop'):
            pass
        elif(optimizer == 'adagrade'):
            pass

In [558]:
# The layers draw their initial weights from NumPy's global generator, so
# seed it to make the run reproducible under Restart & Run All.
np.random.seed(42)

# 3 inputs -> six sigmoid layers of width 2 -> one sigmoid output.
nn = NeuralNetwork()
nn.add_layer(3, 2, 'sigmoid')
for _hidden_layer in range(5):
    nn.add_layer(2, 2, 'sigmoid')
nn.add_layer(2, 1, 'sigmoid')

In [559]:
# Synthetic data: 10000 samples with 3 normally distributed features
# (mean 5, std 2) and random binary labels. A dedicated seeded generator keeps
# the data identical on every run, independent of any other random draws.
data_rng = np.random.RandomState(0)
data = data_rng.normal(5, 2, (10000, 3))
y = data_rng.randint(0, 2, (10000, 1))

In [560]:
# Train with plain gradient descent: learning rate 0.1 for 100 epochs.
nn.fit(x=data, y=y, learning_rate=0.1, optimizer='grad_dst', epoch=100)

Loss:  0.7623442134009567
Loss:  0.7619306118893472
Loss:  0.7615140288960971
Loss:  0.7610945010445517
Loss:  0.7606720674019561
Loss:  0.760246769513479
Loss:  0.7598186514332927
Loss:  0.7593877597524882
Loss:  0.7589541436236046
Loss:  0.7585178547815596
Loss:  0.7580789475607626
Loss:  0.7576374789082013
Loss:  0.7571935083922983
Loss:  0.7567470982073359
Loss:  0.7562983131732616
Loss:  0.755847220730693
Loss:  0.7553938909309506
Loss:  0.754938396420964
Loss:  0.7544808124229055
Loss:  0.754021216708418
Loss:  0.7535596895673236
Loss:  0.7530963137707016
Loss:  0.7526311745282461
Loss:  0.7521643594398071
Loss:  0.7516959584410284
Loss:  0.7512260637429832
Loss:  0.7507547697656975
Loss:  0.7502821730654232
Loss:  0.7498083722555029
Loss:  0.749333467920646
Loss:  0.7488575625244318
Loss:  0.7483807603098973
Loss:  0.7479031671931718
Loss:  0.7474248906502886
Loss:  0.7469460395975834
Loss:  0.7464667242663862
Loss:  0.7459870560730367
Loss:  0.7455071474854646
Loss:  0.74502711

In [497]:
# Inspect the element-wise product of the forward and backward bias terms
# for the first three layers, using a fixed error value of 0.75.
del_forw = nn.del_forward_pass()
del_back = nn.del_backward_pass(0.75)
parameters = {}  # not read anywhere in this cell
for layer_key in ('layer: 1', 'layer: 2', 'layer: 3'):
    print((del_forw[layer_key]['db'] * del_back[layer_key]['db']))

    

[[145.14039977  86.16445243   5.02503252]]
[[179.55169657  41.61452801]]
[[123.74798459]]
