In [8]:
# Python CODE
import numpy as np
# Seed NumPy's global random generator so every random draw made below
# (weights, inputs and targets in initialize) is reproducible.
# DO NOT FORGET TO SPECIFY THE SAME SEED
np.random.seed (12345)
def initialize(input_dim, hidden_dim, output_dim, batchsize):
    """Create random parameters and a random minibatch for a 3-layer network.

    All random values come from NumPy's global generator, so the result
    depends on the seed that was set before the call.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    hidden_dim : int
        Number of units in each of the two hidden layers.
    output_dim : int
        Number of output units.
    batchsize : int
        Number of instances (columns) in the minibatch.

    Returns
    -------
    parameters : list
        ``[W1, b1, W2, b2, W3, b3]``; each weight matrix has shape
        ``(fan_out, fan_in)`` with standard-normal entries scaled by 0.01,
        each bias is a zero vector of shape ``(fan_out,)``.
    x : ndarray, shape (input_dim, batchsize)
        Inputs drawn with ``np.random.rand``.
    y : ndarray, shape (output_dim, batchsize)
        Targets drawn with ``np.random.randn``.
    """
    weight_scale = 0.01
    # (fan_out, fan_in) of every layer, in forward order
    layer_shapes = (
        (hidden_dim, input_dim),
        (hidden_dim, hidden_dim),
        (output_dim, hidden_dim),
    )

    # Biases are zeros and consume no random numbers, so the weights are
    # drawn in the order W1, W2, W3.
    parameters = []
    for fan_out, fan_in in layer_shapes:
        parameters.append(np.random.randn(fan_out, fan_in) * weight_scale)
        parameters.append(np.zeros((fan_out,)))

    # minibatch of input instances, followed by the ground truths
    x = np.random.rand(input_dim, batchsize)
    y = np.random.randn(output_dim, batchsize)
    return parameters, x, y
# initialize parameters, inputs and targets:
# 3 input features, 4 units per hidden layer, 2 outputs, minibatch of 5 instances
parameters, x, y = initialize (3, 4, 2, 5)

In [9]:
class NN:
    """Fully connected network with two sigmoid hidden layers and a linear output.

    Arrays are laid out column-wise: weights have shape ``(fan_out, fan_in)``,
    biases are 1-D vectors of length ``fan_out`` and a minibatch has one
    instance per column. ``forwardPass`` caches every layer's pre- and
    post-activation values on the instance; ``backwardPass`` reads that cache,
    so it must be called after ``forwardPass`` on the same data.
    """

    def __init__(self):
        # optimizable weights and biases (placeholders until setParameters is called)
        self.W1 = []
        self.b1 = []
        self.W2 = []
        self.b2 = []
        self.W3 = []
        self.b3 = []

        # Results of the forward pass before and after activation
        self.layer1BefAct = []
        self.layer1 = []
        self.layer2BefAct = []
        self.layer2 = []
        self.layer3BefAct = []
        self.layer3 = []

        # Hidden gradients calculated in the backward pass
        self.L3OutputGrad = []
        self.L2OutputGrad = []
        self.L2WeightedGrad = []
        self.L1OutputGrad = []
        self.L1WeightedGrad = []

        # gradients of weights and biases, filled in by backwardPass
        self.w3_gradient = []
        self.b3_gradient = []
        self.w2_gradient = []
        self.b2_gradient = []
        self.w1_gradient = []
        self.b1_gradient = []

    # Functions:
    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1+np.exp(-x))

    def sigmoidDeriv(self, x):
        """Return the element-wise derivative of the sigmoid at pre-activation ``x``."""
        # evaluate the sigmoid once and reuse it: s'(x) = s(x) * (1 - s(x))
        s = self.sigmoid(x)
        return s * (1 - s)

    def setParameters(self, param):
        """Install parameters from the sequence ``[W1, b1, W2, b2, W3, b3]``.

        The arrays are stored by reference, not copied.
        """
        self.W1, self.b1, self.W2, self.b2, self.W3, self.b3 = (
            param[0], param[1], param[2], param[3], param[4], param[5]
        )

    def loss(self, pred, y):
        """Return the squared loss ``0.5 * (pred - y)**2`` summed over all entries
        and divided by the number of columns of ``y``.
        """
        M = y.shape[1]
        # expression kept exactly as-is so floating-point results stay reproducible
        return (1. / M) * np.sum( np.sum(.5 * (pred - y)**2, axis = 0))

    def dloss(self, pred, y):
        """Return the derivative of ``loss`` with respect to ``pred``."""
        M = y.shape[1]
        return (pred - y) / M

    def layerExec(self, input, weight, bias, act):
        """Run one layer forward.

        Parameters
        ----------
        input : ndarray, shape (fan_in, batch)
        weight : ndarray, shape (fan_out, fan_in)
        bias : ndarray, shape (fan_out,)
            Added to every column of the weighted input.
        act : bool
            Apply the sigmoid when true; otherwise the layer is linear.

        Returns
        -------
        (output, outputAct) : the pre-activation and the post-activation values.
            For a linear layer both are the same array.
        """
        output = np.dot(weight, input) + bias[:, None]
        outputAct = self.sigmoid(output) if act else output
        return output, outputAct

    def forwardPass(self, input, target):
        """Propagate ``input`` through all three layers, cache the intermediate
        values on the instance and return the loss against ``target``.
        """
        self.layer1BefAct, self.layer1 = self.layerExec(input, self.W1, self.b1, True)
        self.layer2BefAct, self.layer2 = self.layerExec(self.layer1, self.W2, self.b2, True)
        # the output layer is linear (no activation)
        self.layer3BefAct, self.layer3 = self.layerExec(self.layer2, self.W3, self.b3, False)
        return self.loss(self.layer3, target)

    def backwardPass(self, input, target, printResults=False):
        """Backpropagate the loss through all layers.

        Uses the values cached by the most recent ``forwardPass`` call, so that
        call must have been made with the same ``input`` and ``target``.

        Parameters
        ----------
        input : ndarray
            The minibatch that was fed to ``forwardPass``.
        target : ndarray
            The ground truths that were fed to ``forwardPass``.
        printResults : bool, optional
            Print every gradient when true. Defaults to ``False``.

        Returns
        -------
        list
            ``[w1_gradient, b1_gradient, w2_gradient, b2_gradient,
            w3_gradient, b3_gradient]`` - the same order ``setParameters``
            expects. The gradients are also stored on the instance under
            those attribute names.
        """
        # gradient w.r.t. each layer's output, then w.r.t. its pre-activation
        self.L3OutputGrad = self.dloss(self.layer3, target)
        self.L2OutputGrad = np.dot(self.W3.T, self.L3OutputGrad)
        self.L2WeightedGrad = self.L2OutputGrad * self.sigmoidDeriv(self.layer2BefAct)
        self.L1OutputGrad = np.dot(self.W2.T, self.L2WeightedGrad)
        self.L1WeightedGrad = self.L1OutputGrad * self.sigmoidDeriv(self.layer1BefAct)

        # Gradients of weights and biases. They are kept on the instance so the
        # attributes declared in __init__ reflect the latest backward pass.
        # Bias gradients sum over the batch axis because the bias is shared by
        # every column.
        self.w3_gradient = np.dot(self.L3OutputGrad, self.layer2.T)
        self.b3_gradient = np.sum(self.L3OutputGrad, axis = 1)
        self.w2_gradient = np.dot(self.L2WeightedGrad, self.layer1.T)
        self.b2_gradient = np.sum(self.L2WeightedGrad, axis = 1)
        self.w1_gradient = np.dot(self.L1WeightedGrad, input.T)
        self.b1_gradient = np.sum(self.L1WeightedGrad, axis = 1)

        gradients = [self.w1_gradient, self.b1_gradient,
                     self.w2_gradient, self.b2_gradient,
                     self.w3_gradient, self.b3_gradient]

        # print gradients for Question 4
        if printResults:
            labels = ("w1_gradient:", "b1_gradient:",
                      "w2_gradient:", "b2_gradient:",
                      "w3_gradient:", "b3_gradient:")
            for label, grad in zip(labels, gradients):
                print(label, grad, "\n")
        return gradients

In [10]:
# Build the network and install the randomly initialized parameters.
myNN = NN()
myNN.setParameters(parameters)
# Forward pass: caches the layer activations on myNN and returns the loss.
loss = myNN.forwardPass(x,y)
print("Question 1: Loss of the forward Pass: {}".format(loss), "\n")
print("Question 3: Show gradients of loss function for each parameter separately")
# Backward pass with printing enabled; the returned list is ordered
# [w1, b1, w2, b2, w3, b3] and is reused by the optimization cell below.
gradients = myNN.backwardPass(x, y, True)

Question 1: Loss of the forward Pass: 1.0595073989866606 

Question 3: Show gradients of loss function for each parameter separately
w1_gradient: [[-3.55164801e-07  4.19035853e-07  3.52847230e-06]
 [ 2.02412923e-06  1.85604158e-06  1.01560709e-06]
 [ 1.12728984e-05  8.49377219e-06 -3.51794858e-06]
 [ 6.96166795e-07  6.27452005e-07  2.95038713e-07]] 

b1_gradient: [6.68064134e-06 3.46671844e-06 1.26685662e-06 1.08561520e-06] 

w2_gradient: [[ 3.99898284e-06  4.14561394e-06  3.09285858e-06  1.00056924e-05]
 [-9.83803475e-04 -9.89356662e-04 -9.86819589e-04 -9.82852021e-04]
 [ 5.26033041e-04  5.29099114e-04  5.26928985e-04  5.30215253e-04]
 [ 6.09661103e-04  6.13133340e-04  6.11301153e-04  6.10570429e-04]] 

b2_gradient: [ 6.07123945e-06 -1.96949231e-03  1.05156448e-03  1.22000815e-03] 

w3_gradient: [[-0.29601562 -0.29315414 -0.29558964 -0.29427093]
 [-0.00063616 -0.00062495 -0.00062947 -0.00063094]] 

b3_gradient: [-0.58798803 -0.00125797] 



In [13]:
# Optimizing the NN using a stepsize and multiple epochs
# (plain full-batch gradient descent on the single minibatch x, y).
# NOTE: this cell updates `parameters` in place, so re-running it continues
# training from the current values rather than from the initial ones.
print("Optimize NN using Backpropagation:")
stepsize = 0.1
epochs = 100000

# Recompute loss and gradients for the *current* parameters instead of relying
# on whatever `loss` / `gradients` an earlier cell happened to leave behind;
# otherwise the first update step can silently use stale gradients.
myNN.setParameters(parameters)
loss = myNN.forwardPass(x, y)
gradients = myNN.backwardPass(x, y, False)
print("Loss Start: {}".format(loss), "\n")

for epoch in range(1, epochs + 1):
    # gradient-descent step on every parameter array
    for idx, (param, grad) in enumerate(zip(parameters, gradients)):
        parameters[idx] = param - (grad * stepsize)

    # evaluate the updated network and get the gradients for the next step
    myNN.setParameters(parameters)
    loss = myNN.forwardPass(x, y)
    gradients = myNN.backwardPass(x, y, False)

    # report the first three epochs and then one epoch after every 10000
    if epoch % 10000 == 1 or epoch <= 3:
        print("Loss after {} epochs: {}".format(epoch, loss), "\n")
print("Loss after {} epochs: {}".format(epochs, loss), "\n")

Optimize NN using Backpropagation:
Loss Start: 3.1989944074154646e-09 

Loss after 1 epochs: 3.1984925766169344e-09 

Loss after 2 epochs: 3.197990824585568e-09 

Loss after 3 epochs: 3.197489151316404e-09 

Loss after 10001 epochs: 6.665423669904652e-10 

Loss after 20001 epochs: 1.389888114028477e-10 

Loss after 30001 epochs: 2.8990455053779295e-11 

Loss after 40001 epochs: 6.047647119152267e-12 

Loss after 50001 epochs: 1.2616634664726289e-12 

Loss after 60001 epochs: 2.63216032832448e-13 

Loss after 70001 epochs: 5.4914433469486175e-14 

Loss after 80001 epochs: 1.1456794431756107e-14 

Loss after 90001 epochs: 2.3902364109319897e-15 

Loss after 100000 epochs: 4.987548973714041e-16 



In [12]:
# Display the current parameter list, ordered [W1, b1, W2, b2, W3, b3]
parameters

[array([[-3.49288549,  1.09913006, -2.50246173],
        [-2.5763079 , -1.97075992,  0.66218477],
        [-8.13738639, -0.83435172,  3.07104694],
        [-1.89310082, -0.22804272, -2.55404911]]),
 array([0.99180998, 1.49733687, 1.96028822, 0.07172028]),
 array([[-1.47091433, -0.21902798,  2.44689452, -0.69588546],
        [-2.64648835, -1.6000124 ,  4.61908868, -1.63929583],
        [-2.36986603, -1.12458137,  4.17080493, -1.39581991],
        [ 1.06935427,  3.10964707, -0.3220495 ,  0.67759025]]),
 array([-0.38104509,  0.32015126,  0.16711511, -0.92562583]),
 array([[-0.05401687, -1.38844162, -0.93789822,  3.28331664],
        [-2.10544546, -4.51555498, -3.97217755,  2.74506886]]),
 array([-0.21452285,  3.22139556])]