## This notebook contains for the code for <a href="https://amanpreetsingh459.github.io/2018/06/09/blog2-deep-neural-network.html" target="_blank">this</a> blog-post

In [1]:
import numpy as np
np.random.seed(3)

In [2]:
#Input array
X=np.array([[1,0,1,0],[1,0,1,1],[0,1,0,1]])

#Output
y=np.array([[1],[1],[0]])

In [3]:
print(np.sum(y))
print(np.sum(y, axis = 0))
print(np.sum(y, axis = 0, keepdims=True))

2
[2]
[[2]]


In [4]:
print(X.shape)
print(y.shape)

(3, 4)
(3, 1)


In [5]:
#Sigmoid Function
def sigmoid (x):
    return 1/(1 + np.exp(-x))

#Derivative of Sigmoid Function
def sigmoid_derivatives(x):
    return x * (1 - x)

In [6]:
#Variable initialization
epoch=10001 #Setting training iterations
learning_rate=0.1 #Setting learning rate
inputlayer_neurons = X.shape[1] #number of features in data set
hiddenlayer1_neurons = 5 #number of 1st hidden layer's neurons
hiddenlayer2_neurons = 5 #number of 2nd hidden layer's neurons
output_neurons = 1 #number of neurons at output layer

In [7]:
#weight and bias initialization
weights_input_to_hidden1 = np.random.normal(size=(inputlayer_neurons,hiddenlayer1_neurons))
bias_input_to_hidden1 =np.random.normal(size=(1,hiddenlayer1_neurons))

weights_hidden1_to_hidden2 = np.random.normal(size=(hiddenlayer1_neurons, hiddenlayer2_neurons))
bias_hidden1_to_hidden2 =np.random.normal(size=(1,hiddenlayer2_neurons))

weights_hidden2_to_output = np.random.normal(size=(hiddenlayer2_neurons,output_neurons))
bias_hidden2_to_output=np.random.normal(size=(1,output_neurons))

In [8]:
#Initial weights and biases
print(np.sum(weights_input_to_hidden1))
print(np.sum(weights_hidden1_to_hidden2))
print(np.sum(weights_hidden2_to_output))

print(np.sum(bias_input_to_hidden1))
print(np.sum(bias_hidden1_to_hidden2))
print(np.sum(bias_hidden2_to_output))

-1.80831494926
-12.1964091106
-4.6507139537
-0.691616944068
5.58110257009
1.04814751225


In [9]:
print(weights_input_to_hidden1.shape)
print(weights_hidden1_to_hidden2.shape)
print(weights_hidden2_to_output.shape)

print(bias_input_to_hidden1.shape)
print(bias_hidden1_to_hidden2.shape)
print(bias_hidden2_to_output.shape)

(4, 5)
(5, 5)
(5, 1)
(1, 5)
(1, 5)
(1, 1)


#### Below is the graphical representation of our neural network

<div class="imgcap">
<img src="/_images/blog2_neural_network_image.jpg">
</div>

For the simplicity in the picture i have not added all the links from one layer to another.

In [10]:
for i in range(epoch):
    #Forward Propogation
    hidden_layer1_activations = sigmoid(np.dot(X, weights_input_to_hidden1) + bias_input_to_hidden1)
    hidden_layer2_activations = sigmoid(np.dot(hidden_layer1_activations, weights_hidden1_to_hidden2) + bias_hidden1_to_hidden2)
    output_layer_activations = sigmoid(np.dot(hidden_layer2_activations, weights_hidden2_to_output) + bias_hidden2_to_output)

    #Backpropagation
    
    #getting the error contribution by each layer    
    #output to hidden2
    error_output_layer = y - output_layer_activations
    slope_output_layer = sigmoid_derivatives(output_layer_activations)
    delta_output_layer = error_output_layer * slope_output_layer    
    
    #hidden2 to hidden1
    slope_hidden_layer2 = sigmoid_derivatives(hidden_layer2_activations)    
    error_hidden_layer2 = delta_output_layer.dot(weights_hidden2_to_output.T)
    delta_hidden_layer2 = error_hidden_layer2 * slope_hidden_layer2    
    
    #hidden1 to input
    slope_hidden_layer1 = sigmoid_derivatives(hidden_layer1_activations)
    error_hidden_layer1 = delta_hidden_layer2.dot(weights_hidden1_to_hidden2.T)
    delta_hidden_layer1 = error_hidden_layer1 * slope_hidden_layer1
    
    #weight and bias adjustments 
    #output to hidden2
    weights_hidden2_to_output += hidden_layer2_activations.T.dot(delta_output_layer) * learning_rate
    bias_hidden2_to_output += np.sum(delta_output_layer, axis=0, keepdims=True) * learning_rate
    
    #hidden2 to hidden1
    weights_hidden1_to_hidden2 += hidden_layer1_activations.T.dot(delta_hidden_layer2) * learning_rate
    bias_hidden1_to_hidden2 += np.sum(delta_hidden_layer2, axis=0, keepdims=True) * learning_rate        
    
    #hidden1 to input
    weights_input_to_hidden1 += X.T.dot(delta_hidden_layer1) * learning_rate    
    bias_input_to_hidden1 += np.sum(delta_hidden_layer1, axis=0, keepdims=True) * True
    
    if i != 0 and i % 1000 == 0:
        print("error after {0} steps of training: {1}".format((i/1000*1000),np.sum(error_output_layer)))

error after 1000.0 steps of training: 0.05018205356841837
error after 2000.0 steps of training: 0.033859263420156945
error after 3000.0 steps of training: 0.027105713020409337
error after 4000.0 steps of training: 0.023207920712687027
error after 5000.0 steps of training: 0.02060028777010732
error after 6000.0 steps of training: 0.01870175568700289
error after 7000.0 steps of training: 0.0172412749291508
error after 8000.0 steps of training: 0.016073402728554335
error after 9000.0 steps of training: 0.015112271171374977
error after 10000.0 steps of training: 0.014303526532159678


Here we can see that after every 1000 iterations the error is coming down. That is what the learning is. Below are the changes in the weights from start till last iteration and the outputs actual and learnt.

In [11]:
print(hidden_layer1_activations.shape)
print(hidden_layer2_activations.shape)
print(output_layer_activations.shape)

(3, 5)
(3, 5)
(3, 1)


In [12]:
#Learnt weights and biases
print(np.sum(weights_input_to_hidden1))
print(np.sum(weights_hidden1_to_hidden2))
print(np.sum(weights_hidden2_to_output))

print(np.sum(bias_input_to_hidden1))
print(np.sum(bias_hidden1_to_hidden2))
print(np.sum(bias_hidden2_to_output))

0.734876899466
-19.7401244794
-8.00361029621
-1.59045636135
6.57927464733
2.33363090766


In [13]:
print(y)
print(output_layer_activations)

[[1]
 [1]
 [0]]
[[ 0.98575188]
 [ 0.980885  ]
 [ 0.0190596 ]]
