In [1]:
# import the required library
import numpy as np
import math

In [2]:
# sigmoid function
def sigmoid(x):
    return 1/(1+math.exp(-x))

In [3]:
print(sigmoid(1.641))

0.837670961661056


In [4]:
#derivative sigmoid function

def derivative_sigmoid(y):
    derivative = sigmoid(y)*(1-sigmoid(y))
    return derivative

In [5]:
print(derivative_sigmoid(1))

0.19661193324148185


In [6]:
#Initializing Input Variables(X) and output variable(Y)

X=np.array([[1,0,1,0],[1,0,1,1],[0,1,0,1]])
Y=np.array([[1],[1],[0]])

In [7]:
#Initializing Weights and biases with random values

Wh=np.random.rand(4,3)
wout=np.random.rand(3,1)
bh=np.random.rand(1,3)
bout=np.random.rand(1,1)

In [8]:
print(Wh, wout, bh, bout)

[[ 0.88858567  0.40168657  0.33479754]
 [ 0.26728616  0.39384678  0.18237431]
 [ 0.71399387  0.31955421  0.41600759]
 [ 0.53897779  0.48919964  0.76317977]] [[ 0.37212866]
 [ 0.06797963]
 [ 0.90672093]] [[ 0.50933602  0.02365389  0.14609858]] [[ 0.46972155]]


In [12]:
#calculate hidden layer input

hidden_layer_input = np.dot(X,Wh)+bh
print(hidden_layer_input)

[[ 2.11191556  0.74489467  0.89690371]
 [ 2.65089335  1.23409431  1.66008348]
 [ 1.31559997  0.90670031  1.09165266]]


In [15]:
#performing nonlinear activation on hidden linear input(using sigmoid function).

hidden_layer_activation=np.vectorize(sigmoid)(hidden_layer_input)
print(hidden_layer_activation)

[[ 0.89205593  0.67806526  0.7103128 ]
 [ 0.93406603  0.77453437  0.84024921]
 [ 0.78844872  0.71232447  0.7486928 ]]


In [16]:
#perform linear and nonlinear transformation of hidden layer at output layer

output_layer_input = np.dot(hidden_layer_activation,wout)+bout
output = np.vectorize(sigmoid)(output_layer_input)
print(output_layer_input)
print(output)

[[ 1.49183123]
 [ 1.6318384 ]
 [ 1.49040491]]
[[ 0.81635297]
 [ 0.83642132]
 [ 0.81613904]]


In [17]:
#calculate the gradient of error at output layer

Error = Y-output
print(Error)

[[ 0.18364703]
 [ 0.16357868]
 [-0.81613904]]


In [18]:
#compute slope at output and hidden layer

slope_output_layer = np.vectorize(derivative_sigmoid)(output)
slope_hidden_layer= np.vectorize(derivative_sigmoid)(hidden_layer_activation)
print(slope_output_layer)
print(slope_hidden_layer)

[[ 0.2125726 ]
 [ 0.21091037]
 [ 0.21259019]]
[[ 0.20618754  0.22333091  0.22093989]
 [ 0.20251982  0.2159594   0.21059072]
 [ 0.21484445  0.22078821  0.21799701]]


In [19]:
#compute delta at output layer

lr=0.1
d_output = Error*slope_output_layer*lr
print(d_output)

[[ 0.00390383]
 [ 0.00345004]
 [-0.01735032]]


In [20]:
#calculate Error at hidden layer

wout_transpose = np.transpose(wout)
Error_hidden_layer=np.dot(d_output, wout_transpose)
print(Error_hidden_layer)

[[ 0.00145273  0.00026538  0.00353969]
 [ 0.00128386  0.00023453  0.00312823]
 [-0.00645655 -0.00117947 -0.01573189]]


In [21]:
#compute delta hidden layer

d_hidden_layer = Error_hidden_layer * slope_hidden_layer
print(d_hidden_layer)

[[  2.99534407e-04   5.92678022e-05   7.82057982e-04]
 [  2.60007141e-04   5.06495442e-05   6.58775594e-04]
 [ -1.38715382e-03  -2.60412638e-04  -3.42950589e-03]]


In [22]:
#update weight at both output and hidden layer

hidden_layer_activation_transpose=np.transpose(hidden_layer_activation)
wout=wout+np.dot(hidden_layer_activation_transpose, d_output) *lr 
print(wout)
X_transpose = np.transpose(X)
Wh = Wh + np.dot(X_transpose, d_hidden_layer) *lr 
print(Wh)

[[ 0.37143117]
 [ 0.06727565]
 [ 0.90598911]]
[[ 0.88864163  0.40169756  0.33494162]
 [ 0.26714744  0.39382074  0.18203136]
 [ 0.71404982  0.3195652   0.41615167]
 [ 0.53886508  0.48917866  0.7629027 ]]


In [23]:
#update biases at both output and hidden layer

bh = bh + np.sum(d_hidden_layer, axis=0)*lr
bout=bout+ np.sum(d_output, axis=0)*lr
print(bh,bout)

[[ 0.50925326  0.02363885  0.14589971]] [[ 0.46872191]]
