# Sigmoid function and its derivative

In [1]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def derOFsig(x):
    return sigmoid(x)*(1 - sigmoid(x))

# Weights and biases

In [2]:
w1 = np.array([[1, 2],
               [-2, -1],
               [0, -3]])

b1 = np.array([1, 0, 2])

w2 = np.array([[2, 3, -2],
               [1, -2, 1]])

b2 = np.array([1, -1])

w3 = np.array([4, -1])

b3 = np.array([2])

a0 = np.array([0.1 ,0.3])

a3 = np.dot(w3, sigmoid(np.dot(w2, sigmoid(np.dot(w1, a0) + b1)) + b2)) + b3

print(a3)

[5.18284955]


# Inputs and outputs

In [3]:
z1 = np.dot(w1,a0)
a1 = sigmoid(z1+b1)

z2 = np.dot(w2,a1)
a2 = sigmoid(z2+b2)

z3 = np.dot(w3,a2)
a3 = (z3+b3)
print(a3)

[5.18284955]


# Calculation of deltas

In [4]:
y = 0.5
delta3 = 2*(a3-y)
delta2 = np.dot(np.atleast_2d(w3).transpose(),delta3) * derOFsig(z2)
delta1 = np.dot(np.atleast_2d(w2).transpose(),delta2) * derOFsig(z1)

print("delta3:")
print(delta3,"\n")
print("delta2:")
print(delta2,"\n")
print("delta1:")
print(delta1)

delta3:
[9.36569911] 

delta2:
[ 6.22140123 -1.97196571] 

delta1:
[ 2.3215193   5.31299568 -2.96223929]


# The gradient of the cost function

In [5]:
gradC = np.atleast_2d(np.concatenate((delta1*a0[0], delta1*a0[1], delta2*a1[0], delta2*a1[1],
                        delta2*a1[2], delta3*a2, delta1, delta2, delta3), axis=0)).transpose()

print(gradC)

[[ 0.23215193]
 [ 0.53129957]
 [-0.29622393]
 [ 0.69645579]
 [ 1.5938987 ]
 [-0.88867179]
 [ 5.26041084]
 [-1.6673655 ]
 [ 2.34883198]
 [-0.74449725]
 [ 4.66766914]
 [-1.4794872 ]
 [ 8.5300725 ]
 [ 4.31067877]
 [ 2.3215193 ]
 [ 5.31299568]
 [-2.96223929]
 [ 6.22140123]
 [-1.97196571]
 [ 9.36569911]]


# Weights and biases

In [6]:
wb_old =  np.atleast_2d(np.concatenate((w1[:,0], w1[:,1], w2[:,0], w2[:,1], w2[:,2], w3, b1, b2, b3), axis=0)).transpose()

print(wb_old)

[[ 1]
 [-2]
 [ 0]
 [ 2]
 [-1]
 [-3]
 [ 2]
 [ 1]
 [ 3]
 [-2]
 [-2]
 [ 1]
 [ 4]
 [-1]
 [ 1]
 [ 0]
 [ 2]
 [ 1]
 [-1]
 [ 2]]


# Calculation of new weights and biases using gradient descent algorithm

In [7]:
wb_new = wb_old - gradC

print(wb_new)

[[ 0.76784807]
 [-2.53129957]
 [ 0.29622393]
 [ 1.30354421]
 [-2.5938987 ]
 [-2.11132821]
 [-3.26041084]
 [ 2.6673655 ]
 [ 0.65116802]
 [-1.25550275]
 [-6.66766914]
 [ 2.4794872 ]
 [-4.5300725 ]
 [-5.31067877]
 [-1.3215193 ]
 [-5.31299568]
 [ 4.96223929]
 [-5.22140123]
 [ 0.97196571]
 [-7.36569911]]
