In [1]:
import numpy as np
from math import sqrt

In [2]:
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + e^(-x)).

    `x` is passed straight to `np.exp`, so scalars and NumPy arrays are
    both accepted; arrays are processed element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def der_sigmoid(x):
    """Derivative of the logistic sigmoid, e^x / (1 + e^x)^2.

    `x` may be a scalar or a NumPy array (evaluated element-wise).

    The derivative is an even function of x, so it is evaluated at -|x|.
    That keeps the argument of `np.exp` non-positive: the exponential can
    no longer overflow, and very large |x| yield 0.0 instead of the
    inf/inf = nan that the direct formula produces.
    """
    decay = np.exp(-np.abs(x))
    return decay / (1 + decay)**2

In [3]:
# synapses (weights)
# synl1: 3x2 matrix applied to the input vector; synl2: 3-vector applied to layer 2
synl1 = np.array([
    [0.8, 0.2],
    [0.4, 0.9],
    [0.3, 0.5],
])
synl2 = np.array([0.3, 0.5, 0.9])

In [4]:
# input vector (activations of layer 1)
a1 = np.array([1.0, 1.0])

### First the feedforward, layer by layer

In [5]:
# layer 2: weighted input z2 = synl1 . a1, then activation a2 = sigmoid(z2)
z2 = synl1 @ a1
print(z2)
a2 = sigmoid(z2)
print(a2)

[ 1.   1.3  0.8]
[ 0.73105858  0.78583498  0.68997448]


In [6]:
# layer 3 - output: weighted input z3 = synl2 . a2, then activation a3 = sigmoid(z3)
z3 = synl2 @ a2
print(z3)
a3 = sigmoid(z3)
print(a3)

1.23321209813
0.774380272053


## Now the back propagation. First: the $\partial J/\partial z_i^{(l)}$

In [7]:
# layer 3: D3 = (a3 - 0) * sigmoid'(z3)
# NOTE(review): the subtracted 0 looks like the target output of a
# squared-error cost - confirm and consider naming it
D3 = der_sigmoid(z3) * (a3 - 0)
print(D3)

0.135296210332


In [8]:
# layer 2: propagate D3 back through the layer 2-3 weights (synl2),
# then scale element-wise by sigmoid'(z2)
D2 = der_sigmoid(z2) * (synl2 * D3)
print(D2)

[ 0.00798025  0.01138507  0.02604705]


## Now the derivative with respect to the weights $\partial J/\partial W_{ij}^{(l)}$

In [9]:
# synapses between layers 2-3: layer-2 activations scaled by D3
der23 = D3 * a2
print(der23)

[ 0.09890946  0.1063205   0.09335093]


In [10]:
# synapses between layers 1-2: outer product of the input a1 with D2,
# i.e. der12[i, j] = a1[i] * D2[j]
der12 = np.outer(a1, D2)
print(der12)

[[ 0.00798025  0.01138507  0.02604705]
 [ 0.00798025  0.01138507  0.02604705]]


## Now the gradient descent step: update all weights using the gradient of the cost function

In [11]:
# step size that multiplies the gradients in the weight update
alpha = 5.0

In [12]:
# gradient-descent update of the weights (in place): subtract alpha times
# the gradient; the gradients are transposed before being subtracted
synl1 -= alpha * np.transpose(der12)
synl2 -= alpha * np.transpose(der23)

In [16]:
# show the updated weights of both layers
for label, weights in (("Layer1: ", synl1), ("Layer2: ", synl2)):
    print(label, weights)

Layer1:  [[ 0.76009873  0.16009873]
 [ 0.34307467  0.84307467]
 [ 0.16976473  0.36976473]]
Layer2:  [-0.19454728 -0.03160248  0.43324534]


## What happens if we evaluate the model again?

In [13]:
# feedforward pass repeated with the current values of synl1 / synl2
# layer 2
z2 = synl1 @ a1
print(z2)
a2 = sigmoid(z2)
print(a2)
# layer 3 - output
z3 = synl2 @ a2
print(z3)
a3 = sigmoid(z3)
print(a3)

[ 0.92019745  1.18614935  0.53952946]
[ 0.71508234  0.76605167  0.63170295]
0.110355908096
0.527561011887
