In [1]:
import math

# Input -> hidden weights: w1, w2 feed hidden unit h1; w3, w4 feed h2.
w1 = 0.15
w2 = 0.2
w3 = 0.25
w4 = 0.3
# Hidden -> output weights: w5, w6 feed output unit o1; w7, w8 feed o2.
w5 = 0.4
w6 = 0.45
w7 = 0.5
w8 = 0.55

# Biases: b1 is added to both hidden units, b2 to both output units.
b1 = 0.35
b2 = 0.6

# Network inputs.
x1 = 0.05
x2 = 0.1

# Desired values of the two outputs, used by the squared-error terms.
target_o1 = 0.01
target_o2 = 0.99

In [2]:
def activation(x):
    """ReLU: return x unchanged when it is positive, otherwise 0."""
    if x > 0:
        return x
    return 0
# Sigmoid alternative (replace the body above to use it):
#     return 1 / (1 + math.exp(-x))

# Forward pass

## Compute output of hidden layer

In [3]:
# Net input to hidden unit h1: weighted sum of both inputs plus the hidden bias b1.
net_h1 = w1*x1+w2*x2+b1
net_h1

0.3775

In [4]:
# Output of hidden unit h1: the activation applied to its net input.
out_h1 = activation(net_h1)
out_h1

0.3775

In [5]:
# Net input to hidden unit h2: weighted sum of both inputs plus the same hidden bias b1.
net_h2 = w3*x1+w4*x2+b1
net_h2

0.39249999999999996

In [6]:
# Output of hidden unit h2: the activation applied to its net input.
out_h2 = activation(net_h2)
out_h2

0.39249999999999996

## Compute output of output layer

In [7]:
# Net input to output unit o1: weighted sum of the hidden outputs plus the output bias b2.
net_o1 = out_h1*w5+out_h2*w6+b2
net_o1

0.9276249999999999

In [8]:
# Output of unit o1: the activation applied to its net input.
out_o1 = activation(net_o1)
out_o1

0.9276249999999999

In [9]:
# Net input to output unit o2 (hidden outputs weighted by w7, w8, plus bias b2),
# followed by the activation to get the unit's output.
net_o2 = out_h1*w7 + out_h2*w8 + b2
out_o2 = activation(net_o2)
out_o2

1.0046249999999999

## Calculate error

In [10]:
# Half squared error between the target and the actual value of output o1.
E_o1 = 0.5*(target_o1 - out_o1)**2
E_o1

0.4210178203124999

In [11]:
# Half squared error between the target and the actual value of output o2.
E_o2 = 0.5*(target_o2 - out_o2)**2
E_o2

0.00010694531249999836

In [12]:
# Total error: the sum of the two per-output errors.
E_total = E_o1 + E_o2
E_total

0.4211247656249999

# Backward pass
## Calculate $w_8^{\text{(next)}}$

$\frac{\partial E_\text{total}}{\partial w_8}$
$ = \frac{\partial E_\text{total}}{\partial out_{o_2}} $
$ \times \frac{\partial out_{o_2}}{\partial net_{o_2}} $
$ \times \frac{\partial net_{o_2}}{\partial w_8} $

$E_\text{total} = \frac{1}{2} (target_{o1} - out_{o1})^2 + \frac{1}{2}(target_{o2} - out_{o2})^2$

In [13]:
# Total error written out in full (same expression as E_o1 + E_o2),
# so the dependence on out_o1 and out_o2 is explicit before differentiating.
E_total = 0.5*(target_o1-out_o1)**2 + 0.5*(target_o2-out_o2)**2
E_total

0.4211247656249999

$\frac{\partial E_\text{total}}{\partial out_{o_2}} = 2 \cdot \frac{1}{2} (target_{o2} - out_{o2}) \cdot (-1) = out_{o2} - target_{o2}$

In [14]:
# dE_total/dout_o2: only the o2 term of E_total depends on out_o2.
dE_total_dout_o2 = out_o2 - target_o2
dE_total_dout_o2

0.014624999999999888

$out_{o2} = relu(net_{o2})$

$\frac{\partial out_{o_2}}{\partial net_{o_2}} = \begin{cases}
		 0, & \text{if}\ net_{o2} <0 \\
		  1, & \text{otherwise}
		\end{cases}$

In [15]:
# Show net_o2 again: its sign selects the branch of the ReLU derivative.
net_o2

1.0046249999999999

In [16]:
# ReLU derivative at net_o2: 0 for a negative net input, 1 otherwise.
dout_o2_dnet_o2 = 0 if net_o2 < 0 else 1
dout_o2_dnet_o2

1

$\frac{\partial out_{o_2}}{\partial net_{o_2}} = \begin{cases}
		 0, & \text{if}\ net_{o2} <0 \\
		  1, & \text{otherwise}
		\end{cases} = 1$

$net_{o2} = w_7 \cdot out_{h1} + w_8 \cdot out_{h2} + b_2$

$\frac{\partial net_{o_2}}{\partial w_8} = out_{h2}$

In [17]:
# dnet_o2/dw8: w8 multiplies out_h2 in net_o2, so the derivative is out_h2.
dnet_o2_dw8 = out_h2
dnet_o2_dw8

0.39249999999999996

$\frac{\partial E_\text{total}}{\partial w_8}$
$ = \frac{\partial E_\text{total}}{\partial out_{o_2}} $
$ \times \frac{\partial out_{o_2}}{\partial net_{o_2}} $
$ \times \frac{\partial net_{o_2}}{\partial w_8} $

In [18]:
dE_total_dout_o2

0.014624999999999888

In [19]:
dout_o2_dnet_o2

1

In [20]:
dnet_o2_dw8

0.39249999999999996

In [21]:
# Chain rule: the product of the three partial derivatives gives dE_total/dw8.
dE_total_dw8 = dE_total_dout_o2 * dout_o2_dnet_o2 * dnet_o2_dw8
dE_total_dw8

0.0057403124999999555

$\eta = 0.5$

In [22]:
# Learning rate used in the gradient-descent weight updates.
eta = 0.5

$w_8^\text{(next)} = w_8 - \eta  \frac{\partial E_\text{total}}{\partial w_8}$

In [23]:
# Gradient-descent step: move w8 against its gradient, scaled by the learning rate.
w8_next = w8 - eta * dE_total_dw8
w8_next

0.5471298437500001

## Calculate $w_2^{\text{(next)}}$

$\frac{\partial E_{total}}{\partial w_2} = \frac{\partial E_{total}}{\partial out_{h1}} \times \frac{\partial out_{h1}}{\partial net_{h1}} \times \frac{\partial net_{h1}}{\partial w_2}$

$\frac{\partial E_{total}}{\partial out_{h1}}
= \frac{\partial E_{o1}}{\partial out_{h1}}
+ \frac{\partial E_{o2}}{\partial out_{h1}}$

$\frac{\partial E_{o1}}{\partial out_{h1}}
= \frac{\partial E_{o1} }{\partial out_{o1}}
\times \frac{\partial out_{o1} }{\partial net_{o1}}
\times \frac{\partial net_{o1}}{\partial out_{h1}}$

$\frac{\partial E_{o1} }{\partial out_{o1}}
= out_{o1} - target_{o1}$

In [24]:
# dE_o1/dout_o1 for the half squared error: actual output minus target.
dE_o1_dout_o1 = out_o1 - target_o1
dE_o1_dout_o1

0.9176249999999999

$\frac{\partial out_{o1}}{\partial net_{o1}}
= 1$ (ReLU derivative, since $net_{o1} > 0$)

In [25]:
# ReLU derivative at net_o1, taken from the actual net input instead of a
# hard-coded 1: 0 for a negative net input, 1 otherwise. This keeps the
# gradient correct if the weights or inputs are changed and the notebook re-run.
dout_o1_dnet_o1 = 0 if net_o1 < 0 else 1
dout_o1_dnet_o1

1

$\frac{\partial net_{o1}}{\partial out_{h1}} = w_5$

In [26]:
# dnet_o1/dout_h1: out_h1 is multiplied by w5 in net_o1, so the derivative is w5.
dnet_o1_dout_h1 = w5
dnet_o1_dout_h1

0.4

In [27]:
# Chain rule through output unit o1: how E_o1 changes with out_h1.
dE_o1_dout_h1 = dE_o1_dout_o1 * dout_o1_dnet_o1 * dnet_o1_dout_h1
dE_o1_dout_h1

0.36705

$\frac{\partial E_{o2} }{\partial out_{h1}}
= \frac{\partial E_{o2}}{\partial out_{o2}}
\times \frac{\partial out_{o2}}{\partial net_{o2}}
\times \frac{\partial net_{o2}}{\partial out_{h1}}
$

$\frac{\partial E_{o2}}{\partial out_{o2}}
= out_{o2} - target_{o2}$

In [28]:
# dE_o2/dout_o2 for the half squared error: actual output minus target.
dE_o2_dout_o2 = out_o2 - target_o2
dE_o2_dout_o2

0.014624999999999888

$\frac{\partial out_{o2}}{\partial net_{o2}}
= 1$ (ReLU derivative, since $net_{o2} > 0$)

In [29]:
# ReLU derivative at net_o2, taken from the actual net input instead of a
# hard-coded 1: 0 for a negative net input, 1 otherwise.
dout_o2_dnet_o2 = 0 if net_o2 < 0 else 1
dout_o2_dnet_o2

1

$\frac{\partial net_{o2}}{\partial out_{h1}}
= w_7$

In [30]:
# dnet_o2/dout_h1: out_h1 is multiplied by w7 in net_o2, so the derivative is w7.
dnet_o2_dout_h1 = w7
dnet_o2_dout_h1

0.5

In [31]:
# Chain rule through output unit o2: how E_o2 changes with out_h1.
dE_o2_dout_h1 = dE_o2_dout_o2 * dout_o2_dnet_o2 * dnet_o2_dout_h1
dE_o2_dout_h1

0.007312499999999944

In [32]:
# out_h1 feeds both output units, so its effect on the total error is the
# sum of the contributions through o1 and o2.
dE_total_dout_h1 = dE_o1_dout_h1 + dE_o2_dout_h1
dE_total_dout_h1

0.37436249999999993

In [33]:
# ReLU derivative at net_h1, taken from the actual net input instead of a
# hard-coded 1: 0 for a negative net input, 1 otherwise.
dout_h1_dnet_h1 = 0 if net_h1 < 0 else 1
dout_h1_dnet_h1

1

In [34]:
# dnet_h1/dw2: w2 multiplies x2 in net_h1, so the derivative is x2.
dnet_h1_dw2 = x2
dnet_h1_dw2

0.1

In [35]:
# Chain rule: the product of the three partial derivatives gives dE_total/dw2.
dE_total_dw2 = dE_total_dout_h1 * dout_h1_dnet_h1 * dnet_h1_dw2
dE_total_dw2

0.03743625

$w_2^\text{(next)} = w_2 - \eta  \frac{\partial E_\text{total}}{\partial w_2}$

In [36]:
# Gradient-descent step: move w2 against its gradient, scaled by the learning rate.
w2_next = w2 - eta * dE_total_dw2
w2_next

0.181281875