In [None]:
from network.value import Value
from network.network import MLP
from network.graph import draw_graph

### Manual Backpropagation on an Expression

In [None]:
# Build the expression L = (a * b + c) * f one operation at a time, so that
# every intermediate result is its own labelled node.

# Leaf values
a, b = Value(2.0, label='a'), Value(-3.0, label='b')
c, f = Value(10.0, label='c'), Value(-2.0, label='f')

# Product node: e = a * b
e = a * b
e.label = 'e'

# Sum node: d = e + c
d = e + c
d.label = 'd'

# Output node: L = d * f
L = d * f
L.label = 'L'


In [None]:
# Backpropagation by hand: store dL/d(node) in every node's .grad, starting at
# the output L and applying the chain rule one operation at a time.

# Base case: dL/dL = 1
L.grad = 1

# L = d * f -> each factor's gradient is the value of the other factor.
#   dL/dd = d/dd(d * f) = f
#   dL/df = d/df(d * f) = d
d.grad = f.data
f.grad = d.data

# d = e + c -> addition has a local derivative of 1, so both operands simply
# inherit d's gradient.
#   dL/dc = dL/dd * dd/dc = f * d/dc(e + c) = f * 1 = f
#   dL/de = dL/dd * dd/de = f * d/de(e + c) = f * 1 = f
c.grad = e.grad = d.grad

# e = a * b -> upstream gradient times the other factor.
#   dL/da = dL/de * de/da = f * d/da(a * b) = f * b
#   dL/db = dL/de * de/db = f * d/db(a * b) = f * a
a.grad = e.grad * b.data
b.grad = e.grad * a.data

In [None]:
# Draw the expression graph that ends at L. This is the last expression of the
# cell, so whatever draw_graph returns is shown as the cell output.
# NOTE(review): draw_graph lives in network.graph and is not visible here;
# presumably it shows each node's label, data and grad — confirm.
draw_graph(L)

In [None]:
# Single optimisation step. Every leaf is nudged a small distance *along* its
# gradient (+=), which increases L; gradient descent would subtract instead.
# Relies on the .grad values filled in by the manual backpropagation cell above.
step_size = 0.01
for leaf in (a, b, c, f):
    leaf.data += step_size * leaf.grad

# Forward pass: rebuild the expression from the updated leaves.
# The labels assigned to e, d and L earlier are not re-applied here, and no
# gradients are computed for these new nodes; only the value of L is inspected.
e = a * b
d = e + c
L = d * f

# With the gradients above, L rises from -8.0 to roughly -7.29.
print(L.data)

### Manual Backpropagation on a Neuron

In [None]:
# A single tanh neuron with two inputs: o = tanh(x1 * w1 + x2 * w2 + b)

# Inputs x1, x2 and their weights w1, w2
x1, x2 = Value(2.0, label='x1'), Value(0.0, label='x2')
w1, w2 = Value(-3.0, label='w1'), Value(1.0, label='w2')

# Bias of the neuron. With this value n is about 0.8814, so tanh(n) is about
# 0.7071 and the local derivative 1 - o**2 comes out at about 0.5.
b = Value(6.8813735870195432, label='b')

# Weighted inputs
x1w1 = x1 * w1
x1w1.label = 'x1 * w1'
x2w2 = x2 * w2
x2w2.label = 'x2 * w2'

# Pre-activation: n = x1 * w1 + x2 * w2 + b
x1w1x2w2 = x1w1 + x2w2
x1w1x2w2.label = 'x1 * w1 + x2 * w2'
n = x1w1x2w2 + b
n.label = 'n'

# Activation: o = tanh(n)
o = n.tanh()
o.label = 'o'


In [None]:
# Backpropagation
# Each .grad below holds do/d(node), worked out by hand with the chain rule,
# starting at the output o and moving back towards the inputs and weights.
o.grad = 1 # do/do = 1

n.grad = 1 - o.data**2 # do/dn = d/dn(tanh(n)) = 1 - tanh(n)**2 = 1 - o**2

# Addition has a local derivative of 1, so the gradient passes through unchanged.
x1w1x2w2.grad = n.grad # do/dx1w1x2w2 = do/dn * dn/dx1w1x2w2 = do/dn * d/dx1w1x2w2((x1 * w1 + x2 * w2) + b) = do/dn
b.grad = n.grad # do/db = do/dn * dn/db = do/dn * d/db((x1 * w1 + x2 * w2) + b) = do/dn

x1w1.grad = x1w1x2w2.grad # do/dx1w1 = do/dx1w1x2w2 * dx1w1x2w2/dx1w1 = do/dx1w1x2w2 * d/dx1w1(x1 * w1 + x2 * w2) = do/dx1w1x2w2
x2w2.grad = x1w1x2w2.grad # do/dx2w2 = do/dx1w1x2w2 * dx1w1x2w2/dx2w2 = do/dx1w1x2w2 * d/dx2w2(x1 * w1 + x2 * w2) = do/dx1w1x2w2

# Multiplication: each factor's local derivative is the value of the other factor.
x1.grad = x1w1.grad * w1.data # do/dx1 = do/dx1w1 * dx1w1/dx1 = do/dx1w1 * d/dx1(x1 * w1) = do/dx1w1 * w1
w1.grad = x1w1.grad * x1.data # do/dw1 = do/dx1w1 * dx1w1/dw1 = do/dx1w1 * d/dw1(x1 * w1) = do/dx1w1 * x1
x2.grad = x2w2.grad * w2.data # do/dx2 = do/dx2w2 * dx2w2/dx2 = do/dx2w2 * d/dx2(x2 * w2) = do/dx2w2 * w2
w2.grad = x2w2.grad * x2.data # do/dw2 = do/dx2w2 * dx2w2/dw2 = do/dx2w2 * d/dw2(x2 * w2) = do/dx2w2 * x2 (zero here because x2 = 0)



In [None]:
# Draw the neuron's computation graph, ending at the output o, after the
# gradients have been filled in by hand. As the cell's last expression, the
# value returned by draw_graph is shown as the cell output.
draw_graph(o)

In [None]:
# Single optimisation step. Increasing o by moving in the direction of the gradient.
# Only the trainable quantities (weights and bias) are changed; the inputs
# x1 and x2 are data and stay fixed.
step_size = 0.01
for param in (w1, w2, b):
    param.data += step_size * param.grad

# Forward pass, mirroring the structure used when the neuron was first built:
# the sum reuses the two product nodes instead of recomputing x1 * w1 and
# x2 * w2, so x1w1 and x2w2 are the nodes that actually feed n.
x1w1 = x1 * w1
x2w2 = x2 * w2
x1w1x2w2 = x1w1 + x2w2
n = x1w1x2w2 + b
o = n.tanh()

# Should be slightly larger than the value of o before the step.
print(o.data)

### Semi-Automatic Backpropagation on a Neuron

In [None]:
# Rebuild the same neuron from scratch so that all gradients start fresh:
# o = tanh(x1 * w1 + x2 * w2 + b)

# Inputs x1, x2 and their weights w1, w2
x1, x2 = Value(2.0, label='x1'), Value(0.0, label='x2')
w1, w2 = Value(-3.0, label='w1'), Value(1.0, label='w2')

# Bias of the neuron. With this value n is about 0.8814, so tanh(n) is about
# 0.7071 and the local derivative 1 - o**2 comes out at about 0.5.
b = Value(6.8813735870195432, label='b')

# Weighted inputs
x1w1 = x1 * w1
x1w1.label = 'x1 * w1'
x2w2 = x2 * w2
x2w2.label = 'x2 * w2'

# Pre-activation: n = x1 * w1 + x2 * w2 + b
x1w1x2w2 = x1w1 + x2w2
x1w1x2w2.label = 'x1 * w1 + x2 * w2'
n = x1w1x2w2 + b
n.label = 'n'

# Activation: o = tanh(n)
o = n.tanh()
o.label = 'o'


In [None]:
# Backpropagation, semi-automatic: the output gradient is seeded by hand and
# each node's own _backward() step is then triggered manually.
# The calls run from the output back towards the inputs. Assuming each
# _backward() pushes its node's grad onto that node's direct operands (Value
# internals are not visible here — confirm), every node's grad is complete
# before its own _backward() is called.
o.grad = 1 # do/do = 1
o._backward()
n._backward()
b._backward() # NOTE(review): b is a leaf, so this is presumably a no-op — confirm
x1w1x2w2._backward()
x1w1._backward()
x2w2._backward()

In [None]:
# Draw the neuron's graph after the per-node _backward() calls, so the
# resulting gradients can be compared with the hand-derived ones from the
# previous section.
draw_graph(o)

### Automatic Backpropagation on a Neuron

In [None]:
# Rebuild the same neuron from scratch so that all gradients start fresh:
# o = tanh(x1 * w1 + x2 * w2 + b)

# Inputs x1, x2 and their weights w1, w2
x1, x2 = Value(2.0, label='x1'), Value(0.0, label='x2')
w1, w2 = Value(-3.0, label='w1'), Value(1.0, label='w2')

# Bias of the neuron. With this value n is about 0.8814, so tanh(n) is about
# 0.7071 and the local derivative 1 - o**2 comes out at about 0.5.
b = Value(6.8813735870195432, label='b')

# Weighted inputs
x1w1 = x1 * w1
x1w1.label = 'x1 * w1'
x2w2 = x2 * w2
x2w2.label = 'x2 * w2'

# Pre-activation: n = x1 * w1 + x2 * w2 + b
x1w1x2w2 = x1w1 + x2w2
x1w1x2w2.label = 'x1 * w1 + x2 * w2'
n = x1w1x2w2 + b
n.label = 'n'

# Activation: o = tanh(n)
o = n.tanh()
o.label = 'o'

In [None]:
# Backpropagation, fully automatic: a single backward() call on the output is
# used here in place of seeding o.grad and calling each node's _backward() by hand.
# NOTE(review): presumably backward() sets o.grad to 1 and visits the nodes in
# reverse topological order — confirm in network.value.
o.backward()
# Draw the graph so the gradients can be compared with the earlier sections.
draw_graph(o)

### Forward Pass in an MLP

In [None]:
x = [2, 3, -1] # Input vector
# NOTE(review): `n` previously held the neuron's pre-activation Value; from
# this line on it refers to the MLP instead.
# The constructor arguments are not documented here — presumably 3 inputs
# followed by layers of 4, 4 and 1 neurons; confirm in network.network.
# NOTE(review): if MLP initialises its parameters randomly, seed the RNG before
# this line so that Restart & Run All reproduces the same outputs.
n = MLP(3, [4, 4, 1])
# Calling the network on x runs a forward pass; the result is handed straight
# to draw_graph and displayed as the cell output.
draw_graph(n(x))

### Backpropagation in an MLP

In [None]:
# Training dataset: four input vectors of three features each
xs = [
    [2.0, 3.0, 1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]

# Desired outputs, one per row of xs and in the same order
ys = [1.0, -1.0, 1.0, -1.0]

# Forward pass of the network over every training sample
ypred = [n(sample) for sample in xs]

In [None]:
# Loss function - sum of squared errors between predictions and targets.
# The total is not divided by the number of samples, so strictly speaking this
# is the summed rather than the *mean* squared error.
loss = sum((pred - target)**2 for target, pred in zip(ys, ypred))
print(loss)

In [None]:
# Backpropagate from the loss through all four predictions into the network.
loss.backward()
# The graph contains four forward passes through the network (one for each x
# in xs), all feeding into the loss calculation.
draw_graph(loss)

### Gradient Descent in an MLP

In [None]:
# Gradient descent
# Every step uses all of xs at once, so this is full-batch gradient descent
# rather than stochastic gradient descent.
learning_rate = 0.05  # Step size; constant across steps, so set once up front

for i in range(12):

    # Forward pass
    ypred = [n(x) for x in xs]

    # Calculating loss (sum of squared errors over the dataset)
    loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))

    # Backward pass. Gradients are reset first so that this step's gradients
    # are not mixed with those left over from earlier backward() calls.
    n.zero_grad()
    loss.backward()

    # Parameter update: step against the gradient to reduce the loss
    for p in n.parameters():
        p.data -= learning_rate * p.grad

    # Reports the loss measured in this step's forward pass, i.e. before the
    # update that was just applied.
    print(f'Step {i}: {loss.data}')

In [None]:
# ypred holds the predictions from the last forward pass of the training loop,
# which ran before that iteration's parameter update; rerun the forward pass to
# see the predictions of the final parameters.
print(ypred)