In [None]:
from value import Value, draw_dot, graph_size

# Neural Networks

In [None]:
import random

class Neuron:
    """A single tanh neuron with ``nin`` inputs.

    Each weight is drawn uniformly from [-1, 1]; the bias starts at zero.
    """

    def __init__(self, nin):
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1, 1)))
        self.w = weights
        self.b = Value(0)

    def __call__(self, x):
        """Return tanh(w . x + b) for the input sequence ``x``."""
        # Start from the bias and add one weight*input product at a time.
        activation = self.b
        for weight, value in zip(self.w, x):
            activation = activation + weight * value
        return activation.tanh()

In [None]:
# Build a neuron with 3 inputs and show its weights and bias
n = Neuron(3)
n.w, n.b

In [None]:
# Forward pass of the neuron on a 3-element input
n([1, -1, 2])

In [None]:
class Layer:
    """A group of ``nout`` neurons that all read the same ``nin`` inputs."""

    def __init__(self, nin, nout):
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin))
        self.neurons = neurons

    def __call__(self, x):
        """Evaluate every neuron on ``x`` and return the outputs as a list."""
        results = []
        for neuron in self.neurons:
            results.append(neuron(x))
        return results

In [None]:
# A layer of 3 neurons, each taking 5 inputs
l = Layer(5, 3)

In [None]:
# Evaluate the layer on a 5-element input: one output per neuron
l([1, -1, 2, 0, 0.9])

In [None]:
class Layer:
    """A group of ``nout`` neurons that all read the same ``nin`` inputs.

    Calling the layer returns a list of outputs, except that a layer with
    exactly one neuron returns that neuron's output directly.
    """

    def __init__(self, nin, nout):
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin))
        self.neurons = neurons

    def __call__(self, x):
        """Evaluate every neuron on ``x``; unwrap the result if there is only one."""
        results = []
        for neuron in self.neurons:
            results.append(neuron(x))
        if len(results) > 1:
            return results
        return results[0]

In [None]:
# With a single neuron the layer returns that neuron's output directly instead of a one-element list
l = Layer(5, 1)
l([1, -1, 2, 0, 0.9])

In [None]:
class MLP:
    """Multi-layer perceptron: a chain of ``Layer`` objects applied in order.

    Args:
        nin: number of inputs to the first layer.
        nout: sizes of the successive layers. Any iterable of ints is
            accepted (list, tuple, range, ...).
    """

    def __init__(self, nin, nout):
        # Unpack rather than ``[nin] + nout``: list concatenation only works
        # when ``nout`` is itself a list and raises TypeError for a tuple.
        sz = [nin, *nout]
        # Consecutive pairs (inputs, outputs) give the shape of each layer.
        self.layers = [Layer(zi, zo) for zi, zo in zip(sz, sz[1:])]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer(x)
        return x

In [None]:
# 3 inputs -> two layers of 4 neurons -> one output neuron
m = MLP(3, [4, 4, 1])
inp = [-1, 0.5, 3]
m(inp)

In [None]:
# draw_dot comes from the local `value` module; presumably it renders the expression graph behind the network output (confirm there)
draw_dot(m(inp))

With all the tools now available, let's train the network on a simple dataset:

In [None]:
# Four training samples with three features each
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, -1.0],
    [1.0, 1.0, -1.0]
]
# One target per sample, in the same order as xs
ys = [1.0, -1.0, -1.0, 1.0] # targets

In [None]:
# Forward pass of the (still untrained) network on every sample
ypred = [m(x) for x in xs]
ypred

In [None]:
# Per-sample squared error between prediction and target
[(yout-ygt)**2 for ygt, yout in zip(ys, ypred)]

In [None]:
# Total loss: the sum of the squared errors over all samples
loss = sum([(yout-ygt)**2 for ygt, yout in zip(ys, ypred)])
loss

In [None]:
# Presumably renders the full expression graph that produced the loss (draw_dot is defined in the local `value` module; confirm there)
draw_dot(loss)

In [None]:
# graph_size is imported from the local `value` module; presumably it reports how large the loss graph is (confirm there)
graph_size(loss)

In [None]:
# Backpropagate from the loss, then redraw the graph.
# NOTE(review): presumably backward() fills in .grad on the graph nodes; confirm against the Value class.
loss.backward()
draw_dot(loss)

In [None]:
# Gradient stored on one specific weight: first layer, second neuron, second weight
m.layers[0].neurons[1].w[1].grad

Remember that we want to **minimize** the loss, so we need to **subtract** the gradient.

Now we need to add a mechanism to track which nodes contain values that can be updated (the parameters), as opposed to input nodes and temporary nodes.

In [None]:
import random

class Neuron:
    """A tanh neuron with ``nin`` weights and one bias, all built as ``Value`` objects."""

    def __init__(self, nin):
        # One weight per input, then the bias, each drawn uniformly from [-1, 1].
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1, 1)))
        self.w = weights
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return tanh(w . x + b) for the input sequence ``x``."""
        # Start from the bias and add one weight*input product at a time.
        activation = self.b
        for weight, value in zip(self.w, x):
            activation = activation + weight * value
        return activation.tanh()

    def parameters(self):
        """Return a new list holding all weights followed by the bias."""
        return [*self.w, self.b]
    

class Layer:
    """A group of ``nout`` neurons that all read the same ``nin`` inputs."""

    def __init__(self, nin, nout):
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin))
        self.neurons = neurons

    def __call__(self, x):
        """Evaluate every neuron on ``x``; a single-neuron layer returns its one output unwrapped."""
        results = []
        for neuron in self.neurons:
            results.append(neuron(x))
        if len(results) > 1:
            return results
        return results[0]

    def parameters(self):
        """Return the parameters of every neuron, concatenated in neuron order."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected
    

class MLP:
    """Multi-layer perceptron: a chain of ``Layer`` objects applied in order.

    Args:
        nin: number of inputs to the first layer.
        nout: sizes of the successive layers. Any iterable of ints is
            accepted (list, tuple, range, ...).
    """

    def __init__(self, nin, nout):
        # Unpack rather than ``[nin] + nout``: list concatenation only works
        # when ``nout`` is itself a list and raises TypeError for a tuple.
        sz = [nin, *nout]
        # Consecutive pairs (inputs, outputs) give the shape of each layer.
        self.layers = [Layer(zi, zo) for zi, zo in zip(sz, sz[1:])]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of every layer, concatenated in layer order."""
        return [p for l in self.layers for p in l.parameters()]


In [None]:
# Rebuild the network with the parameter-aware classes and list everything parameters() collects
m = MLP(3, [4, 4, 1])
m.parameters()

In [None]:
# Parameter count of each layer, then the total for the whole network
for l in m.layers:
    print(len(l.parameters()))
print("-"*10)
print(len(m.parameters()))

layer1: 3 * 4 + 4

layer2: 4 * 4 + 4

layer3: 4 * 1 + 1

In [None]:
# One manual gradient-descent step; the loss is printed before and after.
ypred = [m(x) for x in xs]
loss = sum([(yout-ygt)**2 for ygt, yout in zip(ys, ypred)])
print(loss)

# Reset every parameter's gradient before backpropagating this loss.
for p in m.parameters():
    p.grad = 0
loss.backward()

# Move each parameter against its gradient, scaled by the learning rate.
learning_rate = 0.01
for p in m.parameters():
    p.data -= learning_rate * p.grad

    
# Recompute predictions and loss with the updated parameters.
ypred = [m(x) for x in xs]
loss = sum([(yout-ygt)**2 for ygt, yout in zip(ys, ypred)])
print(loss)

What about the learning rate?

In [None]:
def test_learning_rate(learning_rate):
    """Take one gradient step on a freshly initialised MLP and print the loss before and after.

    Uses the notebook-level ``xs`` / ``ys`` dataset. ``learning_rate``
    scales the gradient that is subtracted from each parameter.
    """

    def dataset_loss(network):
        # Sum of squared errors of `network` over the whole dataset.
        predictions = [network(x) for x in xs]
        return sum([(yout-ygt)**2 for ygt, yout in zip(ys, predictions)])

    net = MLP(3, [4, 4, 1])
    loss_before = dataset_loss(net)

    # Clear gradients, then backpropagate the initial loss.
    for param in net.parameters():
        param.grad = 0
    loss_before.backward()

    # Single descent step.
    for param in net.parameters():
        param.data -= learning_rate * param.grad

    loss_after = dataset_loss(net)
    print(loss_before.data, loss_after.data)

In [None]:
# One step with learning rate 0.01: prints the loss before and after
test_learning_rate(0.01)

In [None]:
# Same experiment with a 10x larger step (a new random network is built on each call)
test_learning_rate(0.1)

In [None]:
# Learning rate 1: compare how the printed loss changes against the smaller steps
test_learning_rate(1)

In [None]:
# Learning rate 10: check in the output whether the loss still goes down with a step this large
test_learning_rate(10)

# Example 1. Simple dataset

In [None]:
# Four training samples with three features each.
# NOTE(review): the third sample is [0.5, 1.0, 1.0] here but [0.5, 1.0, -1.0] in the earlier `xs` cell; confirm which is intended.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]

# Each sample input has one desired output that we want our MLP to predict

ys = [1.0, -1.0, -1.0, 1.0] # desired predictions

In [None]:
# Fresh network for the training loop: 3 inputs, then layers of 4, 4 and 1 neurons
P = MLP(3, [4, 4, 1])

In [None]:
# Writing out the training loop: repeated gradient-descent steps on the
# sum-of-squares loss over the whole dataset.

epochs = 20
learning_rate = 0.1

for i in range(epochs):

    # forward pass
    y_predicted = [P(x) for x in xs]

    # recalculate loss
    loss = sum([(y_output - y_ground_truth)**2 for y_ground_truth, y_output in zip(ys, y_predicted)])

    # backward pass

    # COMMON BUG: Forgetting to zero-out your gradients before
    # running the next backward pass

    for p in P.parameters():
        p.grad = 0
    # The loss graph built above is the one that gets backpropagated.
    # Running a second forward pass here would only repeat the work, since
    # its result would not feed into loss.backward().
    loss.backward()

    # gradient updates
    for p in P.parameters():
        p.data += -learning_rate * p.grad

    # print current loss (computed before this epoch's update)
    print(f'i={i}: loss={loss}')

# After the loop, y_predicted holds the predictions made at the start of the
# final epoch, i.e. before the last parameter update.

In [None]:
# Predictions from the last loop iteration (made before that iteration's parameter update) next to the targets
y_predicted, ys

# Solving the moons problem

In [None]:
import numpy as np

# Seed NumPy's global RNG and the stdlib `random` module (Neuron draws its weights from `random`).
# Note: the networks built in earlier cells were created before this seeding, so they are not covered by it.
np.random.seed(1337)
random.seed(1337)

In [None]:
from sklearn.datasets import make_moons, make_blobs
import matplotlib.pyplot as plt

# 100 samples from scikit-learn's make_moons generator with noise=0.1
X, y = make_moons(n_samples=100, noise=0.1)

y = y*2 - 1 # make y be -1 or 1
# visualize in 2D
# Scatter the two feature columns against each other, coloured by label
plt.figure(figsize=(5,5))
plt.scatter(X[:,0], X[:,1], c=y, s=20, cmap='jet')

In [None]:
# initialize a model: 2 inputs, then layers of 32, 16 and 1 neurons
model = MLP(2, [32, 16, 1]) # 3-layer neural network
# NOTE(review): the MLP class defined in this notebook has no __repr__/__str__, so this presumably prints the default object representation
print(model)
print("number of parameters", len(model.parameters()))

In [None]:
# loss function
def loss(batch_size=None):
    """Return ``(total_loss, accuracy)`` for ``model`` on the notebook-level ``X`` / ``y``.

    With ``batch_size=None`` the whole dataset is used; otherwise the first
    ``batch_size`` indices of a random permutation select the batch.
    ``total_loss`` is the mean hinge loss plus an L2 penalty on the
    parameters; ``accuracy`` is the fraction of samples whose score has the
    same sign as the label.
    """
    # Pick the batch.
    if batch_size is not None:
        chosen = np.random.permutation(X.shape[0])[:batch_size]
        Xb, yb = X[chosen], y[chosen]
    else:
        Xb, yb = X, y

    # Wrap every feature in a Value and run the model on each row.
    inputs = [[Value(feature) for feature in xrow] for xrow in Xb]
    scores = [model(row) for row in inputs]

    # svm "max-margin" loss, averaged over the batch
    margins = []
    for yi, scorei in zip(yb, scores):
        margins.append((1 + -yi*scorei).relu())
    data_loss = sum(margins) * (1.0 / len(margins))

    # L2 regularization
    alpha = 1e-4
    squared_params = (p*p for p in model.parameters())
    reg_loss = alpha * sum(squared_params)

    # Fraction of samples classified on the correct side of zero.
    hits = [(yi > 0) == (scorei.data > 0) for yi, scorei in zip(yb, scores)]
    return data_loss + reg_loss, sum(hits) / len(hits)

# Loss and accuracy on the full dataset before any training step has been taken on `model`
total_loss, acc = loss()
print(total_loss, acc)


In [None]:
# optimization
n_epoch = 50

for k in range(n_epoch):
    
    # forward
    total_loss, acc = loss()
    
    # backward
    for p in model.parameters():
        p.grad = 0
    total_loss.backward()
    
    # update (sgd)
    learning_rate = 0.1 
    for p in model.parameters():
        p.data -= learning_rate * p.grad
    
    if k % 1 == 0:
        print(f"step {k} loss {total_loss.data}, accuracy {acc*100}%")

In [None]:
# visualize decision boundary

h = 0.25  # grid spacing
# Bounding box of the data, padded by 1 on every side
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
# One (x, y) row per grid point, wrapped in Value objects and scored by the model
Xmesh = np.c_[xx.ravel(), yy.ravel()]
inputs = [list(map(Value, xrow)) for xrow in Xmesh]
scores = list(map(model, inputs))
# Predicted class is the sign of the score; reshape back onto the grid for plotting
Z = np.array([s.data > 0 for s in scores])
Z = Z.reshape(xx.shape)

# Filled contour of the predicted class with the training points drawn on top
fig = plt.figure()
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())