# Micrograd evaluation

In [None]:
import graphviz
import random

from micrograd.core import Value

In [None]:
def build_graph(root: Value):
    """Render the computation graph that ends in ``root`` with GraphViz.

    Every value is drawn as a record-shaped node labelled with its name, data
    and gradient. A value that has an ``operator`` also gets an operator node
    placed between its children and the value itself. Edges run from inputs
    towards the result and the layout is left to right.

    Returns the ``graphviz.Digraph`` (SVG format) describing the graph.
    """
    dot = graphviz.Digraph(format='svg', graph_attr={'rankdir': 'LR'})
    seen = set()

    def _label(v):
        """Text shown inside the record node of ``v``."""
        return f'{v.name} | data={v.data:0.3f} | grad={v.grad:0.3f}'

    def _walk(node):
        """Add the operator node and the children feeding ``node``, depth first."""
        if node in seen:
            return  # already expanded; a value may be reachable more than once
        seen.add(node)

        if not node.operator:
            return  # no operator recorded: nothing to expand below this value

        # operator node sits between the children and the value it produced
        op_id = f'{id(node)}_{node.operator}'
        dot.node(op_id, label=node.operator)
        dot.edge(op_id, str(id(node)))

        for child in node.children:
            child_id = str(id(child))
            dot.node(child_id, label=_label(child), shape='record')
            dot.edge(child_id, op_id)
            _walk(child)

    # the root is the only value that is not declared as somebody's child
    dot.node(str(id(root)), _label(root), shape='record')
    _walk(root)
    return dot

### Create a computation graph

In [None]:
# Leaf values of the expression d = a * b + 1.
a = Value(3, name='a')
b = Value(2, name='b')
c = a * b
d = c + 1
# The results of the operations are not given a name when they are created,
# so label them afterwards for the plot.
c.name = 'c'
d.name = 'd'

# Last expression of the cell, so the notebook shows the returned graph.
build_graph(d)

### Compare numerical and approximate derivative

In [None]:
def test_deriv():
    """Compare a finite-difference estimate of dd/da with the backpropagated one.

    The expression under test is ``d = a * b + 1`` at ``a = 3``, ``b = 2``.
    Both results are printed; nothing is returned.
    """

    def _forward(a_value):
        """Build d = a * b + 1 for the given value of a and return ``(a, d)``."""
        a = Value(a_value, name='a')
        b = Value(2, name='b')
        c = a * b
        d = c + 1
        c.name = 'c'
        d.name = 'd'
        return a, d

    h = 0.001  # finite-difference step

    # Forward difference: evaluate the output at a and at a + h.
    a, d = _forward(3)
    _, d_shifted = _forward(3 + h)
    print('Approximate derivative dL/da:', (d_shifted.data - d.data) / h)

    # Backpropagate through the graph built at a = 3, the same point the
    # finite difference above is anchored at.
    d.zero_grad()
    d.backward()
    print('Numerical derivative dL/da:', a.grad)


test_deriv()

### Simulate data updates and gradient computation

In [None]:
# Build e = (a * b + 1) * b; note that b feeds two different operations.
a = Value(3, name='a')
b = Value(-2, name='b')

c = a * b
c.name = 'c'

d = c + 1
d.name = 'd'

e = d * b
e.name = 'e'

# Backpropagate from e.
e.backward()

In [None]:
# Plot the graph after the backward pass; every node label includes its grad.
build_graph(e)

In [None]:
print('original data on e:', e.data)

# weight update: nudge both leaves by 0.01 times their gradient
# (note the `+=`: this steps along the gradient, not against it)
a.data += 0.01 * a.grad
b.data += 0.01 * b.grad

# "forward pass": rebuild the expression so c, d and e reflect the new leaf data
c = a * b; c.name = 'c'
d = c + 1; d.name = 'd'
e = d * b ; e.name = 'e'

# recompute the gradients for the updated values
# NOTE(review): assumes zero_grad() also resets a.grad and b.grad left over
# from the earlier backward pass - confirm in micrograd.core
e.zero_grad()
e.backward()
print('modified data on e:', e.data)
build_graph(e)

In [None]:
# The built-in sum() starts from the int 0, so this chains additions that mix
# plain numbers with a Value (0 + Value(2) is evaluated first).
z = sum([Value(2), 5, 6, 7])
z.zero_grad()
z.backward()
build_graph(z)

## A Multilayer Perceptron
### Classes

In [None]:
class Neuron:
    """
    A single neuron: weighted sum of the inputs plus a bias, passed through
    an activation function.

    Weights and bias are drawn uniformly from [-1, 1]. The weights are named
    ``w_0``, ``w_1``, ... and the bias ``b``.

    Args:
        n_inputs: number of inputs (and therefore weights).
        activation: ``'tanh'`` (default) or ``'relu'``.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """

    # Activation names understood by __call__.
    _ACTIVATIONS = ('relu', 'tanh')

    def __init__(self, n_inputs: int, activation: str = 'tanh'):
        # Reject a bad activation up front instead of on the first forward pass.
        if activation not in self._ACTIVATIONS:
            raise ValueError(f'Wrong activation type: {activation}')
        self.weights = [Value(random.uniform(-1, 1), name=f'w_{i}') for i in range(n_inputs)]
        # Pass the name by keyword, like every other Value built in this file.
        self.bias = Value(random.uniform(-1, 1), name='b')
        self.activation = activation

    def __call__(self, x: list):
        """Return the activation of ``sum(w_i * x_i) + bias``.

        Raises:
            ValueError: if ``x`` does not have one entry per weight, or if
                ``self.activation`` holds an unsupported name.
        """
        if len(x) != len(self.weights):
            raise ValueError('Input should have the same size of weights')
        z = sum([w * i for w, i in zip(self.weights, x)]) + self.bias
        # Checked again here because the attribute can be reassigned after
        # construction.
        if self.activation == 'relu':
            out = z.relu()
        elif self.activation == 'tanh':
            out = z.tanh()
        else:
            raise ValueError(f'Wrong activation type: {self.activation}')
        return out

    def parameters(self):
        """Return the trainable values: all weights followed by the bias."""
        return self.weights + [self.bias]

class Layer:
    """
    Fully connected layer of neurons.

    Args:
        n_inputs: number of inputs every neuron receives.
        n_outputs: number of neurons in the layer.
        activation: activation name handed to every neuron (default ``'tanh'``).
    """

    def __init__(self, n_inputs: int, n_outputs: int, activation: str = 'tanh'):
        self.neurons = [Neuron(n_inputs, activation=activation) for _ in range(n_outputs)]

    def __call__(self, x):
        """Feed ``x`` to every neuron.

        Returns the list of neuron outputs, or the bare output when the layer
        holds exactly one neuron.
        """
        outs = [n(x) for n in self.neurons]
        return outs[0] if len(outs) == 1 else outs

    def parameters(self):
        """Return the parameters of all neurons as one flat list."""
        return [p for n in self.neurons for p in n.parameters()]
    
class MLP:
    """
    Multilayer perceptron.

    Args:
        n_inputs: size of the input vector.
        layer_sizes: number of neurons in each successive layer; any iterable
            of ints (list, tuple, ...) is accepted.
    """

    def __init__(self, n_inputs: int, layer_sizes: list):
        # Unpacking instead of `[n_inputs] + layer_sizes` so that tuples and
        # other iterables work, not only lists.
        self.sizes = [n_inputs, *layer_sizes]
        # Consecutive pairs of sizes give each layer's (inputs, outputs).
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(self.sizes, self.sizes[1:])]

    def __call__(self, x):
        """Pass ``x`` through the layers in order and return the last output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers as one flat list."""
        return [p for layer in self.layers for p in layer.parameters()]

### Check components

In [None]:
n = Neuron(2)  # a neuron with two inputs
# Call it on two named input Values (x_0 = 1, x_1 = 3).
o = n([Value(x_i, name=f'x_{i}')
       for i, x_i
       in enumerate([1, 3])])
# Last expression of the cell: show the neuron's output.
o

In [None]:
l = Layer(2, 3)  # a layer with 3 neurons of input size 2
# Call it on two named input Values (x_0 = 1, x_1 = 3).
o = l([Value(x_i, name=f'x_{i}')
       for i, x_i
       in enumerate([1, 3])])
# Last expression of the cell: show the outputs, one per neuron.
o

In [None]:
m = MLP(3, [4, 4, 1])  # an MLP with layers of 4, 4 and 1 neurons; input size 3
# Call it on three named input Values (x_0 = 1, x_1 = 2, x_2 = 3).
o = m([Value(x_i, name=f'x_{i}')
       for i, x_i
       in enumerate([1, 2, 3])])
# Last expression of the cell: show the network output (the final layer has a
# single neuron, so this is one value rather than a list).
o

### Network in action

#### Data

In [None]:
# Three samples with three features each.
xs = [
    [2, 3, -1],
    [3, -1, 0.5],
    [0.5, 1, 1],
]
# One target per row of xs.
ys = [1, -1, -1]

# predictions
# NOTE: uses whatever model is currently bound to `m`; when the cells are run
# in order that is the randomly initialised MLP from the component check.
y_pred = [m(x) for x in xs]
y_pred

#### Model

In [None]:
# Fresh, randomly initialised model to train.
m = MLP(3, [4, 4, 1])  # an MLP with layers of 4, 4 and 1 neurons; input size 3
# Uncomment to plot the network's computation graph for an all-zero input:
# build_graph(m([0, 0, 0]))

In [None]:
# Number of trainable values (weights and biases) in the model:
# 4 * (3 + 1) + 4 * (4 + 1) + 1 * (4 + 1) = 41 for MLP(3, [4, 4, 1]).
print(len(m.parameters()))

#### Training using Gradient Descent

In [None]:
n_epochs = 100000
lr = 0.001

for epoch in range(n_epochs):
    # forward pass: mean squared error over the whole data set
    ys_pred = [m(x) for x in xs]
    L = sum((y - yp)**2 for y, yp in zip(ys, ys_pred)) / len(ys)
    if epoch % 2000 == 0:
        print(f'epoch: {epoch}, loss: {L.data}')

    # backward pass: zero the gradients, then backpropagate the loss
    L.zero_grad()
    L.backward()

    # gradient-descent step: move every parameter against its gradient
    for p in m.parameters():
        p.data -= lr * p.grad

print('Predictions:', [m(x) for x in xs])
print('Targets:', ys)