# Multi-layer perceptron

This notebook is used during testing/validation.

See `src/mlp.py` for the clean version of the MLP

In [2]:
import numpy as np

In [404]:
class Value:

    def __init__(self, data, children=[], label=''):
        self.data = data
        self.grad = 0.0
        self.label = label
        self.prev = children
        self._backward = lambda : None

    def __repr__(self):
        return f"Value(data={self.data}, grad={self.grad})"

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, [self, other], f"({self.label})+{other.label}")

        def _backward():
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
            
        out._backward = _backward
        return out
    
    def __sub__(self, other):
        out = Value(self.data - other.data, [self, other], f"({self.label})+{other.label}")

        def _backward():
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
            
        out._backward = _backward
        return out

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, [self, other], f"({self.label})*{other.label}")

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
            
        out._backward = _backward
        return out
    
    def __pow__(self, other):
        out = Value(self.data ** other.data, [self, other], f"({self.label})^{other.label}")
        
        def _backward():
            self.grad += other.data * self.data * out.grad
            sign = 1 if self.data > 0 else -1
            other.grad += self.data ** other.data * sign * np.log(abs(self.data)) * out.grad
            
        out._backward = _backward
        return out
    
    def sig(self):
        y = 1 / (np.exp(-self.data) + 1)
        out = Value(y, [self], f"(sig({self.label})")
        
        def _backward():
            self.grad += y * (1 - y) * out.grad
            
        out._backward = _backward
        return out

    def relu(self):
        out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU')

        def _backward():
            self.grad += (out.data > 0) * out.grad

        out._backward = _backward
        return out

    def tanh(self):
        out = Value((np.exp(2*self.data) - 1) / (np.exp(2*self.data) + 1), [self], f"(tanh({self.label})")

        def _backward():
            self.grad += (1 - out.data**2) * out.grad

        out._backward = _backward 
        return out
    
    def backward(self):
        topo = []
        visited = set()
        def topo_sort(node):
            if node not in visited:
                visited.add(node)
                for child in node.prev:
                    topo_sort(child)
                topo.append(node)

        self.grad = 1
        topo_sort(self)
        nodes = reversed(topo)
        for node in nodes:
            node._backward()

In [407]:
# validate the gradients
mlp = MLP(3, [3, 2, 1])

out_before = mlp([4,1,5])
out_before.backward()

print("out_before:", out_before)
grad = mlp.layers[0].neurons[0].w[0].grad
print("Gradient using backwards:", grad)

h = 1e-5
mlp.layers[0].neurons[0].w[0].data += h


out_after = mlp([4,1,5])
print("out_after:", out_after)


a = out_after.data
b = out_before.data
manual_grad = (a - b)/h
print("Gradient by hand:", manual_grad)


out_before: Value(data=0.5888810564717789, grad=1)
Gradient using backwards: -2.8245869576938945e-06
out_after: Value(data=0.5888810564435326, grad=0.0)
Gradient by hand: -2.824629419251323e-06


# --

In [402]:
np.random.seed(5)
class Neuron:

    def __init__(self, num_input):
        self.w = [Value(w) for w  in np.random.randn(num_input)]
        self.b = Value(np.random.randn(1)[0])

    def __call__(self, xs):
        # out = np.dot(self.w, [Value(x) for x  in xs]) + self.b
        # print([w*x for w, x in zip(self.w, xs)])
        # out = sum([w*x for w, x in zip(self.w, xs)]) + self.b
        out = np.dot(self.w, xs) + self.b
        return out.sig()

n = Neuron(3)
target = Value(3)
out = n([3,4,5])

loss = (out - target) ** Value(2)
loss.backward()

n.w[0]
loss


Value(data=4.000027108143415, grad=1)

In [5]:
class Layer:

    def __init__(self, num_input, num_neurons):
        # number of neurons is equivalent to number of outputs
        self.num_input = num_input
        self.num_neurons = num_neurons
        self.neurons = [Neuron(num_input) for _ in range(num_neurons)]

    def __call__(self, xs):
        outputs = [n(xs) for n in self.neurons]
        return outputs if len(outputs) > 1 else outputs[0]
    
    def __repr__(self):
        return  f"Layer({self.num_input=}, {self.num_neurons=})"
    
l = Layer(3,4)
out = l([3,4,5])
print(out)

[Value(data=0.8206791118848349, grad=0.0), Value(data=0.0009815753433625469, grad=0.0), Value(data=0.0004586693901978576, grad=0.0), Value(data=0.9815206068237023, grad=0.0)]


In [354]:
class MLP:

    def __init__(self, num_inputs, outs):
        # number of outputs is for each layers
        self.layers = [Layer(num_inputs, outs[0])] + [Layer(outs[i], outs[i+1]) for i in range(len(outs) - 1)]

    def __call__(self, xs):
        outs = xs
        for layer in self.layers:
            outs = layer(outs)
        return outs
    
    def parameters(self):
        params = []
        for layer in self.layers:
            for neuron in layer.neurons:
                for weight in neuron.w:
                    params.append(weight)
        return params
    
    
mlp = MLP(3, [3, 2, 1])
print(mlp([4,1,5]))
print(mlp.parameters())

mlp.layers[0].neurons[0].w[0].data= -0.1917395684517186
print(mlp([4,1,5]))


print(mlp.parameters())


Value(data=0, grad=0.0)
[Value(data=-0.3633108784819174, grad=0.0), Value(data=0.0032888429341100755, grad=0.0), Value(data=-0.10593044205742323, grad=0.0), Value(data=-0.6315716297922155, grad=0.0), Value(data=-0.0061949084857593475, grad=0.0), Value(data=-0.10106761180924467, grad=0.0), Value(data=0.24921765856490757, grad=0.0), Value(data=0.19766009104249851, grad=0.0), Value(data=1.3348485742415819, grad=0.0), Value(data=1.5615322934488904, grad=0.0), Value(data=-0.3058530211666308, grad=0.0), Value(data=-0.47773141727821256, grad=0.0), Value(data=0.3554384723493521, grad=0.0), Value(data=0.269612406446701, grad=0.0), Value(data=1.2919633833879631, grad=0.0), Value(data=0.49444039812108825, grad=0.0), Value(data=-0.3363362591365529, grad=0.0)]
Value(data=0, grad=0.0)
[Value(data=-0.1917395684517186, grad=0.0), Value(data=0.0032888429341100755, grad=0.0), Value(data=-0.10593044205742323, grad=0.0), Value(data=-0.6315716297922155, grad=0.0), Value(data=-0.0061949084857593475, grad=0.

In [401]:
# validate the gradients
mlp = MLP(3, [3, 2, 1])

out_before = mlp([4,1,5])
out_before.backward()

print("out_before:", out_before)
grad = mlp.layers[0].neurons[0].w[0].grad
print("Gradient using backwards:", grad)

h = 1e-5
mlp.layers[0].neurons[0].w[0].data += h


out_after = mlp([4,1,5])
print("out_after:", out_after)


a = out_after.data
b = out_before.data
manual_grad = (a - b)/h
print("Gradient by hand:", manual_grad)



out_before: Value(data=-0.6939227666304026, grad=1)
Gradient using backwards: 0.10724593739600777
out_after: Value(data=-0.6939216941140519, grad=0.0)
Gradient by hand: 0.1072516350730801


In [7]:
def main():

    a = Value(3, label='a')
    b = Value(4, label='b')
    c = Value(2, label='c')

    g = (a * b)
    h = g ** c
    h.backward()
    # h.grad = 1

    # h._backward()
    # g._backward()

    print(f"{a.grad=}")
    print(f"{b.grad=}")
    print(f"{c.grad=}")

    # test if grads make sense
    e = 1e-6
    for elt in np.diag([e]*3):
        before = (a.data * b.data) ** c.data
        after = ((a.data + elt[0]) * (b.data + elt[1])) ** (c.data + elt[2])
        print((after - before)/e)

main()


a.grad=96.0
b.grad=72.0
c.grad=357.8265575694721
96.0000160148411
72.00000899842962
357.82700220465813
