In [117]:
import math
import random

In [118]:
class Value:
    """A scalar that remembers how it was computed, for reverse-mode autodiff.

    Every arithmetic operation returns a new ``Value`` whose ``_prev`` holds
    its operands and whose ``_backward`` closure adds the local derivative
    (scaled by the result's ``grad``) into each operand's ``grad``.
    Calling ``backward()`` on a result runs those closures in reverse
    topological order.

    Attributes:
        data: the wrapped number.
        grad: accumulated derivative of the node ``backward()`` was called on
            with respect to this node; starts at 0 and is only ever added to.
        _prev: set of operand ``Value`` objects this node was built from.
        _op: short label of the operation that produced this node.
    """

    def __init__(self, data, _children=(), _op=''):
        self.data = data
        self.grad = 0
        # Leaves keep this no-op; operations overwrite it on their result.
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op

    def __repr__(self):
        return f'Value({self.data})'

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')
        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward
        return out

    def __radd__(self, other):
        return self + other

    def __neg__(self):
        return self * -1

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        """Return ``other - self`` for a plain-number left operand."""
        return (-self) + other

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')
        def _backward():
            # d(a * b)/da = b, d(a * b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def __rmul__(self, other):
        return self * other  # a * b == b * a

    def __truediv__(self, other):
        return self * other**-1

    def __rtruediv__(self, other):
        """Return ``other / self`` for a plain-number left operand."""
        return (self**-1) * other

    def __pow__(self, other):
        """Return ``self ** other`` where ``other`` is a plain int or float."""
        assert isinstance(other, (int, float)), 'only support int or float'
        out = Value(self.data**other, (self,), f'**{other}')
        def _backward():
            # d(x^n)/dx = n * x^(n - 1)
            self.grad += other * self.data**(other - 1) * out.grad
        out._backward = _backward
        return out

    def exp(self):
        """Return ``e ** self``."""
        out = Value(math.exp(self.data), (self,), 'exp')
        def _backward():
            # d(e^x)/dx = e^x, which is exactly out.data
            self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        """Return the hyperbolic tangent of this value as a single graph node.

        ``math.tanh`` is used directly: computing it as
        ``(e^(2x) - 1) / (e^(2x) + 1)`` raises ``OverflowError`` once
        ``math.exp(2x)`` no longer fits in a float.
        """
        t = math.tanh(self.data)
        out = Value(t, (self,), 'tanh')
        def _backward():
            # d(tanh x)/dx = 1 - tanh(x)^2
            self.grad += (1 - t * t) * out.grad
        out._backward = _backward
        return out

    def backward(self):
        """Accumulate d(self)/d(node) into ``grad`` of every node reachable from self.

        Gradients are added to whatever ``grad`` already holds, so callers
        that reuse nodes across passes must reset ``grad`` themselves.
        """
        # Iterative post-order DFS: a recursive walk exceeds Python's
        # recursion limit on long chains such as a sum over many terms.
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # All operands are already emitted, so the node itself can be.
                topo.append(node)
                stack.pop()

        self.grad = 1
        for v in reversed(topo):
            v._backward()

In [119]:
# Four leaf values combined into d = a + b*c*e.
a, b, c, e = (Value(v) for v in (2.5, -1.5, 3.0, 4.0))
d = a + b * c * e

# Fill in .grad on d and on every node it was built from.
d.backward()

In [120]:
# Gradients of d with respect to itself, c, b and a.
(d.grad, c.grad, b.grad, a.grad)

(1, -6.0, 12.0, 1)

In [121]:
class Neuron:
    """A single tanh unit: ``tanh(sum(x_i * w_i) + b)``.

    Attributes:
        w: list of ``nin`` weight ``Value`` objects, each drawn uniformly
            from [-1, 1].
        b: bias ``Value``, drawn uniformly from [-1, 1].
    """

    def __init__(self, nin: int):
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return the activation for the inputs ``x``.

        Args:
            x: iterable with one entry per weight.

        Raises:
            ValueError: if ``x`` does not have exactly ``len(self.w)`` entries.
        """
        inputs = list(x)
        # zip() would silently drop the surplus side and return an
        # activation computed from only part of the inputs or weights.
        if len(inputs) != len(self.w):
            raise ValueError(
                f'expected {len(self.w)} inputs, got {len(inputs)}'
            )
        act = sum((xi*wi for xi, wi in zip(inputs, self.w)), start=self.b)
        return act.tanh()

    def parameters(self):
        """Return the weights followed by the bias, as a new list."""
        return self.w + [self.b]

In [122]:
# One two-input neuron evaluated on a two-element input.
n = Neuron(nin=2)
x = [Value(v) for v in (3., 2.)]
n(x)

Value(0.9999753967467575)

In [123]:
class Layer:
    """A group of ``nout`` neurons that all receive the same ``nin`` inputs."""

    def __init__(self, nin: int, nout: int):
        self.neurons = []
        for _ in range(nout):
            self.neurons.append(Neuron(nin))

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns the list of outputs, except that a layer with exactly one
        neuron returns that neuron's output directly instead of a list.
        """
        activations = []
        for neuron in self.neurons:
            activations.append(neuron(x))
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """Return the parameters of all neurons as one flat list, in neuron order."""
        params = []
        for neuron in self.neurons:
            params.extend(neuron.parameters())
        return params

In [124]:
# Five neurons sharing the two-element input x.
layer = Layer(nin=2, nout=5)
layer(x)

[Value(0.9791749472886889),
 Value(0.4197622671780622),
 Value(-0.7820509284065802),
 Value(-0.9911296424399676),
 Value(-0.9782888524137416)]

In [125]:
class MLP:
    """A multi-layer perceptron: a chain of ``Layer`` objects applied in order.

    Args:
        nin: number of inputs to the first layer.
        nouts: output size of each successive layer. Any iterable of ints
            is accepted (list, tuple, ...), not only a list.
    """

    def __init__(self, nin: int, nouts: list[int]):
        # Unpacking accepts any iterable; ``[nin] + nouts`` raised TypeError
        # for tuples.
        sizes = [nin, *nouts]
        # Adjacent size pairs give each layer's (input, output) width.
        self.layers = [
            Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])
        ]

    def __call__(self, x):
        """Feed ``x`` through every layer and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers as one flat list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

In [153]:
# 3 inputs -> two layers of 4 neurons -> 1 output.
mlp = MLP(nin=3, nouts=[4, 4, 1])

# Four training examples, one row each, with their targets in ys.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0]

# Sum of squared errors over the four examples, before any training.
ypred = list(map(mlp, xs))
loss = sum((pred - target)**2 for pred, target in zip(ypred, ys))
loss

Value(6.769396070860063)

In [160]:
for _ in range(20):
    # Forward pass: sum of squared errors over the four examples.
    ypred = list(map(mlp, xs))
    loss = sum((pred - target)**2 for pred, target in zip(ypred, ys))
    print(loss.data)

    # backward() only ever adds to .grad, so clear the previous step's values.
    loss.grad = 0
    for p in mlp.parameters():
        p.grad = 0

    # Backward pass, then one gradient-descent step with step size 0.2.
    loss.backward()
    for p in mlp.parameters():
        p.data = p.data - 0.2 * p.grad

0.0019194351021543988
0.0018970141674332862
0.0018750948342804571
0.00185366061476451
0.001832695733068085
0.0018121850875103059
0.0017921142149728915
0.0017724692575542635
0.0017532369312905328
0.0017344044967956976
0.0017159597316844764
0.0016978909046533972
0.0016801867511046242
0.0016628364502064294
0.0016458296032927696
0.0016291562135113617
0.0016128066666370312
0.0015967717129732376
0.0015810424502705005
0.0015656103075955474


In [163]:
# Compare the network's raw outputs with the targets.
ypred = [out.data for out in map(mlp, xs)]
for y, yp in zip(ys, ypred):
    print(f"expected={y}, got={yp}")

expected=1.0, got=0.9812693814524013
expected=-1.0, got=-0.9761542576087325
expected=-1.0, got=-0.9850586753908456
expected=1.0, got=0.9798067251876353
