In [39]:
import collections
import math

class Value:
    """A scalar that remembers how it was produced so gradients can be back-propagated.

    Every arithmetic operation returns a new ``Value`` whose ``depends_on`` lists
    its operands.  The operation also registers, on each operand, a closure that
    returns that operand's share of the result's gradient.  ``backward()`` walks
    the graph from the output towards the leaves and fills in ``grad``.

    Attributes:
        data: the wrapped number.
        grad: derivative of the node ``backward()`` was called on with respect
            to this node (0 until a backward pass reaches it).
        depends_on: operands this value was computed from.
        _backward: closures registered by every operation that consumed this
            value; each returns one gradient contribution.

    Because the closures live on the operands, call ``zero_grad()`` on
    long-lived leaves (e.g. model parameters) before building a new graph when
    contributions from earlier graphs should be dropped.
    """

    def __init__(self, data, depends_on=None):
        # Avoid a shared mutable default: every node gets its own list.
        if depends_on is None:
            depends_on = []

        self.data = data
        self.grad = 0
        self.depends_on = depends_on
        self._backward = []

    def __add__(self, other):
        """Return ``self + other``; both operands receive the result's gradient."""
        other = Value.ensure(other)
        res = Value(
            data=self.data + other.data,
            depends_on=[self, other],
        )
        self.add_backward(lambda: res.grad)
        other.add_backward(lambda: res.grad)
        return res

    def __sub__(self, other):
        """Return ``self - other``; the subtrahend receives the negated gradient."""
        other = Value.ensure(other)
        res = Value(
            self.data - other.data,
            depends_on=[self, other],
        )
        self.add_backward(lambda: res.grad)
        other.add_backward(lambda: -res.grad)
        return res

    def __mul__(self, other):
        """Return ``self * other``; each operand's gradient is scaled by the other."""
        other = Value.ensure(other)
        res = Value(
            self.data * other.data,
            depends_on=[self, other],
        )
        self.add_backward(lambda: res.grad * other.data)
        other.add_backward(lambda: res.grad * self.data)
        return res

    def __truediv__(self, other):
        """Return ``self / other`` (quotient rule for the gradients)."""
        other = Value.ensure(other)
        res = Value(
            self.data / other.data,
            depends_on=[self, other],
        )
        self.add_backward(lambda: res.grad / other.data)
        other.add_backward(lambda: res.grad * -self.data / (other.data ** 2))
        return res

    def __pow__(self, other):
        """Return ``self ** other`` where ``other`` is a plain number (not a Value).

        Raises:
            OverflowError: if the power does not fit in a float; the message
                carries both operands to make the failure easy to diagnose.
        """
        try:
            powered = self.data ** other
        except OverflowError as exc:
            # Report the operands in the exception itself instead of printing
            # them and then failing again on a second evaluation.
            raise OverflowError(
                f'overflow computing {self.data!r} ** {other!r}'
            ) from exc
        res = Value(
            powered,
            depends_on=[self],
        )
        self.add_backward(lambda: res.grad * other * (self.data ** (other - 1)))
        return res

    def tanh(self):
        """Return the hyperbolic tangent of this value.

        ``math.tanh`` is used rather than the ``exp``-based formula because
        ``math.exp`` overflows for large positive inputs, whereas tanh itself
        simply saturates at 1.
        """
        res = Value(
            math.tanh(self.data),
            depends_on=[self]
        )
        # d/dx tanh(x) = 1 - tanh(x)^2
        self.add_backward(lambda: res.grad * (1 - res.data ** 2))
        return res

    def backward(self):
        """Compute ``grad`` for every node this value depends on.

        This node is seeded with gradient 1; the remaining nodes are visited in
        topological order so each one is handled only after all of its
        consumers in this graph have their gradients.
        """
        self.grad = 1
        for node in self._iter_topo_sort():
            if node is self:
                continue
            node.calc_gradient()

    def calc_gradient(self):
        """Set ``grad`` to the sum of the contributions from all registered consumers.

        The gradient is recomputed from scratch rather than added to the
        previous value: the closures already cover every consumer, so adding
        on top of an earlier result would count earlier contributions again
        whenever ``backward()`` runs more than once.
        """
        self.grad = sum(f() for f in self._backward)

    def _iter_topo_sort(self):
        """Yield this node and everything it depends on, consumers before operands."""
        # Pass 1: walk the graph and count, for every node, how many
        # consumer -> operand edges point at it.
        visited = set()
        nodes = [self]
        in_degrees = collections.Counter({self: 0})
        while nodes:
            node = nodes.pop()
            for dep in node.depends_on:
                in_degrees[dep] += 1
                if dep not in visited:
                    visited.add(dep)
                    nodes.append(dep)
        # Pass 2: Kahn's algorithm - repeatedly emit nodes with no pending
        # consumers and release their operands.
        queue = [node for (node, count) in in_degrees.items() if count == 0]
        assert queue, "can't toposort graph with cycle"
        while queue:
            new_queue = []
            for node in queue:
                yield node
                for dep in node.depends_on:
                    in_degrees[dep] -= 1
                    if in_degrees[dep] == 0:
                        new_queue.append(dep)
            queue = new_queue

    def add_backward(self, f):
        """Register a zero-argument callable returning one gradient contribution."""
        self._backward.append(f)

    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return -self + other

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return (self ** -1) * other

    def __neg__(self):
        return self * -1

    def zero_grad(self):
        """Reset the gradient and forget every registered consumer closure."""
        self.grad = 0
        self._backward = []

    def __repr__(self):
        return f'Value(data={self.data})'

    @staticmethod
    def ensure(data):
        """Wrap a plain ``int``/``float`` in a ``Value``; pass a ``Value`` through."""
        if isinstance(data, (int, float)):
            return Value(data)
        assert isinstance(data, Value), f'{data=} should be an instance of Value'
        return data


def test_value_operations():
    """Approximate pi with 1000 terms of the Leibniz series using ``Value`` arithmetic.

    Even-indexed terms are plain floats that get coerced when added to the
    running ``Value``; odd-indexed terms are built entirely from ``Value``
    operations (negation, multiplication, addition, division).  The result is
    printed and must agree with ``math.pi`` to a relative tolerance of 0.001.
    """
    pi = Value(0.0)
    for i in range(1000):
        is_even_term = i % 2 == 0
        if is_even_term:
            term = 1.0 / (2 * i + 1)
        else:
            term = -Value(1.0) / (Value(2) * Value(i) + Value(1))
        pi = pi + term
    # The series converges to pi / 4.
    pi = pi * 4
    print(pi)
    assert math.isclose(pi.data, math.pi, rel_tol=0.001)


# Run the Leibniz-series check when this cell executes; it prints the approximation.
test_value_operations()

Value(data=3.140592653839794)


In [60]:
import random


def pairwise(iterable):
    """Yield consecutive overlapping pairs ``(s0, s1), (s1, s2), ...``.

    Yields nothing when the iterable has fewer than two items.
    """
    iterator = iter(iterable)
    try:
        left = next(iterator)
    except StopIteration:
        # Empty input: there are no pairs to produce.
        return
    for right in iterator:
        yield left, right
        left = right


class Neuron:
    """A linear unit: the weighted sum of its inputs plus a bias (no activation)."""

    def __init__(self, input_size):
        # One weight per input drawn from random.uniform(-1.0, 1.0); bias starts at 0.
        initial_weights = []
        for _ in range(input_size):
            initial_weights.append(Value(random.uniform(-1.0, 1.0)))
        self.weights = initial_weights
        self.bias = Value(0)

    def __call__(self, input_):
        """Return ``bias + sum(x * w)`` over the inputs paired with the weights."""
        total = self.bias
        for x, w in zip(input_, self.weights):
            total = total + x * w
        return total

    def parameters(self):
        """Return the trainable values: every weight followed by the bias."""
        params = list(self.weights)
        params.append(self.bias)
        return params


class Layer:
    """A group of neurons that all receive the same input."""

    def __init__(self, input_size, output_size):
        # One neuron per output, each sized for ``input_size`` inputs.
        neurons = []
        for _ in range(output_size):
            neurons.append(Neuron(input_size))
        self.neurons = neurons

    def __call__(self, input_):
        """Feed ``input_`` to every neuron and return their outputs as a list."""
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron(input_))
        return outputs

    def parameters(self):
        """Return the parameters of all neurons as one flat list, in neuron order."""
        collected = []
        for neuron in self.neurons:
            for p in neuron.parameters():
                collected.append(p)
        return collected


class MLP:
    """A stack of layers applied one after another."""

    def __init__(self, input_size, layer_sizes):
        assert layer_sizes
        self.layers = []
        # Consecutive widths give each layer's (input, output) dimensions.
        widths = [input_size, *layer_sizes]
        self.layers.extend(
            Layer(n_inputs, n_outputs) for n_inputs, n_outputs in pairwise(widths)
        )

    def __call__(self, input_):
        """Pass ``input_`` through every layer in order and return the last output."""
        activations = input_
        for layer in self.layers:
            activations = layer(activations)
        return activations

    def parameters(self):
        """Return the parameters of all layers as one flat list, in layer order."""
        collected = []
        for layer in self.layers:
            collected.extend(layer.parameters())
        return collected


def calc_loss(pred_ys, ys):
    """Return the mean squared error between predictions and targets.

    Squared differences are summed over the zipped pairs and divided by
    ``len(ys)``.
    """
    squared_error_total = 0
    for predicted, target in zip(pred_ys, ys):
        squared_error_total = squared_error_total + (predicted - target) ** 2
    return squared_error_total / len(ys)


def item(x):
    """Return the only element of a one-element sequence.

    Asserts that ``x`` has length exactly 1.
    """
    size = len(x)
    assert size == 1
    only = x[0]
    return only


def test_mlp():
    """Smoke-test a forward pass through a four-layer network and print the output.

    The network's input width is taken from the feature list so that every
    feature takes part in the computation; a narrower first layer would have
    the extra features silently dropped by the ``zip`` in ``Neuron.__call__``.
    """
    features = [1, 2, 3]
    mlp = MLP(len(features), (4, 9, 10, 1))
    print(mlp(features))


def test_grad():
    """Check the gradients of a product ``z = x * y`` with x=3 and y=4.

    The forward/backward pass runs twice, clearing both leaves with
    ``zero_grad()`` first, so the second round also verifies that nothing
    carries over from the first.
    """
    x = Value(3.0)
    y = Value(4.0)
    for i in (0, 1):
        for leaf in (x, y):
            leaf.zero_grad()
        z = x * y
        z.backward()
        # dz/dx = y, dz/dy = x, dz/dz = 1
        assert x.grad == 4.0, f'{i=}, {x.grad=}'
        assert y.grad == 3.0, f'{i=}, {y.grad=}'
        assert z.grad == 1.0, f'{i=}, {z.grad=}'


def test_mlp_training():
    """Fit a one-neuron network to ``y = 5 * x + 93`` with plain gradient descent.

    Runs 10000 full-batch steps over x = 0..9 at a learning rate of 1e-2 and
    prints the loss, targets and predictions every 1000 steps.
    """
    xs = range(10)
    ys = [5 * x + 93 for x in xs]
    mlp = MLP(1, (1,))
    num_iter = 10000
    learning_rate = 1e-2
    # The parameter objects never change, so collect them once up front.
    params = mlp.parameters()
    for i in range(num_iter):
        # Clear gradients and stale graph links left by the previous step.
        for p in params:
            p.zero_grad()

        # Forward pass over the whole data set.
        pred_ys = []
        for x in xs:
            pred_ys.append(item(mlp([x])))
        loss = calc_loss(pred_ys, ys)

        # Backward pass followed by the gradient-descent update.
        loss.backward()
        for p in params:
            p.data = p.data - learning_rate * p.grad

        should_report = i % 1000 == 0
        if should_report:
            print(f'{loss=}\n{ys=}\n{pred_ys=}\n')

# Run the checks defined in this cell: gradient correctness, a forward-pass
# smoke test, then the gradient-descent training demo (which prints progress).
test_grad()
test_mlp()
test_mlp_training()

[Value(data=-1.8746015426384717)]
loss=Value(data=12457.91064888493)
ys=[93, 98, 103, 108, 113, 118, 123, 128, 133, 138]
pred_ys=[Value(data=0.0), Value(data=0.9953896308891226), Value(data=1.9907792617782452), Value(data=2.9861688926673677), Value(data=3.9815585235564903), Value(data=4.976948154445613), Value(data=5.9723377853347355), Value(data=6.967727416223858), Value(data=7.963117047112981), Value(data=8.958506678002102)]

loss=Value(data=0.0283021821169566)
ys=[93, 98, 103, 108, 113, 118, 123, 128, 133, 138]
pred_ys=[Value(data=92.68735459345281), Value(data=97.7372136360279), Value(data=102.78707267860301), Value(data=107.8369317211781), Value(data=112.8867907637532), Value(data=117.93664980632829), Value(data=122.9865088489034), Value(data=128.0363678914785), Value(data=133.0862269340536), Value(data=138.1360859766287)]

loss=Value(data=3.410022354112321e-07)
ys=[93, 98, 103, 108, 113, 118, 123, 128, 133, 138]
pred_ys=[Value(data=92.9989147727141), Value(data=97.99908783904522)