In [4]:
import math


class Value:
    """A scalar that remembers how it was computed so gradients can be
    back-propagated through the expression graph.

    Attributes:
        data: the wrapped number.
        grad: accumulated derivative of the node on which ``backward()`` was
            called with respect to this node; starts at ``0.0``.
        label: optional free-form name for the node.
        _prev: set of the ``Value`` operands this node was computed from.
        _op: short string naming the operation that produced this node.
        _backward: closure that adds this node's contribution to the ``grad``
            of each operand (a no-op for leaf nodes).
    """

    def __init__(self, data, children=None, op='', label=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None
        # Always a set, so the attribute has one type whether or not the node
        # has operands.
        self._prev = set(children) if children else set()
        self._op = op
        self.label = label

    def __repr__(self):
        return f"Value({self.data})"

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), "+")

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad

        out._backward = _backward
        return out

    def __radd__(self, other):  # other + self
        return self + other

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), "*")

        def _backward():
            # d(a * b)/da = b, d(a * b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out._backward = _backward
        return out

    def __rmul__(self, other):  # other * self
        return self * other

    def __neg__(self):  # -self
        return self * -1

    def __sub__(self, other):  # self - other
        return self + (-other)

    def __rsub__(self, other):  # other - self
        return -self + other

    def __truediv__(self, other):  # self / other
        # x / y == x * y**-1
        return self * other ** -1

    def __rtruediv__(self, other):  # other / self
        return self ** -1 * other

    def __pow__(self, other):
        """Return ``self ** other``; plain numbers are wrapped in a ``Value``.

        The gradient flows to the base and, when the base is positive, to the
        exponent as well.
        """
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data ** other.data, (self, other), "pow")

        def _backward():
            base, exponent = self.data, other.data
            # d(b**e)/db = e * b**(e - 1). For e == 0 the derivative is 0, and
            # skipping the term avoids evaluating 0 ** -1 when the base is 0.
            if exponent != 0:
                self.grad += exponent * (base ** (exponent - 1)) * out.grad
            # d(b**e)/de = b**e * ln(b), which is only defined for b > 0.
            if base > 0:
                other.grad += out.data * math.log(base) * out.grad

        out._backward = _backward
        return out

    def __rpow__(self, other):  # other ** self
        return Value(other) ** self

    def exp(self):
        """Return ``e ** self`` as a new ``Value``."""
        out = Value(math.exp(self.data), (self,), "exp")

        def _backward():
            # d(e**x)/dx = e**x, which is exactly out.data
            self.grad += out.data * out.grad

        out._backward = _backward
        return out

    def tanh(self):
        """Return the hyperbolic tangent of ``self`` as a new ``Value``."""
        # tanh x = (e^(2x) - 1) / (e^(2x) + 1); math.tanh evaluates this
        # without computing e^(2x), which overflows for large positive x.
        v = math.tanh(self.data)
        out = Value(v, (self,), 'tanh')

        def _backward():
            # d(tanh x)/dx = 1 - tanh(x)**2
            self.grad += (1 - v ** 2) * out.grad

        out._backward = _backward
        return out

    def backward(self):
        """Back-propagate from this node through every node it depends on.

        Sets ``self.grad`` to 1.0 and then runs each node's ``_backward`` in
        reverse topological order. Gradients of the other nodes are
        accumulated with ``+=`` and are not reset here.
        """
        # Topological sort (post-order depth-first walk over the operands).
        # Done with an explicit stack so that deep graphs do not exceed the
        # interpreter's recursion limit.
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # Every operand of `node` is already emitted.
                topo.append(node)
                stack.pop()

        self.grad = 1.0
        for node in reversed(topo):
            node._backward()

In [14]:
import random


class Neuron:
    """A single tanh unit: ``tanh(sum(w_i * x_i) + bias)``.

    The ``n_in`` weights and the bias are ``Value`` objects drawn from
    ``random.uniform(-1, 1)``.
    """

    def __init__(self, n_in):
        # Weights are drawn first and the bias last.
        self.weight = []
        for _ in range(n_in):
            self.weight.append(Value(random.uniform(-1, 1)))
        self.bias = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return the activation of this neuron for the input sequence ``x``."""
        # w . x + b, accumulated left to right starting from 0
        total = 0
        for w_i, x_i in zip(self.weight, x):
            total = total + w_i * x_i
        pre_activation = total + self.bias
        return pre_activation.tanh()

    def parameters(self):
        """Return a new list holding every weight followed by the bias."""
        return [*self.weight, self.bias]


class Layer:
    """A group of ``n_out`` neurons that all receive the same ``n_in`` inputs."""

    def __init__(self, n_in, n_out):
        self.neurons = [Neuron(n_in) for _ in range(n_out)]

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns the list of neuron outputs, or the bare output itself when
        the layer holds exactly one neuron.
        """
        activations = []
        for neuron in self.neurons:
            activations.append(neuron(x))
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """Return the parameters of all neurons as one flat list."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected


class MLP:
    """A stack of ``Layer`` objects applied one after another.

    ``n_in`` is the input width and ``n_outs`` is a list with the width of
    each successive layer.
    """

    def __init__(self, n_in, n_outs):
        sizes = [n_in] + n_outs
        # Consecutive size pairs give each layer's (inputs, outputs).
        self.layers = [
            Layer(fan_in, fan_out) for fan_in, fan_out in zip(sizes, sizes[1:])
        ]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the result."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

    def parameters(self):
        """Return the parameters of all layers as one flat list."""
        collected = []
        for layer in self.layers:
            collected.extend(layer.parameters())
        return collected

In [56]:
# Toy dataset: four 3-feature inputs, each paired with a target of +1 or -1.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0]  # desired targets

# Seed the RNG so the randomly initialised weights, and therefore the whole
# training run, are the same after every Restart & Run All.
random.seed(42)
# 3 inputs -> two hidden layers of 4 neurons -> 1 output
m = MLP(3, [4, 4, 1])


In [63]:
# Hyperparameters of the plain gradient-descent loop below.
N_STEPS = 20
LEARNING_RATE = 0.1

for k in range(N_STEPS):
    # forward pass: predict every sample, then sum the squared errors
    ypred = [m(x) for x in xs]
    loss = sum((yout - ygt) ** 2 for ygt, yout in zip(ys, ypred))

    # backward pass
    # Gradients accumulate with `+=`, so the parameters' gradients from the
    # previous step must be cleared before back-propagating the new loss.
    for p in m.parameters():
        p.grad = 0.0
    loss.backward()

    # gradient-descent update: step against the gradient
    for p in m.parameters():
        p.data += -LEARNING_RATE * p.grad

    # `loss` was computed before this step's update was applied
    print(k, loss)

0 Value(0.004681766727090079)
1 Value(0.004617289408884947)
2 Value(0.004554575629617143)
3 Value(0.00449355239529506)
4 Value(0.004434150789942387)
5 Value(0.004376305687710162)
6 Value(0.004319955489551471)
7 Value(0.004265041882007742)
8 Value(0.004211509615933404)
9 Value(0.004159306303230845)
10 Value(0.004108382229879395)
11 Value(0.004058690183729633)
12 Value(0.004010185295697836)
13 Value(0.003962824893139459)
14 Value(0.003916568364308243)
15 Value(0.0038713770329192993)
16 Value(0.003827214041934074)
17 Value(0.0037840442457731964)
18 Value(0.0037418341102413433)
19 Value(0.0037005516195179554)


In [64]:
# Show the predictions from the last forward pass of the training loop; they
# were computed before that iteration's parameter update.
ypred

[Value(0.9763988755653319),
 Value(-0.9695442214205039),
 Value(-0.9708474460408598),
 Value(0.9630390382469098)]