In [16]:
import random
import math


class Value:
    """
    A scalar node in a dynamically built computation graph.

    Every arithmetic operation returns a new ``Value`` that remembers its
    operands (``_prev``) and a closure (``_backward``) that applies the chain
    rule for that operation. Calling ``backward()`` on a result propagates
    gradients to every ``Value`` that contributed to it.

    Attributes:
        data: the wrapped scalar.
        grad: accumulated derivative of the node ``backward()`` was called on
            with respect to this node. Gradients accumulate across calls.
        label: optional human-readable name.
    """

    def __init__(self, data, _children=(), _op="", label="") -> None:
        self.data = data
        self.grad = 0.0
        # Leaf nodes have nothing to propagate; operations overwrite this.
        self._backward = lambda: None
        self._prev = set(_children)

        self._op = _op
        self.label = label

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a Value."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), "+")

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad

        out._backward = _backward
        return out

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a Value."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), "*")

        def _backward():
            # d(a * b)/da = b, d(a * b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out._backward = _backward
        return out

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other, (int, float)), "only supported int/float"
        out = Value(self.data**other, (self,), f"**{other}")

        def _backward():
            # Power rule: d(x^n)/dx = n * x^(n - 1)
            self.grad += other * self.data ** (other - 1) * out.grad

        out._backward = _backward
        return out

    def exp(self):
        """Return e ** self."""
        out = Value(math.exp(self.data), (self,), "exp")

        def _backward():
            # d(e^x)/dx = e^x, which is exactly out.data
            self.grad += out.data * out.grad

        out._backward = _backward
        return out

    def tanh(self):
        """Return the hyperbolic tangent of self."""
        # math.tanh saturates to +/-1 for large |x|; the explicit
        # (e^2x - 1) / (e^2x + 1) form overflows in math.exp for large x.
        t = math.tanh(self.data)
        out = Value(t, (self,), "tanh")

        def _backward():
            # d(tanh x)/dx = 1 - tanh(x)^2
            self.grad += (1 - t**2) * out.grad

        out._backward = _backward
        return out

    def backward(self):
        """
        Back-propagate from this node through the whole graph.

        Seeds this node's gradient with 1 and then runs every node's
        ``_backward`` closure in reverse topological order, so a node's
        gradient is complete before it is pushed on to its operands.
        Gradients of other nodes are accumulated, not reset.
        """
        # Iterative post-order DFS: equivalent to the usual recursive
        # topological sort but not bounded by the interpreter recursion
        # limit, so long chains (e.g. a loss summed over many terms) work.
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # Every operand of `node` has been emitted already.
                topo.append(node)
                stack.pop()

        self.grad = 1.0
        for v in reversed(topo):
            v._backward()

    def __repr__(self):
        return f"Data: {self.data}"

    def __neg__(self):
        """Return ``-self``."""
        return self * -1

    def __sub__(self, other):
        """Return ``self - other``; ``other`` may be a Value or a number."""
        return self + (-other)

    def __rsub__(self, other):
        """Return ``other - self`` for a non-Value left operand."""
        return other + (-self)

    def __radd__(self, other):
        """Return ``other + self`` for a non-Value left operand."""
        return self + other

    def __rmul__(self, other):
        """Return ``other * self`` for a non-Value left operand."""
        return self * other

    def __truediv__(self, other):
        """Return ``self / other``, expressed as ``self * other ** -1``."""
        return self * other**-1

    def __rtruediv__(self, other):
        """Return ``other / self`` for a non-Value left operand."""
        return other * self**-1






class Module:
    """
    Base class for anything that owns trainable parameters.

    Subclasses override ``parameters()``; the gradient reset and the
    gradient-descent step are implemented here on top of it.
    """

    def parameters(self):
        """Return the list of trainable parameters (none for the base class)."""
        return []

    def zero_grad(self):
        """Reset the gradient of every parameter to zero."""
        for param in self.parameters():
            param.grad = 0

    def update_params(self, lr=1e-2):
        """Move every parameter against its gradient, scaled by ``lr``."""
        for param in self.parameters():
            step = lr * param.grad
            param.data -= step


class Neuron(Module):
    """
    A single neuron: bias plus the weighted sum of its inputs,
    passed through tanh when ``activation == "tanh"``.
    """

    def __init__(self, input_units, activation="tanh"):
        self.inputs = input_units
        self.activation = activation
        # Weights are drawn before the bias so the random stream is
        # consumed in the same order for a given seed.
        self.w = [Value(random.uniform(-1, 1)) for _ in range(input_units)]
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """
        Compute the neuron's output for the input sequence ``x``.
        """
        # Start from the bias and fold in one weighted input at a time.
        total = self.b
        for weight, feature in zip(self.w, x):
            total = total + weight * feature

        if self.activation == "tanh":
            return total.tanh()
        return total

    def parameters(self):
        """Return the weights followed by the bias."""
        return [*self.w, self.b]

    def __repr__(self) -> str:
        return f"Neuron ({self.inputs})"


class Layer(Module):
    """
    A group of ``output_units`` neurons that all read the same
    ``input_units``-sized input.
    """

    def __init__(self, input_units, output_units, activation="tanh"):
        super().__init__()
        self.input_units = input_units
        self.output_units = output_units
        self.layer = []
        for _ in range(output_units):
            neuron = Neuron(input_units=input_units, activation=activation)
            self.layer.append(neuron)

    def __call__(self, x):
        """
        Feed ``x`` to every neuron.

        Returns the list of neuron outputs, or the bare output itself
        when the layer has exactly one neuron.
        """
        outputs = []
        for neuron in self.layer:
            outputs.append(neuron(x))

        if len(outputs) == 1:
            return outputs[0]
        return outputs

    def parameters(self):
        """Return the parameters of all neurons, in neuron order."""
        collected = []
        for neuron in self.layer:
            collected.extend(neuron.parameters())
        return collected

    def __repr__(self) -> str:
        return f"Layer ({self.input_units}: {self.output_units})"


class MLP(Module):
    """A multi-layer perceptron: a chain of ``Layer`` objects."""

    def __init__(self, layers, activation="tanh"):
        """
        Args:
            layers: list of unit counts, one entry per layer
                (the first entry is the input size).
            activation: activation name passed on to every layer.

        Eg: [2, 2, 1] creates a network with 2 input units,
        2 hidden units and 1 output unit.
        """
        super().__init__()
        self.layers = layers
        # Each consecutive pair (fan_in, fan_out) of sizes defines one layer.
        self.mlp = []
        for fan_in, fan_out in zip(layers[:-1], layers[1:]):
            self.mlp.append(
                Layer(input_units=fan_in, output_units=fan_out, activation=activation)
            )

    def __call__(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        signal = x
        for stage in self.mlp:
            signal = stage(signal)
        return signal

    def parameters(self):
        """Return the parameters of all layers, in layer order."""
        collected = []
        for stage in self.mlp:
            collected.extend(stage.parameters())
        return collected

    def __repr__(self) -> str:
        return f"MLP: ({self.layers})"


In [17]:
# Build a network with 3 inputs, two layers of 4 units and a single output.
# Weights and biases are drawn at random, so outputs differ between runs.
n = MLP([3, 4, 4, 1])
# Four training samples with three features each.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -2.0]
]

# Target value for each sample in xs.
ys = [1.0, -1.0, -1.0, 1.0]
# Forward pass: one prediction per sample.
ypred = [n(x) for x in xs]
ypred

[Data: 0.4616920630903097,
 Data: -0.07712393818339165,
 Data: -0.1728040113617075,
 Data: 0.3919329137957273]

In [18]:
# Sum of squared differences between each prediction and its target.
loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))

In [19]:
loss

Data: 2.195474445358336

In [20]:
# Back-propagate from the loss; gradients are added to each Value's .grad,
# so re-running this cell without zeroing them first accumulates.
loss.backward()

In [21]:
# Gradient of the loss w.r.t. the first weight of the first neuron
# in the first layer and in the second layer.
n.mlp[0].layer[0].w[0].grad, n.mlp[1].layer[0].w[0].grad

(87.33983786951119, -0.3424544161257401)