In [77]:
import random
# from micrograd.engine import Value
import numpy as np



In [78]:
from graphviz import Digraph

def trace(root):
  """Collect every node and edge of the expression graph that ends at ``root``.

  Each node is expected to expose a ``_prev`` collection holding the nodes it
  was computed from.

  The walk uses an explicit stack instead of recursion, so graphs deeper than
  Python's recursion limit (for example a long chain of additions) are handled
  without raising ``RecursionError``.

  Returns:
    (nodes, edges): ``nodes`` is the set of all reachable nodes, ``root``
    included; ``edges`` is a set of ``(child, parent)`` tuples, one for each
    direct dependency.
  """
  nodes, edges = set(), set()
  pending = [root]
  while pending:
    v = pending.pop()
    if v in nodes:
      # already expanded through another path
      continue
    nodes.add(v)
    for child in v._prev:
      edges.add((child, v))
      pending.append(child)
  return nodes, edges

def draw_dot(root):
  """Render the expression graph ending at ``root`` as a left-to-right Digraph.

  Every value becomes a 'record' node showing its label, data and grad. A value
  produced by an operation (non-empty ``_op``) additionally gets a small op
  node that feeds into it, and the operands are wired to that op node.
  """
  dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right

  def value_id(v):
    # id() is unique per live object, which makes it a convenient node name
    return str(id(v))

  nodes, edges = trace(root)

  for n in nodes:
    this_id = value_id(n)
    dot.node(name=this_id, label=f"{n.label} | data {n.data:.4f} | grad {n.grad:.4f}", shape="record")
    if not n._op:
      # leaf value: no operation node to draw
      continue
    op_id = this_id + n._op
    dot.node(name=op_id, label=n._op)
    dot.edge(op_id, this_id)

  for operand, result in edges:
    # operands point at the op node that produced `result`
    dot.edge(value_id(operand), value_id(result) + result._op)

  return dot




In [79]:

class Value:
    """A scalar that remembers how it was computed, for reverse-mode autodiff.

    Arithmetic on ``Value`` objects produces new ``Value`` objects that keep a
    reference to their operands (``_prev``), the name of the operation
    (``_op``) and a closure (``_backward``) that pushes the result's gradient
    back into the operands. Calling :meth:`backward` on a result runs those
    closures in reverse topological order.

    Plain numbers are accepted on either side of ``+``, ``-``, ``*`` and ``/``;
    they are wrapped in a ``Value`` on the fly.
    """

    def __init__(self, data, _children=(), _op='',label=""):
        self.data = data                    # the wrapped number
        self.grad = 0.0                     # accumulated d(root)/d(self)
        self.label = label                  # optional display name
        self._prev = set(_children)         # operands this value was built from
        self._op = _op                      # operation name, '' for leaves
        self._backward = lambda: None       # leaves have nothing to propagate

    def __repr__(self):
        return f"Value(data={self.data})"

    def __add__(self, other):
        if not isinstance(other, Value):
            other = Value(other)

        out = Value(self.data + other.data, (self, other),"+")

        def _backward():
            # d(a+b)/da = d(a+b)/db = 1; += so reuse of a node accumulates
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward
        return out

    def __radd__(self, other): # other + self
        return self + other

    def __mul__(self, other):
        if not isinstance(other, Value):
            other = Value(other)
        out = Value(self.data * other.data, (self, other),"*")

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def __rmul__(self, other): # other * self
        return self * other

    def __neg__(self): # -self
        return self * Value(-1.0)

    def __sub__(self, other): # self - other
        return self + (-other)

    def __rsub__(self, other): # other - self
        return (-self) + other

    def __pow__(self, n):
        assert isinstance(n, (int,float)), "only supporting int/float powers for now"
        out = Value(self.data**n, (self,), f"**{n}")

        def _backward():
            self.grad += n * self.data**(n-1) * out.grad
        out._backward = _backward
        return out

    def __truediv__(self, other): # self / other
        return self * other**-1

    def __rtruediv__(self, other): # other / self
        return other * self**-1

    def exp(self):
        """Return e**self as a new Value."""
        out = Value(np.exp(self.data), (self,), "exp")

        def _backward():
            # d(e^x)/dx = e^x, which is out.data
            self.grad += out.grad * out.data
        out._backward = _backward
        return out

    def tanh(self):
        """Return tanh(self) as a new Value."""
        out = Value(np.tanh(self.data), (self,), "tanh")

        def _backward():
            # d(tanh x)/dx = 1 - tanh(x)^2
            self.grad += out.grad * (1 - out.data**2)
        out._backward = _backward
        return out

    def backward(self):
        """Backpropagate from this value through the whole graph behind it.

        Sets ``self.grad`` to 1.0 and then calls each node's ``_backward`` in
        reverse topological order. Gradients are accumulated with ``+=`` and
        are not reset here.

        The topological order is built with an explicit stack rather than
        recursion, so long dependency chains do not hit Python's recursion
        limit.
        """
        topo = []
        visited_nodes = set()

        # Iterative post-order DFS. Each stack entry is (node, expanded):
        # the first time a node is popped its operands are pushed on top of a
        # marker entry; when the marker is popped all operands are done and
        # the node can be emitted.
        stack = [(self, False)]
        while stack:
            node, expanded = stack.pop()
            if expanded:
                topo.append(node)
                continue
            if node in visited_nodes:
                continue
            visited_nodes.add(node)
            stack.append((node, True))
            for child in node._prev:
                if child not in visited_nodes:
                    stack.append((child, False))

        self.grad = 1.0
        for node in reversed(topo):
            node._backward()

In [80]:
class Neuron:
    """One tanh unit with ``nin`` weights and a bias."""

    def __init__(self,nin):
        # Weights are drawn first, then the bias, all via random.uniform(-1, 1).
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1,1)))
        self.w = weights
        self.b = Value(random.uniform(-1,1))

    def __call__(self,x):
        """Return tanh(b + sum_i w_i * x_i) for the inputs ``x``."""
        # Start from the bias and add one weighted input at a time.
        act = self.b
        for xi, wi in zip(x, self.w):
            act = act + wi*xi
        return act.tanh()

    def parameters(self):
        """Return the weights followed by the bias, as a new list."""
        return [*self.w, self.b]
    
class Layer:
    """``nout`` independent neurons that all read the same ``nin`` inputs."""

    def __init__(self, nin, nout):
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin))
        self.neurons = neurons

    def __call__(self,x):
        """Evaluate every neuron on ``x``.

        Returns a list of outputs, except that a layer with exactly one neuron
        returns that single output directly instead of a one-element list.
        """
        outs = []
        for unit in self.neurons:
            outs.append(unit(x))

        if len(outs) == 1:
            return outs[0]
        return outs

    def parameters(self):
        """Return the parameters of all neurons, concatenated in neuron order."""
        params = []
        for unit in self.neurons:
            params.extend(unit.parameters())
        return params
    
class MLP:
    """A stack of ``Layer`` objects applied one after another."""

    def __init__(self, nin, nouts):
        # Example: nin=3, nouts=[4, 4, 1] gives sizes [3, 4, 4, 1], i.e.
        # Layer(3,4) -> Layer(4,4) -> Layer(4,1)
        sz = [nin] + nouts
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz[:-1], sz[1:])]

    def __call__(self,x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

    def parameters(self):
        """Return the parameters of all layers, concatenated in layer order."""
        params = []
        for layer in self.layers:
            params.extend(layer.parameters())
        return params

        


In [81]:
# len(n.parameters())

In [82]:
# One sample with three inputs, pushed through a 3 -> 4 -> 4 -> 1 MLP.
# NOTE(review): no random seed is set before the weights are drawn with
# random.uniform, so the displayed value differs from run to run.
x=[2.0,3.0,-1.0]
n = MLP(3,[4,4,1])
n(x)

Value(data=0.25474991460819224)

In [83]:
# Toy training set: four samples with three inputs each, and one target per sample.
xs = [ 
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]

ys = [1.0, -1.0, -1.0, 1.0] # desired targets

In [84]:
# Forward pass over the whole dataset, then the sum of squared errors.
ypred = list(map(n, xs))
loss = sum((yout - ytrue)**2 for yout, ytrue in zip(ypred, ys))
loss

Value(data=2.3044796719006557)

In [85]:
# Same forward pass and sum-of-squared-errors loss as the preceding cell;
# it rebuilds ypred and loss from the current parameters of n.
ypred = [n(x) for x in xs]
loss = sum([(ypredi-ysi)**2 for ysi,ypredi in zip(ys,ypred)])
loss

Value(data=2.3044796719006557)

In [86]:
# Backpropagate from the scalar loss; gradients are accumulated (+=) into .grad,
# so running this cell twice without zeroing the grads doubles them.
loss.backward()

In [87]:
# Spot-check one parameter: first weight of the first neuron in the first layer.
print(n.layers[0].neurons[0].w[0].data)
print(n.layers[0].neurons[0].w[0].grad)

-0.7203356125688549
0.20697077274124132


In [88]:
# Same spot-check as the preceding cell (first weight of the first neuron, first layer).
print(n.layers[0].neurons[0].w[0].data)
print(n.layers[0].neurons[0].w[0].grad)

-0.7203356125688549
0.20697077274124132


In [89]:
# One manual gradient-descent step with step size 0.5, then reset the
# gradients so the next backward pass starts from zero.
for p in n.parameters():
    p.data -= 0.5 * p.grad
    p.grad = 0.0

In [99]:
# Full-batch gradient descent: 1000 steps with step size 0.05.
for k in range(1000):
    # forward pass and sum-of-squared-errors loss
    ypred = list(map(n, xs))
    loss = sum((yout - ytrue)**2 for yout, ytrue in zip(ypred, ys))

    # backward pass
    loss.backward()

    # parameter update, then zero the grads for the next iteration
    for p in n.parameters():
        p.data -= 0.05 * p.grad
        p.grad = 0.0
    print(k,loss)

0 Value(data=1.61119918657703e-06)
1 Value(data=1.6104865656757279e-06)
2 Value(data=1.6097745696373222e-06)
3 Value(data=1.6090631976426026e-06)
4 Value(data=1.6083524488743432e-06)
5 Value(data=1.6076423225157652e-06)
6 Value(data=1.60693281775287e-06)
7 Value(data=1.6062239337718718e-06)
8 Value(data=1.6055156697611278e-06)
9 Value(data=1.6048080249099605e-06)
10 Value(data=1.604100998409457e-06)
11 Value(data=1.6033945894518782e-06)
12 Value(data=1.6026887972308786e-06)
13 Value(data=1.601983620941709e-06)
14 Value(data=1.6012790597810662e-06)
15 Value(data=1.6005751129467295e-06)
16 Value(data=1.5998717796378992e-06)
17 Value(data=1.5991690590556877e-06)
18 Value(data=1.5984669504015957e-06)
19 Value(data=1.5977654528796255e-06)
20 Value(data=1.5970645656944554e-06)
21 Value(data=1.596364288052299e-06)
22 Value(data=1.5956646191607513e-06)
23 Value(data=1.594965558228768e-06)
24 Value(data=1.5942671044669837e-06)
25 Value(data=1.5935692570868938e-06)
26 Value(data=1.59287201530167

In [100]:
# Compare predictions with targets. ypred comes from the forward pass of the
# last loop iteration, i.e. it was computed before that iteration's final
# parameter update.
print(f"ypred = {ypred}")
print(f"ys = {ys}")

ypred = [Value(data=0.9995825341719967), Value(data=-0.9995270202495362), Value(data=-0.9993379243201175), Value(data=0.9994709677143923)]
ys = [1.0, -1.0, -1.0, 1.0]
