In [1]:
import numpy as np

In [104]:
class Value:
  """A scalar node in a dynamically built expression graph with reverse-mode autodiff.

  Every arithmetic operation returns a new ``Value`` that remembers its operand
  nodes (``_prev``), the operation that produced it (``_op``) and a closure
  (``_backward``) that pushes ``out.grad`` into the operands' ``grad``.

  Attributes:
    data: the wrapped scalar.
    grad: accumulated gradient; starts at 0.0 and is only ever added to, so
      callers must reset it themselves before a new backward pass.
    _prev: set of operand nodes this value was computed from.
    _backward: zero-argument callable that propagates this node's gradient.
    _op: short string naming the producing operation ('' for leaf values).
    label: optional human-readable name.
  """

  def __init__(self, data, child = (), op = '', label = ''):
    self.data = data
    self.grad = 0.0
    self._prev = set(child)
    # Leaf nodes have nothing to propagate, hence the no-op default.
    self._backward = lambda: None
    self._op = op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data}, grad={self.grad})"

  def __add__(self, other):
    """Return ``self + other``; a non-Value ``other`` is wrapped in a Value."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')

    def _backward():
      # d(a + b)/da = d(a + b)/db = 1
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad

    out._backward = _backward
    return out

  def __radd__(self, other):
    """Return ``other + self`` for a non-Value left operand (e.g. ``1 + v``)."""
    return self + other

  def __neg__(self):
    """Return ``-self``."""
    out = Value(-self.data, (self, ), '-')

    def _backward():
      self.grad += -1.0 * out.grad

    out._backward = _backward
    return out

  def __sub__(self, other):
    """Return ``self - other``; a non-Value ``other`` is wrapped in a Value."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data - other.data, (self, other), '-')

    def _backward():
      # d(a - b)/da = 1, d(a - b)/db = -1
      self.grad += 1.0 * out.grad
      other.grad += -1.0 * out.grad

    out._backward = _backward
    return out

  def __rsub__(self, other):
    """Return ``other - self`` for a non-Value left operand (e.g. ``1 - v``)."""
    return Value(other) - self

  def __mul__(self, other):
    """Return ``self * other``; a non-Value ``other`` is wrapped in a Value."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    def _backward():
      # d(a * b)/da = b, d(a * b)/db = a
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad

    out._backward = _backward
    return out

  def __rmul__(self, other):
    """Return ``other * self`` for a non-Value left operand (e.g. ``2 * v``)."""
    return self * other

  def exp(self):
    """Return ``e ** self``."""
    out = Value(np.exp(self.data), (self,), 'exp')

    def _backward():
      # d(e^a)/da = e^a, which is exactly out.data
      self.grad += out.data * out.grad

    out._backward = _backward
    return out

  def __pow__(self, other):
    """Return ``self ** other`` where ``other`` is a plain int or float.

    Raises:
      AssertionError: if ``other`` is not an int or float.
    """
    assert isinstance(other, (int, float)), "only supporting int and float for now"
    out = Value(self.data**other, (self,), '**')

    def _backward():
      # d(a^n)/da = n * a^(n - 1); the exponent is a constant, so it gets no gradient
      self.grad += other * (self.data**(other - 1)) * out.grad

    out._backward = _backward
    return out

  def __truediv__(self, other):
    """Return ``self / other``; a non-Value ``other`` is wrapped in a Value."""
    other = other if isinstance(other, Value) else Value(other)
    # The op label is '/', not a backslash: it is used for graph labels, where
    # a backslash acts as an escape character.
    out = Value(self.data / other.data, (self, other), '/')

    def _backward():
      # d(a / b)/da = 1 / b, d(a / b)/db = -a / b^2
      self.grad += 1.0 / other.data * out.grad
      other.grad += (-self.data / other.data**2) * out.grad

    out._backward = _backward
    return out

  def __rtruediv__(self, other):
    """Return ``other / self`` for a non-Value left operand (e.g. ``1 / v``)."""
    return Value(other) / self

  def tanh(self):
    """Return ``tanh(self)``."""
    out = Value(np.tanh(self.data), (self, ), 'tanh')

    def _backward():
      # d(tanh a)/da = 1 - tanh(a)^2
      self.grad += (1 - out.data**2) * out.grad

    out._backward = _backward
    return out

  def backward(self):
    """Run reverse-mode autodiff from this node.

    Sets ``self.grad`` to 1.0 and then calls ``_backward`` on every node
    reachable through ``_prev`` in reverse topological order. Gradients are
    accumulated with ``+=`` and are not reset here.
    """
    # Depth-first post-order built with an explicit stack instead of recursion,
    # so graphs deeper than the interpreter's recursion limit still work.
    topo = []
    visited = {self}
    stack = [(self, iter(self._prev))]
    while stack:
      node, operands = stack[-1]
      for operand in operands:
        if operand not in visited:
          visited.add(operand)
          stack.append((operand, iter(operand._prev)))
          break
      else:
        # Every operand has been emitted already, so this node can follow them.
        topo.append(node)
        stack.pop()

    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

In [96]:
class Neuron:
  """A single tanh unit: ``tanh(b + w[0]*x[0] + w[1]*x[1] + ...)``."""

  def __init__(self, nin):
    """Create ``nin`` weights and one bias, each drawn from ``np.random.uniform(-1, 1)``."""
    weights = []
    for _ in range(nin):
      weights.append(Value(np.random.uniform(-1, 1)))
    self.w = weights
    self.b = Value(np.random.uniform(-1, 1))

  def __call__(self, x):
    """Return the tanh activation of the weighted inputs plus the bias.

    Weights and inputs are paired with ``zip``, so any surplus on either side
    is ignored.
    """
    # Start from the bias and add one weighted input at a time, left to right.
    pre_activation = self.b
    for weight, feature in zip(self.w, x):
      pre_activation = pre_activation + weight * feature
    return pre_activation.tanh()

  def parameters(self):
    """Return a new list holding every weight followed by the bias."""
    return [*self.w, self.b]

class Layer:
  """A group of ``nout`` neurons that all receive the same input."""

  def __init__(self, nin, nout):
    """Build ``nout`` neurons, each constructed with ``nin``."""
    units = []
    for _ in range(nout):
      units.append(Neuron(nin))
    self.neurons = units

  def __call__(self, x):
    """Apply every neuron to ``x`` and return the outputs as a list, in neuron order."""
    activations = []
    for unit in self.neurons:
      activations.append(unit(x))
    return activations

  def parameters(self):
    """Return the parameters of all neurons concatenated into one flat list."""
    flat = []
    for unit in self.neurons:
      for param in unit.parameters():
        flat.append(param)
    return flat

class MLP:
  """A stack of layers applied one after another."""

  def __init__(self, nin, nouts):
    """Build one layer per entry of ``nouts``.

    ``nouts`` is a list of layer output sizes; layer ``i`` takes the previous
    layer's size (``nin`` for the first layer) as its input size.
    """
    widths = [nin] + nouts
    # Consecutive width pairs are the (inputs, outputs) of each layer.
    self.layers = [Layer(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])]

  def __call__(self, x):
    """Feed ``x`` through every layer in order and return the last layer's output."""
    activations = x
    for layer in self.layers:
      activations = layer(activations)
    return activations

  def parameters(self):
    """Return the parameters of all layers concatenated into one flat list."""
    flat = []
    for layer in self.layers:
      flat.extend(layer.parameters())
    return flat

In [112]:
from graphviz import Digraph

def trace(root):
  """Collect every node and edge of the graph that ends in ``root``.

  Follows each node's ``_prev`` collection starting from ``root``.

  Returns:
    A ``(nodes, edges)`` pair of sets: ``nodes`` holds every reachable node
    (including ``root``) and ``edges`` holds a ``(child, parent)`` tuple for
    every ``child`` found in ``parent._prev``.
  """
  nodes, edges = set(), set()
  # Explicit worklist instead of recursion, so graphs deeper than the
  # interpreter's recursion limit can still be traced.
  pending = [root]
  while pending:
    v = pending.pop()
    if v in nodes:
      continue
    nodes.add(v)
    for child in v._prev:
      edges.add((child, v))
      pending.append(child)
  return nodes, edges

def draw_dot(root):
  """Build a left-to-right ``Digraph`` (SVG format) of the graph ending in ``root``.

  Each value becomes a record-shaped node showing its label, data and grad.
  A value produced by an operation also gets a separate node for that
  operation, placed between the value and its operands.
  """
  graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR = left to right
  nodes, edges = trace(root)

  for value in nodes:
    value_id = str(id(value))
    record_label = "{ %s | data %.4f | grad %.4f }" % (value.label, value.data, value.grad)
    graph.node(name = value_id, label = record_label, shape='record')
    if not value._op:
      # leaf value: nothing produced it, so there is no op node to draw
      continue
    # the op node's name is the value's id with the op string appended
    op_id = value_id + value._op
    graph.node(name = op_id, label = value._op)
    graph.edge(op_id, value_id)

  for operand, result in edges:
    # operands point at the op node of the value they produced
    graph.edge(str(id(operand)), str(id(result)) + result._op)

  return graph

In [113]:
# Toy training set: one row of three input features per sample.
xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]

# Target output for each row of xs, in the same order.
ys = [1.0, -1.0, -1.0, 1.0]

In [114]:
# 3 inputs -> layers of 4, 2 and 1 neurons (a single output value).
net = MLP(nin=3, nouts=[4, 2, 1])

In [115]:
# Display the flat list returned by net.parameters() as this cell's output.
net.parameters()

[Value(data=-0.13139971411286933, grad=0.0),
 Value(data=0.05424011171946619, grad=0.0),
 Value(data=-0.5696004243179844, grad=0.0),
 Value(data=-0.19466857951255756, grad=0.0),
 Value(data=-0.9532550558458774, grad=0.0),
 Value(data=-0.9776484287934333, grad=0.0),
 Value(data=0.9372210877561071, grad=0.0),
 Value(data=0.26469118288173865, grad=0.0),
 Value(data=0.14302486303120943, grad=0.0),
 Value(data=0.9541608706059868, grad=0.0),
 Value(data=-0.6078885705201111, grad=0.0),
 Value(data=-0.0957573344669016, grad=0.0),
 Value(data=0.24458369840453997, grad=0.0),
 Value(data=-0.344744905843513, grad=0.0),
 Value(data=-0.8771621739618338, grad=0.0),
 Value(data=0.04560490330058542, grad=0.0),
 Value(data=-0.1570711076306086, grad=0.0),
 Value(data=-0.8089059693034171, grad=0.0),
 Value(data=-0.18337028615544892, grad=0.0),
 Value(data=-0.923781298521047, grad=0.0),
 Value(data=0.28534836955689147, grad=0.0),
 Value(data=-0.5768321138873902, grad=0.0),
 Value(data=-0.47376022767113213,

In [124]:
EPOCHS = 20          # number of gradient-descent steps run by this cell
LEARNING_RATE = 0.1  # step size applied to every parameter update

for epoch in range(EPOCHS):
  # Forward pass: net(x) returns a list, of which the first element is the prediction.
  ypred = [net(x)[0] for x in xs]

  # Sum of squared errors over however many samples there are (at least one).
  # The first term is the start value so the sum needs only Value + Value.
  sq_errors = [(yp - yt)**2 for yp, yt in zip(ypred, ys)]
  loss = sum(sq_errors[1:], sq_errors[0])

  # Gradients accumulate with +=, so clear the parameters' grads before backprop.
  for p in net.parameters():
    p.grad = 0.0
  loss.backward()

  # Gradient-descent step: move each parameter against its gradient.
  for p in net.parameters():
    p.data -= LEARNING_RATE * p.grad

  print(f"Epoch: {epoch}, Loss: {loss.data}")

Epoch: 0, Loss: 0.008121702351973877
Epoch: 1, Loss: 0.00800834346131073
Epoch: 2, Loss: 0.007897971319050234
Epoch: 3, Loss: 0.007790470701869471
Epoch: 4, Loss: 0.00768573220718296
Epoch: 5, Loss: 0.007583651891357708
Epoch: 6, Loss: 0.007484130934520397
Epoch: 7, Loss: 0.00738707532970752
Epoch: 8, Loss: 0.007292395594324092
Epoch: 9, Loss: 0.007200006502068371
Epoch: 10, Loss: 0.007109826833651378
Epoch: 11, Loss: 0.007021779144794371
Epoch: 12, Loss: 0.006935789550125168
Epoch: 13, Loss: 0.006851787521718876
Epoch: 14, Loss: 0.006769705701139947
Epoch: 15, Loss: 0.006689479723943124
Epoch: 16, Loss: 0.006611048055682365
Epoch: 17, Loss: 0.006534351838557845
Epoch: 18, Loss: 0.006459334747906083
Epoch: 19, Loss: 0.006385942857805103


In [125]:
# Display the predictions left in `ypred` by the most recent run of the training loop.
ypred

[Value(data=0.963745996886953, grad=-0.0725080062260941),
 Value(data=-0.9711035864340304, grad=0.05779282713193923),
 Value(data=-0.9496983298680991, grad=0.1006033402638018),
 Value(data=0.9586922600345873, grad=-0.08261547993082541)]