In [242]:
import math
import random
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline

In [243]:
class Value:
    """A scalar that records the operations applied to it for reverse-mode autodiff.

    Attributes:
        data: the numeric value held by this node.
        grad: accumulated derivative with respect to this node; starts at 0.0
            and is filled in by ``backward()``.
        prev: set of the ``Value`` operands this node was computed from.
        label: optional display name for the node.
    """

    def __init__(self, data, _children=(), _op="", label=""):
        self.data = data
        self.prev = set(_children)
        self._op = _op
        self.label = label
        self.grad = 0.0
        # Leaf nodes have nothing to propagate; operations overwrite this hook.
        self._backward = lambda: None

    def __repr__(self):
        return f"Value(data{self.data})"

    def __neg__(self):
        return self * -1

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), "+")

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1, so the gradient flows through unchanged.
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __radd__(self, other):  # other + self
        return self + other

    def __sub__(self, other):  # self - other
        return self + (-other)

    def __rsub__(self, other):  # other - self
        # Subtraction is not commutative: negate self, then add the left operand.
        return (-self) + other

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), "*")

        def _backward():
            # Product rule: each operand's local derivative is the other operand.
            self.grad += out.grad * other.data
            other.grad += out.grad * self.data
        out._backward = _backward

        return out

    # In the case of int * Value, int.__mul__ fails, so Python swaps the order
    # of the operands and tries Value.__rmul__ instead.
    def __rmul__(self, other):  # other * self
        return self * other

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other, (int, float))
        x = self.data
        v = math.pow(x, other)
        out = Value(v, (self, ), _op = f"^{other}")

        def _backward():
            # Power rule: d(x^k)/dx = k * x^(k-1).
            self.grad += other * self.data**(other-1) * out.grad
        out._backward = _backward

        return out

    def __truediv__(self, other):  # self / other
        return self * other**-1

    def __rtruediv__(self, other):  # other / self
        return other * self**-1

    def tanh(self):
        """Return ``tanh(self)`` as a new node."""
        # math.tanh stays finite for large |data|, where exp(2 * data) would overflow.
        t = math.tanh(self.data)
        out = Value(t, (self, ), _op = 'tanh')

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)^2; accumulate because this node may
            # feed more than one consumer.
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward

        return out

    def exp(self):
        """Return ``e ** self`` as a new node."""
        x = self.data
        out = Value(math.exp(x), (self, ), _op='exp')

        def _backward():
            # d exp(x)/dx = exp(x), which is exactly out.data.
            self.grad += out.data * out.grad
        out._backward = _backward

        return out

    def backward(self):
        """Back-propagate from this node, filling in ``grad`` on every ancestor.

        Sets ``self.grad`` to 1.0 and then runs each node's ``_backward`` hook in
        reverse topological order. Gradients are accumulated, not reset.
        """
        # Post-order depth-first traversal with an explicit stack, so that deep
        # graphs do not hit Python's recursion limit.
        topo = []
        visited = set()
        stack = [(self, False)]
        while stack:
            node, expanded = stack.pop()
            if expanded:
                # All operands of this node have already been emitted.
                topo.append(node)
                continue
            if node in visited:
                continue
            visited.add(node)
            stack.append((node, True))
            for child in node.prev:
                if child not in visited:
                    stack.append((child, False))

        self.grad = 1.0

        for node in reversed(topo):
            node._backward()

In [244]:
from graphviz import Digraph

def trace(root):
    """Collect every node reachable from ``root`` and the edges between them.

    Follows each node's ``prev`` collection. Returns ``(nodes, edges)`` where
    ``nodes`` is a set of nodes and ``edges`` is a set of
    ``(operand, result)`` pairs.
    """
    seen_nodes = set()
    seen_edges = set()

    def visit(node):
        # Each node is expanded only once, even if reachable by several paths.
        if node in seen_nodes:
            return
        seen_nodes.add(node)
        for operand in node.prev:
            seen_edges.add((operand, node))
            visit(operand)

    visit(root)
    return seen_nodes, seen_edges

def draw_dot(root):
    """Build a left-to-right graphviz ``Digraph`` of the graph ending at ``root``.

    Every node returned by ``trace(root)`` becomes a record-shaped box showing
    its label, data and grad. A node with a non-empty ``_op`` also gets a
    separate operation node that points at it, and each operand edge is routed
    into the consumer's operation node.
    """
    graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})

    values, links = trace(root)

    for value in values:
        value_id = str(id(value))
        record = f"{{ {value.label} | data {value.data:.4f} | grad {value.grad:.4f} }}"
        graph.node(name=value_id, label=record, shape='record')

        # Leaf values were not produced by an operation, so they get no op node.
        if not value._op:
            continue

        op_id = value_id + value._op
        graph.node(name=op_id, label=value._op)
        graph.edge(op_id, value_id)

    for source, target in links:
        # Connect the operand to the operation node of the value it produced.
        graph.edge(str(id(source)), str(id(target)) + target._op)

    return graph

In [245]:
class Neuron:
    """A single unit computing ``tanh(sum(w_i * x_i) + b)``."""

    def __init__(self, nin):
        # One weight per incoming edge (nin of them), each drawn by random.uniform(-1, 1).
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        # Only one bias is needed per neuron.
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):  # invoked as neuron(x)
        # Start from the bias and add each weighted input in turn: w . x + b.
        pre_activation = self.b
        for weight, feature in zip(self.w, x):
            pre_activation = pre_activation + weight * feature
        # Apply the activation before returning the output of the neuron.
        return pre_activation.tanh()
    
class Layer:
    """A group of neurons that all receive the same input.

    ``Layer(a, b)`` builds ``b`` neurons, each taking ``a`` inputs.
    """

    def __init__(self, nin, num):
        # num is the number of neurons in this layer; each one has nin inputs.
        self.neurons = [Neuron(nin) for _ in range(num)]

    def __call__(self, x):
        # Evaluate every neuron on the same input, in order.
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron(x))
        # A single-neuron layer returns the bare output instead of a 1-element list.
        if len(outputs) == 1:
            return outputs[0]
        return outputs
    
class MLP:
    """A multi-layer perceptron: a chain of ``Layer`` objects applied in order.

    Args:
        nin: number of inputs to the network.
        size: sequence (list, tuple, ...) giving the number of neurons in each
            consecutive layer.
    """

    def __init__(self, nin, size):
        # list(size) lets callers pass a tuple or any other sequence, not only a list.
        widths = [nin] + list(size)
        # Layer i maps widths[i] inputs to widths[i+1] outputs.
        self.layers = [Layer(widths[i], widths[i+1]) for i in range(len(widths) - 1)]

    def __call__(self, x):
        # Feed the output of each layer into the next one.
        for layer in self.layers:
            x = layer(x)
        return x

### Example

In [246]:
# Example: four 3-dimensional inputs and the target output for each of them.
# Seed the RNG so the weight initialisation, and therefore the predictions
# below, are the same on every Restart & Run All.
random.seed(1337)

xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0] # desired targets

n = MLP(3, [4, 4, 1]) # Define a NN with 3 inputs, 2 hidden layers each with 4 neurons, and the output layer

# Forward pass: one prediction per input row, before any training.
ypred = [n(x) for x in xs]
ypred

[Value(data-0.49875053448866385),
 Value(data0.624363837927369),
 Value(data0.43269545045660734),
 Value(data-0.34674433215627387)]

The result from my run was: 

        Value(data-0.2979093155514126)
        Value(data-0.44825670817767793)
        Value(data-0.3319981622581929)
        Value(data-0.2963201334909924)

Thus we want the first and last predictions to increase (towards their target of 1.0) and the middle two to decrease (towards their target of -1.0). We do this by defining a loss function and minimising it with backpropagation.

In [248]:
# Sum-of-squared-errors loss over all (target, prediction) pairs.
loss = sum(
    (target - pred)**2
    for target, pred in zip(ys, ypred)
)
# Run backpropagation from the loss to populate the gradients.
loss.backward()
# draw_dot(loss)