Making this part 4, since (following Andrej) it seems like we're about to eat our dessert.

In [1]:
import math, random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class Value:
    """Scalar node in a dynamically built computation graph with reverse-mode autodiff.

    Every arithmetic operation returns a new ``Value`` that remembers its
    operands (``_prev``), a label for the operation (``_op``) and a closure
    (``_backward``) that pushes the result's gradient back onto the operands
    using the chain rule. Calling ``backward()`` on the final node runs those
    closures in reverse topological order.
    """

    def __init__(self, data, _children=(), _op=''):
        """Wrap ``data``; ``_children`` and ``_op`` record how the node was produced."""
        self.data = data
        self.grad = 0.0
        # leaf nodes have nothing to propagate, so the default is a no-op
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op

    def __repr__(self):
        return f'Value(data={self.data})'

    # addition
    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        # back propagation for addition: the local derivative is 1 for both operands
        def _backward():
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad

        # store the function itself (not the result of calling it) so that
        # backward() can invoke it later, once out.grad is known
        out._backward = _backward
        return out

    # negation, which is used for subtraction
    def __neg__(self):
        """Return ``-self``, expressed as multiplication by -1."""
        return self * -1

    # subtraction -- the addition of the negation of the other operand
    def __sub__(self, other):  # self - other
        """Return ``self - other``."""
        return self + (-other)

    # multiplication
    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        # back propagation for multiplication: each operand's local derivative
        # is the other operand's data
        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out._backward = _backward
        return out

    # raising to a constant power, differentiated with the power rule
    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent.

        Raises:
            AssertionError: if ``other`` is not an int or float.
        """
        assert isinstance(other, (int, float)), 'only suppporting ints and floats for now'
        out = Value(self.data**other, (self,), f'**{other}')

        def _backward():
            # x**0 is constant, so its derivative is 0 everywhere; the guard
            # avoids evaluating 0 * x**-1, which raises ZeroDivisionError at x == 0
            local = 0.0 if other == 0 else other * self.data**(other - 1)
            self.grad += local * out.grad

        out._backward = _backward
        return out

    # true division, written as a/b = a * b**-1
    def __truediv__(self, other):  # self / other
        """Return ``self / other``."""
        return self * other**-1

    def __rtruediv__(self, other):  # other / self
        """Return ``other / self`` when the left operand is a plain number."""
        return other * self**-1

    # Python tries the left operand's method first, so e.g. 2 * a fails on int
    # and falls back to the reflected methods below with the operands swapped
    def __rmul__(self, other):  # other * self
        return self * other

    def __radd__(self, other):  # other + self
        return self + other

    def __rsub__(self, other):  # other - self
        return other + (-self)

    # temporary activation function
    def tanh(self):
        """Return ``tanh(self)`` as a new node."""
        # math.tanh saturates to +/-1.0 for large |x|, whereas the explicit
        # (e^{2x} - 1) / (e^{2x} + 1) form overflows in math.exp
        t = math.tanh(self.data)
        out = Value(t, (self,), 'tanh')

        def _backward():
            # d/dx tanh(x) = 1 - tanh(x)^2
            self.grad += (1 - t**2) * out.grad

        out._backward = _backward
        return out

    def exp(self):
        """Return ``e ** self`` as a new node."""
        x = self.data
        out = Value(math.exp(x), (self, ), 'exp')

        def _backward():
            # the local derivative of e^x is e^x itself, already stored in out.data;
            # multiply by out.grad for the chain rule
            self.grad += out.data * out.grad

        out._backward = _backward
        return out

    # back propagation over everything this node depends on
    def backward(self):
        """Accumulate d(self)/d(node) into ``node.grad`` for every node reachable from ``self``.

        Gradients are added to whatever is already stored in ``grad``, so
        callers that reuse nodes across passes must reset ``grad`` themselves.
        """
        # Topologically sort the graph with an explicit stack (post-order DFS).
        # This visits nodes in the same order as the recursive formulation but
        # does not hit the interpreter's recursion limit on deep graphs.
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, pending_children = stack[-1]
            for child in pending_children:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # all children are already placed, so the node itself can be placed
                topo.append(node)
                stack.pop()

        # the gradient of the output with respect to itself is 1
        self.grad = 1.0
        for node in reversed(topo):
            # run whichever local backward closure the producing operation stored
            node._backward()

In [3]:
from graphviz import Digraph

def trace(root):
    """Collect every node and edge of the graph that ends at ``root``.

    Starting from ``root``, follows each node's ``_prev`` collection.
    Returns a ``(nodes, edges)`` pair of sets, where each edge is a
    ``(child, parent)`` tuple meaning ``child`` is in ``parent._prev``.
    """
    seen_nodes = set()
    seen_edges = set()

    # explicit worklist instead of a nested recursive helper
    pending = [root]
    while pending:
        current = pending.pop()
        if current in seen_nodes:
            continue
        seen_nodes.add(current)
        for operand in current._prev:
            seen_edges.add((operand, current))
            pending.append(operand)

    return seen_nodes, seen_edges

def draw_dot(root):
    """Build a left-to-right Graphviz ``Digraph`` (SVG format) of the graph ending at ``root``.

    Every value becomes a record-shaped node showing its ``data`` and ``grad``.
    A value with a non-empty ``_op`` also gets a small operation node that
    feeds into it, and its operands are wired into that operation node.
    """
    nodes, edges = trace(root)
    graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})

    for value in nodes:
        value_id = str(id(value))

        # one record node per value in the graph
        graph.node(name=value_id, label='{data = %.2f | grad %.2f}' % (value.data, value.grad), shape='record')

        # leaves have an empty _op and need no operation node
        if not value._op:
            continue

        # operation node, keyed by the value id plus the op label
        op_id = value_id + value._op
        graph.node(name=op_id, label=value._op)
        graph.edge(op_id, value_id)

    # connect each operand to the operation node of the value it feeds
    for source, target in edges:
        graph.edge(str(id(source)), str(id(target)) + target._op)

    return graph

In [4]:
class Neuron:
    """One tanh unit: ``tanh(sum(w_i * x_i) + b)``."""

    def __init__(self, nin):
        """Create ``nin`` weights and one bias, each drawn uniformly from [-1, 1]."""
        # weights are drawn first, then the bias, so the random stream is consumed in that order
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1.0, 1.0)))
        self.w = weights
        self.b = Value(random.uniform(-1.0, 1.0))

    def __call__(self, x):
        """Return the activation for input sequence ``x`` (paired with the weights via ``zip``)."""
        # start from the bias and add one weighted input at a time
        pre_activation = self.b
        for weight, feature in zip(self.w, x):
            pre_activation = pre_activation + weight * feature
        return pre_activation.tanh()

    def parameters(self):
        """Return a new list holding the weights followed by the bias (PyTorch-style accessor)."""
        return [*self.w, self.b]
    

# to make a layer of neurons, specify how many inputs + outputs there are
class Layer:
    """A list of ``nout`` neurons that all read the same ``nin`` inputs."""

    def __init__(self, nin, nout):
        """Create ``nout`` neurons, each taking ``nin`` inputs."""
        units = []
        for _ in range(nout):
            units.append(Neuron(nin))
        self.neurons = units

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns the bare activation when the layer has exactly one neuron,
        otherwise the list of activations in neuron order.
        """
        activations = list(map(lambda neuron: neuron(x), self.neurons))
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """Return the parameters of all neurons, flattened in neuron order."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected
    
    
# now we're making a layer of layers -- a multilayer perceptron (MLP)
class MLP:
    """Multilayer perceptron: a chain of ``Layer`` objects applied one after another.

    ``nin`` is the number of network inputs and ``nouts`` gives the number of
    neurons in each successive layer, e.g. ``MLP(3, [4, 4, 1])`` builds layers
    of shape (3 -> 4), (4 -> 4) and (4 -> 1).
    """

    def __init__(self, nin, nouts):
        """Build one layer per entry of ``nouts``.

        ``nouts`` may be any iterable of sizes (list, tuple, range, ...); it is
        copied into a list so that non-list inputs no longer fail on ``[nin] + nouts``.
        """
        # consecutive pairs of this list are the (inputs, outputs) of each layer
        sz = [nin] + list(nouts)
        self.layers = [Layer(sz[i], sz[i + 1]) for i in range(len(sz) - 1)]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers, flattened in layer order (PyTorch-style accessor)."""
        return [p for layer in self.layers for p in layer.parameters()]
    

In [23]:
# one example input with three features
x = [2.0, 3.0, -1.0]
# 3 inputs -> 4 neurons -> 4 neurons -> 1 output neuron;
# weights and biases are drawn with random.uniform, so the output varies between
# fresh runs (no seed is set in the visible cells)
n = MLP(3, [4, 4, 1])
# forward pass; as the cell's last expression, the resulting Value is displayed
n(x)

Value(data=0.31360688232152)

We're going to make a training loop so we don't have to do this by hand

In [24]:
# input data set: four examples with three features each
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]

# desired network output for each example, in the same order as the rows of xs:
# ys[i] is the target for xs[i]
ys = [1.0, -1.0, -1.0, 1.0]

In [30]:
# plain gradient descent on the summed squared error over all examples
step_size = 0.01  # same step size on every iteration
for k in range(20):
    # forward pass: one prediction per example, then the squared-error loss
    ypred = list(map(n, xs))
    loss = sum((yout - ygt)**2 for yout, ygt in zip(ypred, ys))

    # gradients accumulate with +=, so clear them before each backward pass
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # move every parameter a small step against its gradient
    for p in n.parameters():
        p.data -= step_size * p.grad

    print(k, loss.data)

0 0.06417028402021124
1 0.06314234263677111
2 0.06214414888920264
3 0.06117448560552535
4 0.06023219912515967
5 0.059316195308163996
6 0.05842543583557702
7 0.057558934776833576
8 0.056715755402431886
9 0.05589500722200426
10 0.055095843229732
11 0.05431745734065357
12 0.05355908200286563
13 0.052819985971927184
14 0.05209947223496669
15 0.05139687607305968
16 0.050711563251420386
17 0.05004292832783105
18 0.04939039307053075
19 0.048753404977514785


In [31]:
# predictions from the last loop iteration; they were computed before that
# iteration's parameter update, so they correspond to the last printed loss
ypred

[Value(data=0.9037144625783332),
 Value(data=-0.948509803716335),
 Value(data=-0.8564824661929197),
 Value(data=0.8725873733198362)]