In [None]:
#a tiny autograd engine:
#it implements backpropagation
#and lets you build up mathematical expressions

import math
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# A scalar-valued function of a single input.
def f(x):
    """Evaluate the quadratic 3*x**2 - 4*x + 5 at x.

    Only arithmetic operators are applied to x, so any operand that supports
    them works (plain numbers, or NumPy arrays for elementwise evaluation).
    """
    squared_term = 3 * x**2
    linear_term = 4 * x
    return squared_term - linear_term + 5

# Spot-check a single point: 3*(3.0)**2 - 4*(3.0) + 5 = 20.0.
print(f(3.0))

# f uses only arithmetic operators, so passing a NumPy array evaluates it
# elementwise: one output per sample in [-5, 5), spaced 0.25 apart.
xs = np.arange(-5, 5, step=0.25)
ys = f(xs)

# Uncomment to inspect the samples or to plot the parabola:
# print(xs)
# print(ys)
# plt.plot(xs, ys)
# plt.show()

In [None]:
# We are not going to derive f'(x) symbolically -- nobody could write that out
# for a neural net, it would be tens of thousands of terms. Instead, measure
# what the derivative actually means: nudge x by a small h and see how much,
# and in which direction, f responds.
h = 0.001
x = 3.0
nudged = f(x + h)
rise = nudged - f(x)
print(nudged)    # slightly greater than f(3.0): f is increasing at x = 3
print(rise)      # how strongly the function responded to the nudge
print(rise / h)  # rise over run -- the slope, about 14 at x = 3

# The same estimate at two more points:
# about -22 at x = -3, and about 0 at x = 2/3 (the bottom of the parabola).
for x in (-3, 2/3):
    print((f(x + h) - f(x)) / h)

In [None]:

# A slightly more complex case: a function of three inputs, d = a*b + c.
print("-----------")
a, b, c = 2.0, -3.0, 10.0
d = a*b + c
print(d, "\n")

# What does the derivative of d with respect to each input tell us?
# A bit hacky, but direct: bump one input at a time by a very small h and
# compare the result against the unbumped baseline d1.
h = 0.0001
a, b, c = 2.0, -3.0, 10.0

d1 = a*b + c
print("d1:", d1)  # baseline: 4.0

# Influence of a: a becomes slightly more positive while b is still negative,
# so a*b contributes *less* and d drops just below 4 -- a negative slope.
a = a + h
d2 = a*b + c
print("d2:", d2)
print('slope:', (d2 - d1)/h)  # about -3: differentiating d wrt a gives b

# Influence of b: put a back, then make b slightly less negative, which
# pushes d up -- a positive slope.
a = 2.0
b = b + h
d3 = a*b + c
print("d3:", d3)
print('slope:', (d3 - d1)/h)  # about 2: differentiating d wrt b gives a

# Influence of c: undo the bump on b, then bump c. c is added directly,
# so d moves by the same amount as c does.
b = b - h
c = c + h
d4 = a*b + c
print("d4:", d4)
print('slope:', (d4 - d1)/h)  # about 1: d changes one-for-one with c


In [None]:
#so here's what the derivative tells us about the function
#let's move to neural networks
#these are MASSIVE expressions. we need some data structures to hold those

class Value:
    """A scalar that remembers which operation and operands produced it.

    Attributes:
        data: the wrapped number.
        _prev: set of the Value operands this one was computed from
            (empty for a value created directly).
        _op: symbol of the operation that produced this value ('+' or '*'),
            or '' for a value created directly.
        label: optional human-readable name for the value.
    """

    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        # Callers pass the operands as a tuple; they are kept as a set, so an
        # operand used twice (as in a + a) is recorded only once.
        self._prev = set(_children)
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"Value(data={self.data})"

    @staticmethod
    def _wrap(operand):
        """Return operand unchanged if it is a Value, else wrap it in one."""
        return operand if isinstance(operand, Value) else Value(operand)

    def __add__(self, other):
        """Return self + other as a new Value whose operands are recorded.

        other may be a Value or a plain number; a plain number is wrapped
        first so that it also appears as a node among the operands.
        """
        other = Value._wrap(other)
        return Value(self.data + other.data, (self, other), '+')

    def __mul__(self, other):
        """Return self * other as a new Value whose operands are recorded.

        other may be a Value or a plain number; a plain number is wrapped
        first so that it also appears as a node among the operands.
        """
        other = Value._wrap(other)
        return Value(self.data * other.data, (self, other), '*')

    def __radd__(self, other):
        """Handle other + self when other is not a Value (e.g. 1 + a)."""
        return self + other

    def __rmul__(self, other):
        """Handle other * self when other is not a Value (e.g. 2 * a)."""
        return self * other
    


In [None]:
# Leaf values of the expression d = a*b + c. Each label matches the name of
# the variable it is bound to.
a = Value(2.0, label='a')
b = Value(-3.0, label='b')
c = Value(10.0, label='c')
e = a*b; e.label='e'

# a + b and a * b only work on Value objects because the class defines
# __add__ and __mul__; the operator form is sugar for the explicit call.
print(a.__add__(b))
print(a.__mul__(b))
print(a*b)  # means the same thing as the explicit a.__mul__(b) above

# Build d = a*b + c in two steps so the intermediate product gets its own label.
d = e+c; d.label='d'
print(d)

# The operands d was computed from (e and c).
print(d._prev)



In [None]:
#these expressions are about to get larger!
from graphviz import Digraph
def trace(root):
    """Collect every node and edge reachable from root through `_prev`.

    Args:
        root: the node to start from. It, and every node reachable from it,
            must be hashable and expose its operands as an iterable `_prev`.

    Returns:
        A pair (nodes, edges): nodes is the set of all reachable nodes,
        including root; edges is the set of (child, parent) pairs, one for
        each child found in a parent's `_prev`.

    The walk is depth-first with an explicit stack rather than recursion, so
    a very long chain of operations cannot hit Python's recursion limit.
    Nodes and edges are added in the same order a recursive walk would use.
    """
    nodes, edges = {root}, set()
    exhausted = object()  # sentinel: this parent has no operands left to visit
    pending = [(root, iter(root._prev))]

    while pending:
        parent, remaining = pending[-1]
        child = next(remaining, exhausted)
        if child is exhausted:
            pending.pop()
            continue
        edges.add((child, parent))
        if child not in nodes:
            # First visit: descend into this child before the parent's
            # remaining operands.
            nodes.add(child)
            pending.append((child, iter(child._prev)))

    return nodes, edges

def draw_dot(root):
    """Build a Graphviz drawing of the expression graph that ends at root.

    Each value becomes a rectangular record node showing its data, preceded
    by its label when it has a non-empty one. A value produced by an
    operation also gets a small node for that operation, placed between the
    value and its operands.

    Args:
        root: the final value of the expression; every node reachable from it
            via trace() is drawn.

    Returns:
        The Digraph (SVG format, laid out left to right).
    """
    dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right

    nodes, edges = trace(root)
    for n in nodes:
        uid = str(id(n))
        # One rectangular record node per value. The name is shown only when
        # the value has one; unlabelled values render exactly as before.
        name = getattr(n, 'label', '')
        if name:
            text = "{ %s | data %.4f }" % (name, n.data)
        else:
            text = "{ data %.4f }" % (n.data, )
        dot.node(name=uid, label=text, shape='record')
        if n._op:
            # This value is the result of an operation: add a node for the
            # operation and connect it to the value it produced.
            dot.node(name=uid + n._op, label=n._op)
            dot.edge(uid + n._op, uid)

    for n1, n2 in edges:
        # Connect each operand n1 to the operation node of its result n2.
        dot.edge(str(id(n1)), str(id(n2)) + n2._op)

    return dot

# Draw the graph for d = a*b + c. draw_dot returns the Digraph; leaving the
# call as the last expression of the cell lets the notebook display it.
draw_dot(d)