In [71]:
import numpy as np

from typing import List

# Automatic Differentiation Experiment

A big part about the way I'm going to define things is that variables are functions and functions are variables; there's no difference between the two!

What I want is

```
x, a = Variable()
y, z, b, c = Parameter(1), Parameter(1), Parameter(1), Parameter(1)
f = ((x + y) * z) / (a * b / c)

step_size = 0.05

for (x_train, a_train) in train:
    parameter_grads = f.get_grad(x_train, a_train)
    for (parameter, grad) in parameter_grads:
        parameter.value -= step_size * parameter_grad
        
for (x_test, a_test) in test:
    print(f([x_test, a_test]))
```

Note that I should _not_ have to do a forward pass every single time.  
This is poor design for the user; what I should do instead is just have very lazy execution.  
Only ever compute anything when the user needs it. I know tf switched to eager; I should read more about
how to do eager computation and implement, maybe.

Later, training an MLP in the background would look like this:

```
class MLP:
    def __init__(self, input_layer_size, output_layer_size):
        x = Variable()
        self.input = x
        
        self.b = Parameter(`some np vector`)
        self.W = Parameter(`some np matrix`)
        self.f = W * x + b
        
    def forward_pass(self, input):
        return f([input])
        
    def get_gradients(self, input):
        return f.get_grad([input])
        
```

Want to model

$$ f(\cdot) = \frac{(x+y+z)*w}{(a+b)*c)} $$

How should I design this? We want to get something like

```
x = Variable(some_number)
y = Variable(some_number)
.
.
.
c = Variable(some_number)

f = ((x + y + z) * w)
print(f.grad(x))
```

In [125]:
class Add:
    def __init__(self):
        self.f = lambda x, y: x + y
        
    def __call__(self, x, y):
        return self.f(x, y)
    
    def get_grad(self, x, y):
        return (1, 1)
    
class Multiply:
    def __init__(self):
        self.f = lambda x, y: x*y
        
    def __call__(self, x, y):
        return self.f(x, y)
    
    def get_grad(self, x, y):
        return (y, x)
    
class Divide:
    def __init__(self):
        self.f = lambda x, y: x / y
        
    def __call__(self, x, y):
        return self.f(x, y)
    
    def get_grad(self, x, y):
        return (1/y, - x / y**2)
    
class Exponent:
    def __init__(self):
        self.f = lambda x, y: x ** y
        
    def __call__(self, x, y):
        return self.f(x, y)
    
    def get_grad(self, x, y):
        return (y * x ** (y-1), x**y * np.log(x))
    
add = Add()
multiply = Multiply()
divide = Divide()
exp = Exponent()

In [202]:
class Node:
    def __init__(self, parent0, parent1, primitive, eager=True):
        self.parent0 = self.ensure_node(parent0)
        self.parent1 = self.ensure_node(parent1)
        self.primitive = primitive
        self.value = None
        if eager:
            self.value = self.compute()
            
    def __str__(self):
        return str(self.value)
        
    def ensure_node(self, node):
        if isinstance(node, Node) or isinstance(node, Variable):
            return node
        return Variable(node)
    
    def __neg__(self):
        return 
        
    def __add__(self, node):
        node = self.ensure_node(node)
        return Node(self, node, add)
    
    def __radd__(self, node):
        return Node(node, self, add)
    
    def __mul__(self, node):
        return Node(self, node, multiply)
    
    def __rmul__(self, node):
        return Node(node, self, multiply)
    
    def __truediv__(self, node):
        return Node(self, node, divide)
    
    def __rtruediv__(self, node):
        return Node(node, self, divide)
    
    def __pow__(self, node):
        return Node(self, node, exp)
    
    def __rpow__(self, node):
        return Node(node, self, exp)
    
    def compute(self):
        if self.value is None:
            self.value = self.primitive(self.parent0.compute(), self.parent1.compute())
            self.compute_gradient()
        return self.value
    
    def compute_gradient(self):
        """
        Computes the gradient with respect to parent0, parent1.
        """
        parent0_grad, parent1_grad = self.primitive.get_grad(self.parent0.compute(), self.parent1.compute())
        self.grad_dict = {}
        for key in self.parent0.grad_dict:
            self.grad_dict[key] = self.parent0.grad_dict[key] * parent0_grad
        for key in self.parent1.grad_dict:
            self.grad_dict[key] = self.parent1.grad_dict[key] * parent1_grad
            
    def get_grad(self, var: Variable):
        """
        Returns the gradient of Node with respect to variable, using the 
            position of the variable passed in in memory.
        """
        return self.grad_dict[id(var)]
    
    
class Variable(Node):
    def __init__(self, value):
        self.value = value
        self.grad_dict = {id(self): 1}
        
    def compute(self):
        return self.value

In [199]:
x = Variable(4)
x.grad_dict

{4512159392: 1}

In [204]:
x = Variable(4)
y = Variable(3)
z = Variable(2)
w = x * y
a = w / z
print(a)
print(a.grad_dict)
print(a.get_grad(x), a.get_grad(z))

6.0
{4512229416: 1.5, 4512229192: 2.0, 4512230928: -3.0}
1.5 -3.0


In [205]:
x = Variable(2)
denominator = 1 + np.e**(-x)
f = 1 / (1 + np.e**(x * -1))
print(f.compute())
print(f.get_grad(x))

TypeError: bad operand type for unary -: 'Variable'