In [None]:
# Following this blog post: https://vmartin.fr/understanding-automatic-differentiation-in-30-lines-of-python.html

In [1]:
import numpy as np
from collections import namedtuple

In [2]:
class Tensor:
    """Minimal wrapper around a single value that supports ``+``.

    This is the warm-up version: it stores a value and nothing else. A class
    with the same name is defined again further down in this notebook.
    """

    def __init__(self, value=None):
        # The wrapped payload; None means "no value yet".
        self.value = value

    def __repr__(self):
        """Render as ``T:<value>``."""
        return "T:{}".format(self.value)

    def __add__(self, other):
        """Return a new Tensor holding the sum of both wrapped values."""
        total = self.value + other.value
        return Tensor(value=total)

In [3]:
# Getting comfortable with namedtuple: fields can be read by position or by name.

Student = namedtuple('Student', ('name', 'age', 'DOB'))

s = Student(name='Nidhi', age='21', DOB='2541997')

print(f"Age {s[1]}")  # index 1 is the same field as s.age
s

Age 21


Student(name='Nidhi', age='21', DOB='2541997')

In [9]:
Children = namedtuple('Children', ['a', 'b', 'op']) # child a, child b, operation

# ! This whole thing is just a tree of chain rule
class Tensor:
    """Node of an expression tree that can be evaluated and differentiated.

    A leaf node holds a concrete ``value`` and has ``children is None``.
    An interior node holds a ``Children`` tuple ``(a, b, op)`` and derives its
    value as ``op(a.value, b.value)``, or ``op(a.value)`` when ``b`` is
    ``None`` (unary operations such as ``exp``).
    """

    def __init__(self, value=None, children=None, name=None):
        self.value = value # Tensor value
        self.children = children # Children object (named tuple)
        self.name = name # name of the tensor

    @staticmethod
    def _from_op(a, b, op):
        """Build the node ``op(a, b)`` and evaluate it immediately."""
        return Tensor(children=Children(a, b, op)).forward()

    def forward(self):
        """Recursively evaluate this node and return ``self``.

        Leaf nodes are returned untouched. For an interior node the children
        are evaluated first; ``self.value`` is only overwritten when every
        operand the operation needs has a value that is not ``None``.
        """
        if self.children is None:
            return self # leaf node

        a = self.children.a.forward()
        # Unary operations (exp) carry b=None, so there is nothing to evaluate
        # on that side; dereferencing it would raise AttributeError.
        b = self.children.b.forward() if self.children.b is not None else None

        if a.value is not None:
            if b is None:
                # The operation only needs a (like exp(a)).
                self.value = self.children.op(a.value)
            elif b.value is not None:
                self.value = self.children.op(a.value, b.value)
        return self

    def grad(self, deriv_to):
        """Return a new Tensor holding d(self)/d(deriv_to).

        Args:
            deriv_to: the Tensor to differentiate with respect to. It is
                matched by identity (``is``), not by value.

        Returns:
            A Tensor built with the usual sum, difference, product, quotient
            and exp rules applied recursively down the tree.

        Raises:
            NotImplementedError: if a node uses an operation other than
                np.add, np.subtract, np.multiply, np.divide or np.exp.
        """
        # Derivative of a tensor with itself is 1
        if self is deriv_to:
            return Tensor(1)

        # A leaf that is not deriv_to is a constant: derivative is 0
        if self.children is None:
            return Tensor(0)

        a, b, op = self.children

        if op is np.add: # (a+b)' = a' + b'
            return a.grad(deriv_to) + b.grad(deriv_to)
        if op is np.subtract: # (a-b)' = a' - b'
            return a.grad(deriv_to) - b.grad(deriv_to)
        if op is np.multiply: # (a*b)' = a'*b + a*b'
            return a.grad(deriv_to)*b + a*b.grad(deriv_to)
        if op is np.divide: # (a/b)' = (a'*b - a*b')/b^2
            # b*b instead of b**2: Tensor defines no __pow__.
            return (a.grad(deriv_to)*b - a*b.grad(deriv_to))/(b*b)
        if op is np.exp: # (exp(a))' = exp(a)*a'
            # Use Tensor.exp() so the factor is a proper graph node rather
            # than the result of calling a NumPy ufunc on a Tensor object.
            return a.grad(deriv_to)*a.exp()

        raise NotImplementedError(f"Gradient not implemented for {self.children.op}")

    def __repr__(self):
        return f"T:{self.value}"

    def __add__(self, other):
        """Return the evaluated node ``self + other``."""
        return Tensor._from_op(self, other, np.add)

    def __sub__(self, other):
        """Return the evaluated node ``self - other``."""
        return Tensor._from_op(self, other, np.subtract)

    def __mul__(self, other):
        """Return the evaluated node ``self * other``."""
        return Tensor._from_op(self, other, np.multiply)

    def __truediv__(self, other):
        """Return the evaluated node ``self / other``."""
        return Tensor._from_op(self, other, np.divide)

    def __neg__(self):
        """Return the evaluated node ``0 - self``, with a zero shaped like ``self.value``."""
        zero = Tensor(value=np.zeros_like(self.value))
        return Tensor._from_op(zero, self, np.subtract)

    def exp(self):
        """Return the evaluated unary node ``exp(self)``."""
        return Tensor._from_op(self, None, np.exp)

In [7]:
# Build z2 = (x + y) * y from two leaf tensors and show the evaluated values.
x, y = Tensor(3), Tensor(5)
z1 = x + y
z2 = z1 * y
print(x, y)
print(z2)

T:3 T:5
T:40


$$z_1 = x + y = 3 + 5 = 8$$
$$z_2 = z_1 \cdot y = 8 \cdot 5 = 40$$

In [8]:
# Show the leaves again, then differentiate z2 = (x + y) * y with respect to y:
# d(z2)/dy = y + (x + y) = 5 + 8 = 13.
print(x, y)
print(z2.grad(deriv_to=y))

T:3 T:5
T:13
