In [674]:
import random
import math

# Node to store value
class Node:
    """Scalar value that remembers how it was computed, for reverse-mode autodiff.

    Every arithmetic operation returns a new ``Node`` whose ``parents`` are the
    operands and whose ``_backward`` closure pushes the result's gradient back
    onto those operands. Calling ``backward()`` on a result walks that graph.

    Attributes:
        val: the numeric value held by this node.
        grad: accumulated gradient; starts at 0.0 and is only ever added to,
            so callers must reset it themselves before a new backward pass.
        parents: set of nodes this node was computed from (empty for leaves).
        op: short label of the operation that produced this node, or None.
    """

    def __init__(self, val, parents=(), operation=None):
        self.val = val
        self.grad = 0.0
        # Leaves have nothing to propagate to, hence the no-op default.
        self._backward = lambda: None
        self.parents = set(parents)
        self.op = operation

    def __repr__(self):
        # Only the value is shown, so printing a Node looks like printing a number.
        return str(self.val)

    def __mul__(self, operand):
        """Return ``self * operand``; plain numbers are wrapped in a Node."""
        operand = operand if isinstance(operand, Node) else Node(operand)
        out = Node(self.val * operand.val, (self, operand), operation="*")

        def _backward():
            # d(a*b)/da = b and d(a*b)/db = a
            self.grad += operand.val * out.grad
            operand.grad += self.val * out.grad
        out._backward = _backward
        return out

    def __rmul__(self, operand):
        return self * operand

    def __add__(self, operand):
        """Return ``self + operand``; plain numbers are wrapped in a Node."""
        operand = operand if isinstance(operand, Node) else Node(operand)
        out = Node(self.val + operand.val, (self, operand), operation="+")

        def _backward():
            # Addition passes the gradient through unchanged to both operands.
            self.grad += 1.0 * out.grad
            operand.grad += 1.0 * out.grad
        out._backward = _backward
        return out

    def __radd__(self, operand):
        return self + operand

    def __neg__(self):
        return self * -1

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return other + (-self)

    def __pow__(self, other):
        """Return ``self ** other``.

        ``other`` is used as a constant exponent: it is not added to the graph
        and receives no gradient.
        """
        out = Node(self.val**other, (self,), "pow")

        def _backward():
            # Power rule: d(x^n)/dx = n * x^(n-1)
            self.grad += (other * self.val**(other - 1)) * out.grad
        out._backward = _backward

        return out

    def tanh(self):
        """Return a node holding the hyperbolic tangent of this node's value."""
        # math.tanh saturates cleanly at +/-1.0; the explicit
        # (e^{2x} - 1) / (e^{2x} + 1) form overflows in math.exp for large x.
        out = Node(math.tanh(self.val), (self,), "tanh")

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)^2
            self.grad += (1 - out.val**2) * out.grad
        out._backward = _backward
        return out

    def backward(self):
        """Back-propagate from this node through everything it depends on.

        Sets ``self.grad`` to 1.0 and then runs every reachable node's
        ``_backward`` in reverse topological order. Gradients are accumulated
        with ``+=`` and are not reset here.
        """
        topo_order = []
        visited = {self}
        # Iterative post-order DFS. Each stack entry keeps a live iterator over
        # the node's parents so the walk resumes where it left off, which gives
        # the same ordering as a recursive sort without being bounded by the
        # interpreter's recursion limit on long operation chains.
        stack = [(self, iter(self.parents))]
        while stack:
            node, pending_parents = stack[-1]
            for parent in pending_parents:
                if parent not in visited:
                    visited.add(parent)
                    stack.append((parent, iter(parent.parents)))
                    break
            else:
                # All parents handled: the node may now be emitted.
                topo_order.append(node)
                stack.pop()

        self.grad = 1.0
        for node in reversed(topo_order):
            node._backward()
    

In [675]:
# # Line equation
# w = Node(2)
# x = Node(3)
# c = Node(6)

# def f(x):
#     return w.val * x.val + c.val

In [676]:
# # Gradient calculation
# h = Node(0.0000001)
# delta_x = x + h
# dy_dx = (f(delta_x) - f(x)) / h.val
# dy_dx

In [677]:
# Two weights and two biases, each drawn uniformly from [0, 1).
# The draws happen in the order w1, w2, c1, c2.
w1, w2, c1, c2 = (Node(random.random()) for _ in range(4))

# Fixed inputs
x1, x2 = Node(4.0), Node(-6.0)

# Forward pass: z = tanh((w1*x1 + c1) + (w2*x2 + c2)).
# Every intermediate result keeps its own name so its gradient can be inspected.
w1x1 = w1 * x1
w2x2 = w2 * x2
y1 = w1x1 + c1
y2 = w2x2 + c2
y = y1 + y2
z = y.tanh()

# y._backward()
# y2._backward()
# y1._backward()
# w2x2._backward()
# w1x1._backward()

# z.backward()

In [678]:
# Display the forward-pass result (z is the tanh of y); Node's repr prints just the value.
z

0.4658688525582094

In [679]:
# Back-propagate from z: z.grad becomes 1.0 and gradients are accumulated
# into every node z was computed from (w1, w2, c1, c2, x1, x2 and the intermediates).
z.backward()

In [680]:
# Cross-check against PyTorch autograd
import torch

# Mirror each scalar Node as a leaf tensor that tracks gradients.
torch_x1, torch_x2, torch_w1, torch_w2, torch_c1, torch_c2 = (
    torch.tensor(node.val, requires_grad=True)
    for node in (x1, x2, w1, w2, c1, c2)
)

# Same expression as the hand-built graph: tanh((w1*x1 + c1) + (w2*x2 + c2)).
torch_y = (torch_w1 * torch_x1 + torch_c1) + (torch_w2 * torch_x2 + torch_c2)
torch_z = torch_y.tanh()

In [681]:
# Value from the hand-written Node graph, shown again for comparison with torch_z.
z

0.4658688525582094

In [682]:
# PyTorch's value for the same expression; the recorded output shows it rounded to 4 decimals.
torch_z

tensor(0.4659, grad_fn=<TanhBackward0>)

In [683]:
# Run PyTorch autograd so the leaf tensors created with requires_grad=True get their .grad filled in.
torch_z.backward()

In [684]:
# For each quantity print PyTorch's gradient followed by the Node gradient,
# one value per line, in the order x1, x2, w1, w2, c1, c2.
print(
    *(
        grad
        for torch_param, node_param in (
            (torch_x1, x1),
            (torch_x2, x2),
            (torch_w1, w1),
            (torch_w2, w2),
            (torch_c1, c1),
            (torch_c2, c2),
        )
        for grad in (torch_param.grad, node_param.grad)
    ),
    sep="\n",
)

tensor(0.6867)
0.6867285883899293
tensor(0.5757)
0.5757186002670746
tensor(3.1319)
3.131864848864389
tensor(-4.6978)
-4.697797273296584
tensor(0.7830)
0.7829662122160973
tensor(0.7830)
0.7829662122160973


In [685]:
class Neuron:
    """A single tanh unit computing ``tanh(sum(w_i * x_i) + b)``.

    Attributes:
        w: list of weight Nodes, one per input, each initialised uniformly in [-1, 1].
        b: bias Node, initialised uniformly in [-1, 1].
    """

    def __init__(self, no_of_inputs):
        self.w = [Node(random.uniform(-1, 1)) for _ in range(no_of_inputs)]
        self.b = Node(random.uniform(-1, 1))

    def __call__(self, x):
        """Return the neuron's activation for the inputs ``x``.

        Args:
            x: iterable of inputs (numbers or Nodes), one per weight.

        Raises:
            ValueError: if the number of inputs differs from the number of
                weights. ``zip`` would otherwise drop the surplus silently and
                return an activation computed from only part of the data.
        """
        inputs = list(x)
        if len(inputs) != len(self.w):
            raise ValueError(
                f"Neuron expects {len(self.w)} inputs, got {len(inputs)}"
            )
        # The bias is the start value of the sum, so it is part of the graph.
        s = sum((wi * xi for wi, xi in zip(self.w, inputs)), self.b)
        return s.tanh()

    def parameters(self):
        """Return the trainable values: all weights followed by the bias."""
        return self.w + [self.b]
    
# Smoke test: one 2-input neuron applied to a 2-element input.
# Weights are random, so the displayed value changes between runs.
x = [2.0, -4.0]
n = Neuron(2)
n(x)

0.9996045562816742

In [686]:
class Layer:
    """A group of independent neurons that all receive the same input.

    Attributes:
        neurons: list of ``out_nodes`` Neuron objects, each taking ``in_nodes`` inputs.
    """

    def __init__(self, in_nodes, out_nodes):
        neurons = []
        for _ in range(out_nodes):
            neurons.append(Neuron(in_nodes))
        self.neurons = neurons

    def __call__(self, x):
        """Feed ``x`` to every neuron and return their outputs as a list."""
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron(x))
        return outputs

    def parameters(self):
        """Return every neuron's parameters in one flat list, neuron by neuron."""
        params = []
        for neuron in self.neurons:
            params.extend(neuron.parameters())
        return params

# Smoke test: a layer of 3 neurons, each taking 2 inputs, yields a list of 3 outputs.
# Note that the name `n` is rebound here from the earlier Neuron to a Layer.
x = [2.0, -4.0]
n = Layer(2, 3)
n(x)

[0.011420588370786336, 0.26380179097884005, 0.9995642255834293]

In [687]:
class MLP:
    """Multi-layer perceptron: a chain of Layers applied one after another.

    Attributes:
        layers: list of Layer objects; layer ``i`` maps ``sizes[i]`` inputs to
            ``sizes[i + 1]`` outputs, where ``sizes`` is ``in_nodes`` followed
            by ``layer_specs``.
    """

    def __init__(self, in_nodes, layer_specs):
        """Build the network.

        Args:
            in_nodes: number of input features.
            layer_specs: output width of each layer, in order. Any iterable is
                accepted (list, tuple, generator), not only a list.
        """
        # Unpacking works for every iterable; list concatenation would reject tuples.
        sizes = [in_nodes, *layer_specs]
        # Pair each width with the next one: (in, h1), (h1, h2), ...
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]

    def __call__(self, x):
        """Pass ``x`` through every layer in order and return the last layer's output list."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers in one flat list, layer by layer."""
        return [p for layer in self.layers for p in layer.parameters()]

# Build a 3-input network with layer widths 4, 4 and 1, then run one sample through it.
# `n` is rebound to this MLP and is the network the later cells predict with and train.
x = [2.0, -4.0, 6.0]
n = MLP(3, [4,4,1])
n(x)

[-0.45161691216228605]

In [688]:
# Try the network on a toy dataset: four 3-feature samples with a +1 / -1 target each.
X = [
    [2.0, 4.0, -8.0],
    [-6.0, 3.0, 5.0],
    [7.0, 2.0, 7.0],
    [8.0, 9.0, -4.0],
]
Y = [1.0, -1.0, -1.0, 1.0]

# The network returns a one-element list per sample; keep only that output Node.
Y_pred = [n(sample)[0] for sample in X]

In [689]:
# Show the network's predictions for the four rows of X (each entry is a Node).
Y_pred

[0.6134792456102443,
 -0.39656343406800304,
 -0.1962253997132718,
 0.3062809878475438]

In [690]:
# Sum of squared errors between predictions and targets.
# The result is a Node, so it can be back-propagated.
loss = sum((pred - target) ** 2 for pred, target in zip(Y_pred, Y))
print(loss)

1.6408336585656964


In [691]:
# Peek at one gradient: first layer, first neuron, first weight.
# A Node's grad starts at 0.0 and only changes once a backward pass reaches it.
n.layers[0].neurons[0].w[0].grad

0.0

In [696]:
# Total trainable values for the 3 -> 4 -> 4 -> 1 network:
# (3*4 + 4) + (4*4 + 4) + (4*1 + 1) = 41 weights and biases.
len(n.parameters())

41

In [701]:
LEARNING_RATE = 0.01
EPOCHS = 25

# Plain gradient descent on the summed squared error over the whole dataset.
for epoch in range(EPOCHS):

    # Forward pass: fresh predictions and the loss Node built from them.
    Y_pred = [n(sample)[0] for sample in X]
    loss = sum((pred - target) ** 2 for pred, target in zip(Y_pred, Y))

    # backward() only accumulates, so clear what the previous epoch left on the parameters.
    for p in n.parameters():
        p.grad = 0.0

    # Backward pass: fills p.grad for every parameter that contributed to the loss.
    loss.backward()

    # Step each parameter against its gradient.
    for p in n.parameters():
        p.val -= LEARNING_RATE * p.grad

    # The loss shown is the one computed before this epoch's update.
    print(epoch, loss)

0 0.04165207639793912
1 0.041037317466398326
2 0.04043989657852427
3 0.039859088291375945
4 0.03929420770399081
5 0.03874460760272856
6 0.03820967585034613
7 0.037688832994273544
8 0.03718153007240695
9 0.03668724659720476
10 0.03620548870102809
11 0.03573578742754556
12 0.035277697155669155
13 0.0348307941439311
14 0.034394675184481846
15 0.033968956357007855
16 0.03355327187385594
17 0.03314727300852532
18 0.03275062710046275
19 0.03236301662978529
20 0.031984138356167334
21 0.03161370251667412
22 0.03125143207781254
23 0.03089706203750489
24 0.030550338773081963


In [702]:
# Predictions from the last forward pass of the training loop,
# i.e. computed just before the final parameter update was applied.
Y_pred

[0.9580498923710723,
 -0.9120606451659372,
 -0.9066485246830148,
 0.8889022989863676]