In [3]:
import numpy as np
import pandas as pd
%load_ext autoreload
%autoreload 2

In [4]:
import src

In [6]:
a = src.Value(5)
b = src.Value(6)

In [7]:
a

Value(data=5)

In [8]:
a + b

Value(data=11)

In [9]:
a*b

Value(data=30)

In [10]:
c = src.Value(2)

In [11]:
# `a` feeds `d` twice (inside a*b and as a bare term), so after d.backward()
# its gradient is b.data + 1 = 7, as the a.grad output further down shows.
d = a * b + c + a

In [12]:
d

Value(data=37)

In [13]:
d.__dict__

{'data': 37,
 'grad': 0.0,
 '_prev': (Value(data=32), Value(data=5)),
 'ops': '+',
 '_backward': <function src.nn.Value.__add__.<locals>._backward()>,
 'label': None}

In [14]:
d._prev[0].__dict__

{'data': 32,
 'grad': 0.0,
 '_prev': (Value(data=30), Value(data=2)),
 'ops': '+',
 '_backward': <function src.nn.Value.__add__.<locals>._backward()>,
 'label': None}

In [15]:
d._prev[0]._prev

(Value(data=30), Value(data=2))

#### Testing implementation

In [17]:
a.grad, b.grad, c.grad, d.grad

(0.0, 0.0, 0.0, 0.0)

In [18]:
d.backward()

In [19]:
a.grad, b.grad, c.grad, d.grad

(7.0, 5.0, 1.0, 1)

In [20]:
2*a

Value(data=10)

In [21]:
# Rebinds the name `a` to a fresh Value. `d` still holds the Value(5) it was
# built from, so d.data stays 37 and the a/d cell below evaluates 2/37.
a = src.Value(2)

In [22]:
a.exp()

Value(data=7.38905609893065)

In [23]:
a/d

Value(data=0.05405405405405406)

### Defining MLP and dependencies

In [24]:
import random

In [25]:
class Node:
    """A single neuron: bias plus weighted sum of the inputs, passed through tanh.

    Attributes:
        w: list of `src.Value` weights, one per input.
        b: `src.Value` bias.
    """

    def __init__(self, n_in):
        """Create `n_in` weights and one bias, each seeded from `random.random()`."""
        weights = []
        for _ in range(n_in):
            weights.append(src.Value(random.random()))
        self.w = weights
        self.b = src.Value(random.random())

    def __call__(self, x):
        """Return tanh(b + sum(w_i * x_i)) for the iterable of inputs `x`.

        Weights and inputs are paired with `zip`, so surplus entries on either
        side are ignored.
        """
        products = []
        for weight, feature in zip(self.w, x):
            products.append(weight * feature)
        # The bias is the start value of the sum, so it is the left-most operand.
        pre_activation = sum(products, self.b)
        return pre_activation.tanh()

    def parameters(self):
        """Return a new list holding every weight followed by the bias."""
        return [*self.w, self.b]
    

class Layer:
    """A layer of `n_out` independent `Node`s that all read the same input.

    Attributes:
        nodes: list of the `Node` objects in this layer.
    """

    def __init__(self, n_in, n_out):
        """Build `n_out` nodes, each expecting `n_in` inputs."""
        nodes = []
        for _ in range(n_out):
            nodes.append(Node(n_in))
        self.nodes = nodes

    def __call__(self, x):
        """Feed `x` to every node.

        Returns the list of node outputs, except that a layer with exactly one
        node returns that single output unwrapped.
        """
        outs = list(map(lambda node: node(x), self.nodes))
        if len(outs) == 1:
            return outs[0]
        return outs

    def parameters(self):
        """Return the parameters of all nodes as one flat list, in node order."""
        collected = []
        for node in self.nodes:
            collected.extend(node.parameters())
        return collected
    

class MLP:
    """Multi-layer perceptron: a chain of `Layer`s applied one after another.

    Attributes:
        layers: list of `Layer` objects, in the order they are applied.
    """

    def __init__(self, n_in, out_sizes):
        """Build one layer per entry of `out_sizes`.

        Args:
            n_in: number of inputs to the first layer.
            out_sizes: iterable of layer widths (list, tuple, ...); layer `i`
                takes the previous width as its input size.
        """
        # Unpacking accepts any iterable of widths, not only a list.
        sizes = [n_in, *out_sizes]
        self.layers = [
            Layer(fan_in, fan_out) for fan_in, fan_out in zip(sizes, sizes[1:])
        ]

    def __call__(self, x):
        """Run `x` through every layer in order and return the last output.

        The return value is whatever the final layer returns: a list, or a
        single unwrapped value when that layer has exactly one node.
        """
        last = len(self.layers) - 1
        for idx, layer in enumerate(self.layers):
            x = layer(x)
            # A one-node Layer returns its output unwrapped; the next layer
            # iterates over its input, so put the lone value back in a list.
            if idx < last and not isinstance(x, (list, tuple)):
                x = [x]
        return x

    def parameters(self):
        """Return the parameters of all layers as one flat list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

### Testing

In [374]:
## Training data: four samples of three features each, with one target per sample
x_in = [
    [1.0, -5.0, 8.0],
    [2.123, 1.123, -123],
    [-8.783, 89.34, -0.7742],
    [53.8763, 2.7243, -4.2378],
]
y_gt = [1, 0, 1, 0]

# Input width is the sample length (3); two layers of 4 nodes, then one output node
model = MLP(len(x_in[0]), [4, 4, 1])

In [375]:
len(model.parameters()) # Number of parameters in the model

41

In [392]:
def rmse_loss(y_preds, y_gts):
    """Return the sum of squared differences between predictions and targets.

    Despite the name, no mean and no square root are taken: the result is
    sum((y_pred - y_gt) ** 2) over the pairs produced by zip(y_preds, y_gts),
    so surplus entries on either side are ignored. An empty input gives 0.
    """
    squared_errors = []
    for predicted, target in zip(y_preds, y_gts):
        residual = predicted - target
        squared_errors.append(residual ** 2)
    return sum(squared_errors)

In [391]:
model.parameters()[0].grad

0.00020150642991543282

In [395]:
## Each run performs 20 gradient-descent steps; re-run the cell until the loss converges.
step_size = 0.05
for epoch in range(20):
    # Forward pass over every training sample
    y_pred = [model(sample) for sample in x_in]

    # Reset every parameter's gradient before the backward pass
    for p in model.parameters():
        p.grad = 0
    loss = rmse_loss(y_pred, y_gt)

    loss.backward()
    # Gradient descent: move each parameter against its gradient
    for p in model.parameters():
        p.data += -step_size * p.grad
    print(loss)

Value(data=0.0035334061124579676)
Value(data=0.003501284042150137)
Value(data=0.00346975540895388)
Value(data=0.003438803734529329)
Value(data=0.0034084131488859274)
Value(data=0.003378568362451159)
Value(data=0.0033492546396683806)
Value(data=0.003320457774027058)
Value(data=0.003292164064435142)
Value(data=0.0032643602928500285)
Value(data=0.0032370337030905205)
Value(data=0.003210171980757438)
Value(data=0.00318376323419566)
Value(data=0.003157795976435022)
Value(data=0.0031322591080516935)
Value(data=0.0031071419008957216)
Value(data=0.003082433982633862)
Value(data=0.0030581253220604942)
Value(data=0.0030342062151322313)
Value(data=0.0030106672716849964)


In [396]:
y_gt

[1, 0, 1, 0]

In [402]:
y_pred

[Value(data=0.9601300961417621),
 Value(data=-0.00047160135484707364),
 Value(data=0.9624270741779634),
 Value(data=0.0030184226598847152)]