In [60]:
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [61]:
class Value:
    """Scalar node in a dynamically built computation graph with reverse-mode autodiff.

    Every arithmetic operation returns a new ``Value`` that remembers its operands
    (``_prev``) and a closure (``_backward``) that pushes the result's gradient back
    onto those operands. Calling ``backward()`` on a result node fills in ``grad`` on
    every node that contributed to it.
    """

    def __init__(self, data, _children = (), _op = '', _label = ''):
        self.data = data #Numeric value held by this node
        self._prev = set(_children) #Operand nodes this value was computed from
        self.grad = 0 #Gradient of the backward() root w.r.t. this node (accumulated with +=)
        self._backward = lambda: None #Local chain-rule step; a leaf has nothing to propagate
        self._op = _op #Operation that produced this node ('' for leaves)
        self._label = _label #Optional label/name

    def __repr__(self):
        return f"Value(data={self.data})"

    def __add__(self, other): # self + other
        other = other if isinstance(other, Value) else Value(other) #Wrap plain numbers so they join the graph
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            #d(a+b)/da = d(a+b)/db = 1, so the upstream gradient passes straight through
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        return out

    def __mul__(self, other): # self * other
        other = other if isinstance(other, Value) else Value(other) #Wrap plain numbers so they join the graph
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            #Product rule: each operand's gradient is scaled by the other operand's value
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def __pow__(self, other): # self ** other, constant exponent only
        assert isinstance(other, (int, float)), "only supporting int/float powers"
        out = Value(self.data**other, (self,), f'**{other}')

        def _backward():
            if other == 0:
                #x**0 is constant, so its derivative is 0. Skipping also avoids
                #evaluating 0 * 0.0**-1, which raises ZeroDivisionError when data == 0.
                return
            self.grad += (other * self.data**(other-1)) * out.grad #Power rule: d/dx x^n = n*x^(n-1)
        out._backward = _backward

        return out

    def tanh (self):
        """Return tanh(self) as a new node.

        Uses math.tanh, which saturates to +/-1 for large |x|; the explicit
        (e^2x - 1) / (e^2x + 1) form raises OverflowError once exp(2x) exceeds
        the float range.
        """
        t = math.tanh(self.data)
        out = Value(t, (self, ), 'tanh')

        def _backward():
            self.grad += (1 - t**2) * out.grad #d/dx tanh(x) = 1 - tanh(x)^2
        out._backward = _backward
        return out

    def backward(self):
        """Backpropagate from this node through every node it depends on.

        Sets this node's ``grad`` to 1.0 and then runs each node's local
        ``_backward`` in reverse topological order. Gradients are accumulated
        with ``+=`` and are not cleared here, so reset ``grad`` on any node that
        is reused across calls.
        """
        #Topological sort (post-order DFS). An explicit stack is used instead of
        #recursion so that deep graphs (e.g. long chains of additions) do not hit
        #Python's recursion limit.
        topo = []
        visited = set()
        stack = [(self, False)]
        while stack:
            node, children_done = stack.pop()
            if children_done:
                #Every operand of this node has already been emitted
                topo.append(node)
            elif node not in visited:
                visited.add(node)
                stack.append((node, True)) #Revisit once all operands are handled
                for child in node._prev:
                    if child not in visited:
                        stack.append((child, False))

        #Do backprop in reversed topological order
        self.grad = 1.0
        for node in reversed(topo):
            node._backward()

    #The r-prefixed methods (like __radd__) are the reflected operators.
    #For `2 + value`, int.__add__ cannot handle a Value, so Python falls back to
    #value.__radd__(2); the same applies to the other reflected operators below.

    def __neg__(self): # -self
        return self * -1

    def __radd__(self, other): # other + self
        return self + other

    def __sub__(self, other): # self - other
        return self + (-other)

    def __rsub__(self, other): # other - self
        return other + (-self)

    def __rmul__(self, other): # other * self
        return self * other

    def __truediv__(self, other): # self / other
        return self * other**-1

    def __rtruediv__(self, other): # other / self
        return other * self**-1

In [62]:
#Reference check: the same single-neuron computation done with PyTorch autograd
import torch

#Leaf tensors do not track gradients unless asked to (inputs are normally fixed data),
#so requires_grad=True is set on each one to be able to read .grad after backprop
x1 = torch.tensor([2.0], dtype=torch.float64, requires_grad=True)
x2 = torch.tensor([0.0], dtype=torch.float64, requires_grad=True)
w1 = torch.tensor([-3.0], dtype=torch.float64, requires_grad=True)
w2 = torch.tensor([1.0], dtype=torch.float64, requires_grad=True)
b  = torch.tensor([6.8813735870195432], dtype=torch.float64, requires_grad=True)

#Pre-activation (weighted sum plus bias), then the tanh activation
n = x1 * w1 + x2 * w2 + b
o = torch.tanh(n)

#.item() unwraps the single-element tensor into a plain Python float
print(o.item())

#Backprop from the output
o.backward()

print('___')
for grad_label, leaf in (('x2', x2), ('w2', w2), ('x1', x1), ('w1', w1)):
    print(grad_label, leaf.grad.item())


0.7071067811865476
___
x2 0.49999999999999994
w2 0.0
x1 -1.4999999999999998
w1 0.9999999999999999


In [63]:
import random

class Neuron:
    """A single unit computing tanh(w . x + b)."""

    def __init__ (self, numInputs):
        #One weight per incoming connection, each drawn uniformly from [-1, 1]
        self.w = []
        for _ in range(numInputs):
            self.w.append(Value(random.uniform(-1, 1)))
        #Bias, drawn the same way (after the weights)
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Forward pass, invoked as ``neuron(x)``: weighted sum of x plus bias, through tanh."""
        #Start from the bias and add one w_i * x_i term per (weight, input) pair.
        #zip stops at the shorter of the two sequences.
        act = self.b
        for wi, xi in zip(self.w, x):
            act = act + wi * xi
        return act.tanh()

    def parameters (self):
        """Return the trainable values: all weights followed by the bias."""
        return [*self.w, self.b]

class Layer:
    """A group of neurons that all receive the same input."""

    def __init__ (self, numInputs, numNeurons): #numInputs is the fan-in of every neuron in the layer
        self.neurons = []
        for _ in range(numNeurons):
            self.neurons.append(Neuron(numInputs))

    def __call__ (self, input):
        """Feed ``input`` to every neuron; a one-neuron layer returns the bare output, otherwise a list."""
        activations = []
        for unit in self.neurons:
            activations.append(unit(input))
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters (self):
        """Return the parameters of all neurons as one flat list, in neuron order."""
        return [param for unit in self.neurons for param in unit.parameters()]

class MLP:
    """Multi-layer perceptron: a chain of ``Layer`` objects applied one after another."""

    def __init__(self, numInputs, numNeuronsArr):
        """Build the layers.

        numInputs: number of input features.
        numNeuronsArr: number of neurons in each successive layer (any iterable of ints).
        """
        #list() lets the sizes be given as a tuple or other iterable, not only a list
        size = [numInputs] + list(numNeuronsArr) #[numInputs, n1, n2, ... , nn]
        #Each layer's fan-in is the previous entry, its width is the next one
        self.layers = [Layer(nin, nout) for nin, nout in zip(size, size[1:])]

    def __call__(self, input):
        """Run ``input`` through every layer in order and return the last layer's output."""
        for layer in self.layers:
            #A one-neuron layer returns a bare scalar rather than a list. Re-wrap it so
            #the next layer can still iterate over its inputs.
            if not hasattr(input, '__iter__'):
                input = [input]
            input = layer(input)
        return input

    def parameters (self):
        """Return the parameters of all layers as one flat list, in layer order."""
        return [param for layer in self.layers for param in layer.parameters()]

In [64]:
x = [2.0, 3.0, -1.0] #One example input with three features
# MLP: 3 inputs → 4 neurons → 4 neurons → 1 output
n = MLP(3, [4, 4, 1]) #Rebinds n, which the PyTorch cell used for its pre-activation tensor
n(x) #Forward pass on the single example; weights come from random.uniform and no seed is set in the visible cells, so this value differs between runs

Value(data=-0.943948480493991)

In [65]:
# Toy dataset: four examples with three features each
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]

ys = [1.0, -1.0, -1.0, 1.0]  # desired output for each row of xs, in the same order

# Run predictions with the current weights: one network output per row of xs
ypred = [n(x) for x in xs]

In [66]:
for k in range (20):
    # Forward pass: predict every example, then sum the squared errors into one loss node
    ypred = [n(x) for x in xs]
    loss = sum((pred - target)**2 for target, pred in zip (ys, ypred))

    # Backward pass: gradients accumulate with +=, so clear the parameters' grads first
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # Gradient-descent update: move each parameter against its gradient (step size 0.05)
    for p in n.parameters():
        p.data -= 0.05 * p.grad

    print(k, loss.data)

0 7.498438275255379
1 6.681081232828469
2 5.7656185480199795
3 3.6348311871699055
4 2.9909643658343987
5 2.652216849965963
6 2.3139698791700694
7 1.8938204333046698
8 1.4736331161271492
9 1.1209701385313449
10 0.8502439786453476
11 0.6523408335826156
12 0.5102599532719184
13 0.4078179914243315
14 0.3327542144536048
15 0.27660907558111814
16 0.23372281809179968
17 0.20031197299500772
18 0.17381448162218666
19 0.1524629152708967


In [67]:
ypred #Predictions computed in the last loop iteration (before its final weight update); compare against ys

[Value(data=0.8384771117431458),
 Value(data=-0.738421044948126),
 Value(data=-0.9101728113175854),
 Value(data=0.7766599044339686)]