In [1]:
import math
import os
import random
import sys
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np

# The local visualizer package must be on sys.path before it can be imported.
sys.path.append(os.path.abspath('./graphvisualizer'))
import graphvisualizer.graph_visualizer as gv

### Neural networks from scratch

In [2]:
# We define our own class that overloads Python's arithmetic operators (operator overloading)

class Value:
    """Scalar node of a dynamically built computational graph with reverse-mode autodiff.

    Every arithmetic operation on a ``Value`` returns a new ``Value`` that records
    its operands (``_prev``), the operation that produced it (``_op``) and a
    ``_backward`` closure that applies the chain rule to accumulate gradients
    into those operands.
    """

    def __init__(self, data, _children = (), _op = '', label = ''):
        """Create a node.

        Args:
            data: The scalar value held by this node.
            _children: Operands that produced this node; empty for a leaf.
            _op: Short name of the producing operation (e.g. ``'+'``).
            label: Optional human-readable label.
        """
        self.data = data
        # Track the operands that generated this Value; a plain number has none.
        self._prev = set(_children)
        self._op = _op
        self.label = label
        self.grad = 0.0
        # Leaves have nothing to propagate to, hence the no-op default.
        self._backward = lambda: None

    def __repr__(self):
        return f'Value(data= {self.data})'

    # Operator overloading using python magic functions
    def __add__(self, other) -> "Value":
        """Return ``self + other``; ``other`` may be a ``Value`` or a plain number."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def backward():
            # d(a+b)/da = d(a+b)/db = 1: the upstream gradient passes through unchanged.
            # Accumulate with += because the same node may feed several consumers.
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad

        out._backward = backward
        return out

    def __radd__(self, other): # other + self
        return self + other

    def __mul__(self, other) -> "Value":
        """Return ``self * other``; ``other`` may be a ``Value`` or a plain number."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def backward():
            # d(a*b)/da = b and d(a*b)/db = a.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out._backward = backward
        return out

    def __rmul__(self, other): # other * self
        return self * other

    def __neg__(self):
        return self * -1

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other): # other - self
        return other + (-self)

    def exp(self):
        """Return ``e ** self`` as a new graph node."""
        out = Value(math.exp(self.data), (self, ), 'exp')

        def backward():
            # d(e^x)/dx = e^x, which is exactly out.data.
            self.grad += out.data * out.grad

        out._backward = backward
        return out

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other, (int, float)), "only supports int/ float powers"
        out = Value(self.data ** other, (self, ), f'**{other}')

        def backward():
            # Power rule: d(x^k)/dx = k * x^(k-1).
            self.grad += other * (self.data ** (other - 1)) * out.grad

        out._backward = backward
        return out

    def __truediv__(self, other):
        # Division is expressed as multiplication by the reciprocal, re-using
        # __mul__() and __pow__() so no dedicated backward rule is needed.
        return self * other**-1

    def __rtruediv__(self, other): # other / self
        return other * self**-1

    def tanh(self):
        """Return ``tanh(self)`` as a new graph node.

        Uses ``math.tanh`` rather than ``(e^{2x}-1)/(e^{2x}+1)``, which overflows
        to ``inf/inf`` (NaN) for large inputs.
        """
        result = math.tanh(self.data)
        # Note the trailing comma: (self,) is a one-element tuple, (self) is just self.
        out = Value(result, (self, ), _op = 'tanh')

        def backward():
            # d(tanh x)/dx = 1 - tanh(x)^2.
            self.grad += (1.0 - result**2) * out.grad

        out._backward = backward
        return out

    def backward(self):
        """Back-propagate from this node through every node it depends on.

        Sets ``self.grad`` to 1.0 and then runs each node's ``_backward`` in
        reverse topological order. Gradients are accumulated with ``+=``, so any
        gradients left over from an earlier call must be reset by the caller.
        """
        # Iterative post-order DFS: a recursive walk would exceed Python's
        # recursion limit on long operation chains (e.g. a sum over many terms).
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, children = stack[-1]
            for child in children:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # All operands of `node` are already emitted, so it can follow them.
                topo.append(node)
                stack.pop()

        self.grad = 1.0
        for node in reversed(topo):
            node._backward()


In [3]:
class Neuron:
    """A single tanh neuron computing ``tanh(sum(w_i * x_i) + b)``."""

    def __init__(self, nin: int) -> None:
        """Create ``nin`` weights and one bias, each drawn uniformly from [-1, 1].

        Args:
            nin: Number of inputs (and therefore weights) of the neuron.
        """
        # Wrapping the random floats in Value is what lets the computational
        # graph (and hence the gradients) reach the parameters.
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x: list[float]) -> "Value":
        """Forward pass.

        Args:
            x: One input per weight (plain numbers or ``Value`` objects).

        Returns:
            The activation ``tanh(w . x + b)``.

        Raises:
            ValueError: If the number of inputs differs from the number of
                weights; ``zip`` would otherwise silently drop the excess.
        """
        x = list(x)
        if len(x) != len(self.w):
            raise ValueError(f"expected {len(self.w)} inputs, got {len(x)}")
        # The bias is the start value of the sum, so the result is w . x + b.
        act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
        return act.tanh()

    def parameters(self):
        """Return the trainable parameters: all weights followed by the bias."""
        return self.w + [self.b]
    
class Layer:
    """A group of independent neurons that all receive the same input."""

    def __init__(self, nin, nout):
        """Build ``nout`` neurons, each with ``nin`` weights."""
        self.neurons = []
        for _ in range(nout):
            self.neurons.append(Neuron(nin))

    def __call__(self, x):
        """Forward pass: feed ``x`` to every neuron of the layer.

        Returns the list of per-neuron outputs, except that a layer with
        exactly one neuron returns that single output unwrapped.
        """
        activations = list(map(lambda neuron: neuron(x), self.neurons))
        if len(activations) != 1:
            return activations
        return activations[0]

    def parameters(self):
        """Return the parameters of all neurons, flattened in neuron order."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected
    
class MLP:
    """Multi-layer perceptron: a chain of layers applied one after another."""

    def __init__(self, nin, nouts):
        """Build the layers.

        Args:
            nin: Number of inputs of the network.
            nouts: Sequence with the number of neurons of each layer, in order.
        """
        # Unpacking accepts any sequence (list, tuple, ...) for nouts.
        sz = [nin, *nouts]
        self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))]

    # Forward pass
    def __call__(self, x):
        """Feed ``x`` through every layer and return the last layer's output.

        A layer with a single neuron returns a bare value instead of a list;
        such a value is re-wrapped in a list before it is handed to the next
        layer, so one-neuron layers may appear anywhere in the network.
        """
        for layer in self.layers:
            try:
                iter(x)
            except TypeError:
                x = [x]
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers, flattened in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

In [4]:
# Build the network: 3 inputs -> layers of 4, 4 and 1 neurons.
# Constructing the MLP creates every layer, neuron, weight and bias (randomly initialized).
n = MLP(3, [4,4,1])

# Simple training data: 4 samples with 3 features each
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
] # list of samples, each a list of float scalars

# Targets: one desired output per sample in xs
ys = [1.0, -1.0, -1.0, 1.0] # list of float scalars


In [5]:
# Forward pass with the untrained (randomly initialized) network, one prediction per sample.
ypred = [n(x) for x in xs] # ypred - list of Value objects
print('Prediction BEFORE feeding/ optimizing through a neural net')
ypred

Prediction BEFORE feeding/ optimizing through a neural net


[Value(data= -0.11130557615924767),
 Value(data= -0.1006063816524968),
 Value(data= 0.7119121956399126),
 Value(data= -0.11339618912115094)]

In [6]:
def grad_descent(epoch, lr=0.1, model=None, inputs=None, targets=None):
    """Train a model with full-batch gradient descent on a squared-error loss.

    Args:
        epoch: Number of optimization steps to run.
        lr: Learning rate (step size) of each parameter update.
        model: Callable network exposing ``parameters()``; defaults to the
            notebook-level ``n``.
        inputs: Training samples; defaults to the notebook-level ``xs``.
        targets: One target per sample; defaults to the notebook-level ``ys``.

    Returns:
        The predictions from the last epoch's forward pass (computed before
        that epoch's weight update), or an empty list when ``epoch`` is 0.
    """
    model = n if model is None else model
    inputs = xs if inputs is None else inputs
    targets = ys if targets is None else targets

    ypred = []
    for k in range(epoch):
        # NN optimization only requires 4 steps - if each one is present and correct you have a working NN.

        # 1) Forward pass
        # The predictions are graph nodes, so every operation in the loss
        # becomes part of the same computational graph.
        ypred = [model(x) for x in inputs]
        loss = sum([(yhat - y)**2 for y, yhat in zip(targets, ypred)]) # Squared error loss function

        params = model.parameters()

        # 2) Zero grad
        # Done BEFORE the backward pass: backward() accumulates with +=, so any
        # gradient left over from earlier work would corrupt this step.
        for p in params:
            p.grad = 0.0

        # 3) Backward pass
        loss.backward()

        # 4) Update weights: step against the gradient
        for p in params:
            p.data += -lr * p.grad

        print(f'epoch: {k} | loss: {loss}')
    return ypred

In [7]:
# Run 30 epochs of gradient descent; ypred receives the predictions returned by grad_descent.
ypred = grad_descent(30)

epoch: 0 | loss: Value(data= 6.21420340385702)
epoch: 1 | loss: Value(data= 5.1506324905310015)
epoch: 2 | loss: Value(data= 4.4347227580331765)
epoch: 3 | loss: Value(data= 4.210451295691955)
epoch: 4 | loss: Value(data= 4.086442917919335)
epoch: 5 | loss: Value(data= 3.9335510552355144)
epoch: 6 | loss: Value(data= 3.6399824339648372)
epoch: 7 | loss: Value(data= 3.160327560571886)
epoch: 8 | loss: Value(data= 2.6678708049788966)
epoch: 9 | loss: Value(data= 4.325523631235425)
epoch: 10 | loss: Value(data= 1.8943733186011018)
epoch: 11 | loss: Value(data= 1.5131507090524567)
epoch: 12 | loss: Value(data= 0.04810093058496151)
epoch: 13 | loss: Value(data= 0.037601516805717425)
epoch: 14 | loss: Value(data= 0.03161412274904266)
epoch: 15 | loss: Value(data= 0.02762871980159993)
epoch: 16 | loss: Value(data= 0.024725005053206822)
epoch: 17 | loss: Value(data= 0.022480530884789605)
epoch: 18 | loss: Value(data= 0.020672641945358835)
epoch: 19 | loss: Value(data= 0.019172111135198046)
epo

In [8]:
# Show the predictions returned by the training run above.
print('Prediction AFTER feeding/ optimizing in a neural net')
ypred

Prediction AFTER feeding/ optimizing in a neural net


[Value(data= 0.9530811117038198),
 Value(data= -0.9332000206104153),
 Value(data= -0.9658163437370157),
 Value(data= 0.9407268739442083)]

In [11]:
# Print the targets for comparison with the trained predictions.
print('Target prediction')
print(ys)

Target prediction
[1.0, -1.0, -1.0, 1.0]


### Close enough ✅