In [None]:
import math
import random

## Tensor

In [None]:
class Tensor:
    """Scalar value that records how it was computed so gradients can be back-propagated.

    Every arithmetic operation returns a new Tensor whose `children` are the
    operands and whose `_backward` closure adds the local gradient contribution
    to each operand's `grad`. Calling `backward()` on a result walks that graph
    from the result down to the leaves.

    Attributes:
        value: The wrapped numeric value.
        children: Set of Tensors this one was computed from (empty for leaves).
        operator: Short string naming the producing operation, or None.
        grad: Accumulated gradient; starts at 0.
        label: Optional free-form name; stored but not used by this class.
    """

    def __init__(self, value, label='', children=(), operator=None):
        self.value = value
        self.children = set(children)
        self.operator = operator
        self.grad = 0  # Gradient of the tensor
        self._backward = lambda: None  # Leaves have nothing to propagate
        self.label = label

    def __repr__(self) -> str:
        return f"Tensor(value={self.value}, grad={self.grad})"

    def __mul__(self, other):
        """Return `self * other`; plain numbers are wrapped in a Tensor."""
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.value * other.value, children=(self, other), operator='*')

        def backward():
            # Product rule: each operand's gradient is the other operand's value
            self.grad += other.value * out.grad
            other.grad += self.value * out.grad
        out._backward = backward

        return out

    def __add__(self, other):
        """Return `self + other`; plain numbers are wrapped in a Tensor."""
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.value + other.value, children=(self, other), operator='+')

        def backward():
            # For addition, the gradient of the output wrt each input is 1
            self.grad += 1 * out.grad
            other.grad += 1 * out.grad
        out._backward = backward

        return out

    def __neg__(self):
        """Return `-self`, built from multiplication so the gradient flows through it."""
        return self * -1

    def __sub__(self, other):
        """Return `self - other`; plain numbers are wrapped in a Tensor."""
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.value - other.value, children=(self, other), operator='-')

        def backward():
            self.grad += 1 * out.grad
            other.grad -= 1 * out.grad
        out._backward = backward

        return out

    def __rsub__(self, other):
        """Return `other - self` for a non-Tensor left operand."""
        # Relies on __neg__; without it the unary minus raises TypeError
        return -self + other

    def __pow__(self, other):
        """Return `self ** other`.

        `other` is used as a plain exponent: it is not wrapped in a Tensor and
        receives no gradient.
        """
        out = Tensor(self.value ** other, children=(self,), operator='**')

        def backward():
            # Power rule: d(x**n)/dx = n * x**(n-1)
            self.grad += other * (self.value ** (other - 1)) * out.grad
        out._backward = backward

        return out

    def __radd__(self, other):
        # Support adding tensors on the right side of numbers
        return self + other

    def __rmul__(self, other):
        # Support multiplying tensors on the right side of numbers
        return self * other

    def backward(self, grad=1):
        """Back-propagate from this tensor through every tensor it depends on.

        Sets `self.grad` to `grad`, then runs each node's `_backward` closure in
        reverse topological order so a node's gradient is complete before it is
        pushed to its children. Gradients accumulate with `+=`, so the caller is
        responsible for resetting `grad` on tensors reused between calls.

        Args:
            grad: Seed gradient for this tensor (default 1).
        """
        self.grad = grad  # Initialize the gradient

        # Iterative post-order DFS. A set gives O(1) "already seen" checks, and
        # the explicit stack avoids RecursionError on deep graphs such as a long
        # chain of additions.
        topo_order = []
        visited = {self}
        stack = [(self, iter(self.children))]
        while stack:
            node, pending_children = stack[-1]
            for child in pending_children:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child.children)))
                    break
            else:
                # Every child has been emitted, so the node itself can be
                topo_order.append(node)
                stack.pop()

        # Reverse topo_order for correct backward pass execution
        for tensor in reversed(topo_order):
            tensor._backward()

## Forward Section

In [None]:
class Neuron:
    """Single tanh unit: `tanh(sum(w_i * x_i) + bias)`.

    Weights and bias are Tensors drawn uniformly from [-1, 1].
    """

    def __init__(self, input_size):
        # One random weight per input, drawn before the bias
        self.weights = []
        for _ in range(input_size):
            self.weights.append(Tensor(random.uniform(-1, 1)))
        self.bias = Tensor(random.uniform(-1, 1))

    def forward(self, x):
        """Return the activation for input sequence `x`.

        Weights and inputs are paired with `zip`, so any surplus on either side
        is ignored.
        """
        weighted_inputs = (weight * feature for weight, feature in zip(self.weights, x))
        pre_activation = sum(weighted_inputs) + self.bias
        return F.tanh(pre_activation)

    def __call__(self, x):
        # Calling the neuron is shorthand for forward()
        return self.forward(x)

    def parameters(self):
        """Return a new list holding every weight followed by the bias."""
        params = list(self.weights)
        params.append(self.bias)
        return params

# Tanh activation function

In [None]:
class F:
    """Namespace for differentiable functions that operate on Tensor objects."""

    @staticmethod
    def tanh(x: Tensor) -> Tensor:
        """Return tanh(x) as a new Tensor wired into the autograd graph.

        Args:
            x: Input Tensor; its `value` is read and its `grad` is updated
               when the result is back-propagated.

        Returns:
            A Tensor holding tanh(x.value) with `x` as its only child.
        """
        # math.tanh saturates to +/-1 for large |x|, whereas the explicit
        # (e^x - e^-x) / (e^x + e^-x) formula raises OverflowError in math.exp.
        output_value = math.tanh(x.value)
        out = Tensor(output_value, children=(x,), operator='tanh')

        def backward():
            # d tanh(x)/dx = 1 - tanh(x)^2
            x.grad += (1 - out.value ** 2) * out.grad

        out._backward = backward

        return out

# Layers

In [None]:
class Layer:
    """A group of `output_size` neurons that all receive the same input."""

    def __init__(self, input_size, output_size):
        self.neurons = [Neuron(input_size) for _ in range(output_size)]

    def forward(self, x):
        """Feed `x` to every neuron.

        Returns the lone output directly when the layer has exactly one neuron,
        otherwise a list with one output per neuron.
        """
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron(x))
        if len(outputs) == 1:
            return outputs[0]
        return outputs

    def __call__(self, x):
        # Calling the layer is shorthand for forward()
        return self.forward(x)

    def parameters(self):
        """Return the parameters of every neuron as one flat list, in neuron order."""
        return [param for neuron in self.neurons for param in neuron.parameters()]


# MLP

In [None]:
class MLP:
    """Multi-layer perceptron: a chain of Layer objects applied one after another."""

    def __init__(self, input_size, layer_sizes):
        # Consecutive entries of `sizes` give each layer's (inputs, outputs)
        sizes = [input_size] + layer_sizes
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]

    def forward(self, x):
        """Pass `x` through every layer in order and return the final output."""
        activation = x
        for layer in self.layers:
            activation = layer(activation)
        return activation

    def __call__(self, x):
        # Calling the model is shorthand for forward()
        return self.forward(x)

    def parameters(self):
        """Return the parameters of every layer as one flat list, in layer order."""
        return [param for layer in self.layers for param in layer.parameters()]

# Initializing a dataset and the corresponding labels to feed the model

In [None]:
# Seed the RNG so the random weight initialisation is reproducible across runs
SEED = 42
random.seed(SEED)

# Four training samples, each with three input features
X = [[2.0,3.0,-1.0],
     [3.0,-1.0,0.5],
     [0.5,1.0,1.0],
     [3.0,1.0,-1.0]]
# One target per row of X, paired by position
Y = [1.0,-1.0,-1.0,1.0]
# Number of features per sample. len(X) would be the number of samples, which
# gives every first-layer neuron a weight that never takes part in the forward pass.
input_size = len(X[0])
layer_sizes = [4,2,1] # Number of neurons in each hidden and output layer
model = MLP(input_size, layer_sizes)

# Optimizer

In [None]:
class Optimizer:
    """Plain gradient-descent optimizer over objects exposing `value` and `grad`."""

    def __init__(self, parameters, lr):
        self.lr = lr
        # Materialise the iterable so it can be traversed on every step
        self.parameters = list(parameters)

    def zero_grad(self):
        """Reset the gradient of every parameter to 0, skipping any whose grad is None."""
        for param in self.parameters:
            if param.grad is None:
                continue
            param.grad = 0

    def step(self):
        """Move each parameter against its gradient by `lr * grad`, skipping None grads."""
        for param in self.parameters:
            if param.grad is None:
                continue
            param.value -= self.lr * param.grad

# Constructing an instance of the Optimizer class

In [None]:
# Gradient descent over every weight and bias of the model, with learning rate 0.01
optim = Optimizer(parameters=model.parameters(), lr=0.01)

# Implementation of Gradient Descent Optimization

In [None]:
n_epochs = 200

for epoch in range(n_epochs):

    # Sum of this epoch's per-sample loss terms. Each term is already scaled by
    # 1/len(X), so the total is the mean squared error over the dataset. It is a
    # running figure, because the parameters are updated after every sample.
    epoch_loss = 0.0

    for x, y in zip(X, Y):

        y_hat = model(x)
        loss = (y_hat-y)**2*len(X)**-1

        # Gradients accumulate with +=, so clear them before each backward pass
        optim.zero_grad()

        loss.backward()

        optim.step()

        epoch_loss += loss.value

    # Report the whole epoch, not just the last sample's term
    print(f"epoch: {epoch}: ",f"loss: {epoch_loss}")

epoch: 0:  loss: Tensor(value=0.24672508022134787, grad=1)
epoch: 1:  loss: Tensor(value=0.24130503528839853, grad=1)
epoch: 2:  loss: Tensor(value=0.23534651493384845, grad=1)
epoch: 3:  loss: Tensor(value=0.22881733881999, grad=1)
epoch: 4:  loss: Tensor(value=0.22169231042266804, grad=1)
epoch: 5:  loss: Tensor(value=0.2139568436463607, grad=1)
epoch: 6:  loss: Tensor(value=0.20561115892982898, grad=1)
epoch: 7:  loss: Tensor(value=0.19667476051819915, grad=1)
epoch: 8:  loss: Tensor(value=0.18719068132979996, grad=1)
epoch: 9:  loss: Tensor(value=0.17722874344974557, grad=1)
epoch: 10:  loss: Tensor(value=0.1668869072354576, grad=1)
epoch: 11:  loss: Tensor(value=0.15628978145570208, grad=1)
epoch: 12:  loss: Tensor(value=0.14558365245958135, grad=1)
epoch: 13:  loss: Tensor(value=0.13492800698245452, grad=1)
epoch: 14:  loss: Tensor(value=0.12448437389877168, grad=1)
epoch: 15:  loss: Tensor(value=0.11440413223506624, grad=1)
epoch: 16:  loss: Tensor(value=0.10481738176712067, gra