In [1]:
import math
import random

In [2]:
class ZeroDimTensor:
    """A scalar value that records how it was computed so gradients can be back-propagated.

    Every arithmetic operation returns a new ZeroDimTensor whose ``_children``
    are the operands and whose ``_backward`` closure adds the local gradient
    contribution to each operand's ``grad``.

    Attributes:
        val: The scalar value, stored as a float.
        grad: Derivative of the tensor ``backward()`` was last called on with
            respect to this tensor; 0.0 until a backward pass runs.
        label: Optional human-readable name, only used by ``__repr__``.
    """

    def __init__(self, val, _children=(), _op="", label=""):
        """Create a tensor.

        Args:
            val: Numeric value; converted with ``float()``.
            _children: Tensors this one was computed from (internal use).
            _op: Short name of the operation that produced this tensor (internal use).
            label: Optional display name.
        """
        self.val = float(val)
        self.grad = 0.0
        self._op = _op  # '_op' for consistency with Pytorch
        self.label = label
        self._children = set(_children)
        # Leaf tensors have nothing to propagate; operations overwrite this
        # with a closure that pushes gradient to their operands.
        self._backward = lambda: None

    def __repr__(self):
        return f"ZeroDimTensor(label='{self.label}', val={self.val}, grad={self.grad}, op='{self._op}')"

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ZeroDimTensor."""
        other = other if isinstance(other, ZeroDimTensor) else ZeroDimTensor(other)
        return_val = ZeroDimTensor(self.val + other.val, (self, other), "+")

        def _backward_pass_add():
            # Gradients accumulate, so use += instead of = for all backward pass methods
            self.grad += return_val.grad
            other.grad += return_val.grad

        return_val._backward = _backward_pass_add

        return return_val

    def __radd__(self, other):
        """Support ``number + tensor``."""
        return self + other

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ZeroDimTensor."""
        other = other if isinstance(other, ZeroDimTensor) else ZeroDimTensor(other)
        return_val = ZeroDimTensor(self.val * other.val, (self, other), "*")

        def _backward_pass_mul():
            # Product rule: each operand's local derivative is the other operand's value.
            self.grad += other.val * return_val.grad
            other.grad += self.val * return_val.grad

        return_val._backward = _backward_pass_mul

        return return_val

    def __rmul__(self, other):
        """Support ``number * tensor``."""
        return self * other

    def __sub__(self, other):
        """Return ``self - other``, built from addition and negation."""
        other = other if isinstance(other, ZeroDimTensor) else ZeroDimTensor(other)
        return self + (-other)

    def __rsub__(self, other):
        """Support ``number - tensor``."""
        return (-self) + other

    def __neg__(self):
        """Return ``-self``, built as multiplication by -1."""
        return self * -1

    def __truediv__(self, other):
        """Return ``self / other``, built as ``self * other**-1``."""
        other = other if isinstance(other, ZeroDimTensor) else ZeroDimTensor(other)
        return self * other**-1

    def __rtruediv__(self, other):
        """Support ``number / tensor``."""
        return other * self**-1

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int or float exponent.

        The exponent is treated as a constant, so no gradient flows to it.
        """
        assert isinstance(other, (int, float)), "Power must be a scalar"
        return_val = ZeroDimTensor(self.val**other, (self,), f'**{other}')

        def _backward_pass_pow():
            # Power rule: d/dx x**n = n * x**(n-1)
            self.grad += return_val.grad * (other * (self.val**(other-1)))

        return_val._backward = _backward_pass_pow
        return return_val

    def tanh(self):
        """Return ``tanh(self)`` as a new tensor."""
        return_val = ZeroDimTensor(math.tanh(self.val), (self,), "tanh")

        def _backward_pass_tanh():
            # d/dx tanh(x) = 1 - tanh(x)**2, and return_val.val already holds tanh(x)
            self.grad += (1 - return_val.val**2) * return_val.grad

        return_val._backward = _backward_pass_tanh

        return return_val

    def _topological_order(self):
        """Return every tensor reachable from ``self``, children before parents.

        Uses an explicit stack instead of recursion so that long operation
        chains do not hit the interpreter's recursion limit.
        """
        ordering = []
        visited = {self}
        # Each stack entry pairs a node with an iterator over the children
        # that still have to be explored.
        stack = [(self, iter(self._children))]
        while stack:
            node, pending_children = stack[-1]
            for child in pending_children:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._children)))
                    break
            else:
                # All children are done, so the node itself can be emitted.
                ordering.append(node)
                stack.pop()
        return ordering

    def backward(self, verbose=True):  # Implements the backward pass (like PyTorch API)
        """Compute the gradient of this tensor with respect to every tensor in its graph.

        All gradients in the graph are reset to zero first, so results from an
        earlier ``backward()`` call on the same graph are discarded.

        Args:
            verbose: When True (the default), print every tensor in the graph
                after the pass, for debugging. Pass False to silence this,
                e.g. inside a training loop.
        """
        topological_ordering = self._topological_order()

        # Zero out all gradients in the graph before starting
        for node in topological_ordering:
            node.grad = 0.0

        self.grad = 1.0  # Derivative of the root node (loss) is always 1

        # Reverse topological order guarantees a node's gradient is complete
        # before it is pushed on to its children.
        for tensor in reversed(topological_ordering):
            tensor._backward()

        if verbose:
            # For debugging: print tensors and their gradients after backward pass
            print("\n--- Tensors and Gradients (after backward pass) ---")
            for tensor in topological_ordering:
                print(tensor)

In [3]:
# Hand-built single neuron: o = tanh(w1*x1 + w2*x2 + b)

# Inputs
x1 = ZeroDimTensor(2.0, label="x1")
x2 = ZeroDimTensor(0.0, label="x2")

# Weights and bias
w1 = ZeroDimTensor(-3.0, label="w1")
w2 = ZeroDimTensor(1.0, label="w2")
b = ZeroDimTensor(6.8814, label="b")

# Weighted inputs; intermediate results are labelled after creation because
# the operators return unlabelled tensors.
w1x1 = w1 * x1
w2x2 = w2 * x2
w1x1.label = "w1x1"
w2x2.label = "w2x2"

w1x1_plus_w2x2 = w1x1 + w2x2
w1x1_plus_w2x2.label = "w1x1_plus_w2x2"

# Pre-activation
n = w1x1_plus_w2x2 + b
n.label = "n"

# Activation, then back-propagate from the output
o = n.tanh()
o.label = "o"
o.backward()


--- Tensors and Gradients (after backward pass) ---
ZeroDimTensor(label='x2', val=0.0, grad=0.4999813233768232, op='')
ZeroDimTensor(label='w2', val=1.0, grad=0.0, op='')
ZeroDimTensor(label='w2x2', val=0.0, grad=0.4999813233768232, op='*')
ZeroDimTensor(label='w1', val=-3.0, grad=0.9999626467536464, op='')
ZeroDimTensor(label='x1', val=2.0, grad=-1.4999439701304698, op='')
ZeroDimTensor(label='w1x1', val=-6.0, grad=0.4999813233768232, op='*')
ZeroDimTensor(label='w1x1_plus_w2x2', val=-6.0, grad=0.4999813233768232, op='+')
ZeroDimTensor(label='b', val=6.8814, grad=0.4999813233768232, op='')
ZeroDimTensor(label='n', val=0.8814000000000002, grad=0.4999813233768232, op='+')
ZeroDimTensor(label='o', val=0.7071199874301226, grad=1.0, op='tanh')


In [4]:
class Perceptron:
    """A single neuron: tanh of the weighted sum of its inputs plus a bias."""

    def __init__(self, num_inputs):
        """Create a neuron with ``num_inputs`` weights and one bias.

        Weights and bias are ZeroDimTensor objects drawn uniformly from [-1, 1].
        """
        # Initialize weights as a list of ZeroDimTensor objects
        self.weights = [ZeroDimTensor(random.uniform(-1,1), label=f"w{i}") for i in range(num_inputs)] # random weights with labels
        # Initialize bias as a ZeroDimTensor object
        self.bias = ZeroDimTensor(random.uniform(-1,1), label="b")

    def __call__(self, x_inputs):
        """Run the forward pass.

        Args:
            x_inputs: Sequence of numbers or ZeroDimTensor objects, one per weight.

        Returns:
            A ZeroDimTensor holding ``tanh(sum(w_i * x_i) + b)``.

        Raises:
            ValueError: If the number of inputs differs from the number of weights.
        """
        # Without this check, too few inputs would silently ignore the trailing
        # weights and produce a wrong activation.
        if len(x_inputs) != len(self.weights):
            raise ValueError(
                f"Expected {len(self.weights)} inputs, got {len(x_inputs)}"
            )

        raw_activation = ZeroDimTensor(0.0, label="raw_act")

        # Calculate sum(w_i * x_i) + b
        for x_i, w_i in zip(x_inputs, self.weights):
            # A plain float x_i is wrapped in a ZeroDimTensor by the multiplication.
            raw_activation += x_i * w_i

        raw_activation += self.bias

        # Apply the activation function (tanh)
        return raw_activation.tanh() # This proves that perceptrons and neural nets are just mathematical expressions


In [5]:
# Forward pass of one randomly initialised two-input perceptron.
x = [9.0, 10.0]
neuron = Perceptron(num_inputs=2)
neuron(x)

ZeroDimTensor(label='', val=-0.9999999979429093, grad=0.0, op='tanh')

In [6]:
class Layer:
    """A group of perceptrons that all receive the same inputs."""

    def __init__(self, num_inputs, num_outputs):
        """Build ``num_outputs`` perceptrons, each taking ``num_inputs`` inputs."""
        units = []
        for _ in range(num_outputs):
            units.append(Perceptron(num_inputs))
        self.perceptrons = units

    def __call__(self, x):
        """Feed ``x`` to every perceptron and return their outputs as a list."""
        outputs = []
        for unit in self.perceptrons:
            outputs.append(unit(x))
        return outputs

In [7]:
# A layer of 3 perceptrons, each taking 2 inputs; every perceptron has its own
# random weights and bias, so the forward pass yields 3 outputs for the same x.
x = [9.0, 10.0]
layer = Layer(num_inputs=2, num_outputs=3)
layer(x)

[ZeroDimTensor(label='', val=0.9999919634512422, grad=0.0, op='tanh'),
 ZeroDimTensor(label='', val=-0.9999999574766677, grad=0.0, op='tanh'),
 ZeroDimTensor(label='', val=0.9999999928429436, grad=0.0, op='tanh')]

In [8]:
class MultiLayerPerceptron:
    """A stack of layers where each layer's outputs feed the next layer."""

    def __init__(self, num_inputs, num_outputs_per_layer):
        """Build the network.

        Args:
            num_inputs: Number of values in one input sample.
            num_outputs_per_layer: Iterable with the number of perceptrons in
                each layer, in order (a list, tuple, or any other iterable).
        """
        # sizes = [input_layer_size, hidden_layer1_size, ..., output_layer_size]
        # Unpacking (rather than list concatenation) accepts tuples and other iterables.
        sizes = [num_inputs, *num_outputs_per_layer]

        # Pair consecutive sizes so each layer's input count matches the
        # previous layer's output count.
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]

    def __call__(self, x_inputs):
        """Run a forward pass and return the last layer's outputs as a list."""
        # Pass the input through each layer sequentially
        for layer in self.layers:
            x_inputs = layer(x_inputs) # The output of one layer becomes the input for the next
        return x_inputs # The final output of the network
    

In [9]:
# Forward pass through a 3-input network with layers of 4, 4 and 1 perceptrons.
x = [1.0, 2.0, -3.0]
nn = MultiLayerPerceptron(num_inputs=3, num_outputs_per_layer=[4, 4, 1])
nn(x)

[ZeroDimTensor(label='', val=-0.9219678047939753, grad=0.0, op='tanh')]

In [10]:
# Binary classifier: forward pass of an untrained model over a tiny dataset

# Training inputs: each row is one sample with three features
xt = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0,  1.0],
    [1.0, 1.0, -1.0]
]

# Desired outputs: one target per sample (row) of xt, either -1 or 1
yt = [-1.0, 1.0, 1.0, -1.0]

# Wrap every input value and every target in a ZeroDimTensor
xt_tensors = [
    [ZeroDimTensor(value, label=f"x_feature{col}") for col, value in enumerate(sample)]
    for sample in xt
]
yt_tensors = [ZeroDimTensor(target, label="target") for target in yt]

# Model: 3 inputs -> layer of 4 perceptrons -> 1 output perceptron
model = MultiLayerPerceptron(3, [4, 1])

# One prediction per sample; each prediction is the output list of the last layer
yhats = [model(sample) for sample in xt_tensors]

yhats



[[ZeroDimTensor(label='', val=0.44183136181958993, grad=0.0, op='tanh')],
 [ZeroDimTensor(label='', val=0.8560231358110179, grad=0.0, op='tanh')],
 [ZeroDimTensor(label='', val=0.8073855243948576, grad=0.0, op='tanh')],
 [ZeroDimTensor(label='', val=0.7512066653345022, grad=0.0, op='tanh')]]