In [48]:
import math
import random

In [45]:
class ZeroDimTensor:
    """A scalar value that remembers how it was computed so gradients can flow back.

    Every instance holds its numeric value (``val``), an accumulated gradient
    (``grad``), the operand nodes it was built from (``_children``) and a
    closure (``_apply_backward_pass``) that pushes its own gradient one step
    back onto those operands using the chain rule.
    """

    def __init__(self, val, _children=(), op="", label=""):
        """Create a node of the computation graph.

        Args:
            val: numeric value of the node; stored as a ``float``.
            _children: operand nodes this value was computed from.
            op: symbol of the operation that produced the node ("" for leaves).
            label: optional human-readable name shown in the repr.
        """
        self.val = float(val)
        self.grad = 0.0
        self.op = op
        self.label = label
        self._children = set(_children)
        # Leaf nodes have no operands, so by default there is nothing to propagate.
        self._apply_backward_pass = lambda: None

    def __repr__(self):
        return f"ZeroDimTensor(label='{self.label}', val={self.val}, grad={self.grad}, op='{self.op}')"

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ZeroDimTensor first."""
        other = other if isinstance(other, ZeroDimTensor) else ZeroDimTensor(other)
        return_val = ZeroDimTensor(self.val + other.val, (self, other), "+") # forward pass

        def _backward_pass_add():
            # d(a + b)/da = d(a + b)/db = 1, so the output gradient passes through.
            # gradients accumulate, so use += instead of =
            self.grad += return_val.grad
            other.grad += return_val.grad

        return_val._apply_backward_pass = _backward_pass_add

        return return_val

    def __radd__(self, other):
        """Support ``number + tensor`` (addition is commutative)."""
        return self + other

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ZeroDimTensor first."""
        other = other if isinstance(other, ZeroDimTensor) else ZeroDimTensor(other)
        return_val = ZeroDimTensor(self.val * other.val, (self, other), "*") # forward pass

        def _backward_pass_mul():
            # d(a * b)/da = b and d(a * b)/db = a.
            # gradients accumulate, so use += instead of =
            self.grad += other.val * return_val.grad
            other.grad += self.val * return_val.grad

        return_val._apply_backward_pass = _backward_pass_mul

        return return_val

    def __rmul__(self, other):
        """Support ``number * tensor`` (multiplication is commutative)."""
        return self * other

    def tanh(self):
        """Return a new node holding ``tanh(self.val)``."""
        return_val = ZeroDimTensor(math.tanh(self.val), (self,), "tanh") # forward pass

        def _backward_pass_tanh():
            # d tanh(x)/dx = 1 - tanh(x)**2, and return_val.val already is tanh(x).
            # gradients accumulate, so use += instead of =
            self.grad += (1 - return_val.val**2) * return_val.grad

        return_val._apply_backward_pass = _backward_pass_tanh

        return return_val

    def backward(self, verbose=True): # in the nature of the Pytorch API
        """Back-propagate from this node through every node it depends on.

        Sets ``self.grad`` to 1.0 and then runs each node's backward closure in
        reverse topological order, so a node's gradient is complete before it
        is pushed to its operands. Gradients are only ever added to, never
        reset, so calling this repeatedly on the same graph accumulates.

        Args:
            verbose: when true (the default, matching the original behaviour)
                every visited node is printed after the pass, operands first.
        """
        topological_ordering = []
        visited = {self}

        # Post-order depth-first walk with an explicit stack instead of
        # recursion: a long chain of operations (e.g. a sum of thousands of
        # terms) would otherwise exceed Python's recursion limit.
        pending = [(self, iter(self._children))]
        while pending:
            node, remaining_children = pending[-1]
            for child in remaining_children:
                if child not in visited:
                    visited.add(child)
                    pending.append((child, iter(child._children)))
                    break  # descend first; the iterator keeps our place
            else:
                # every operand is already ordered, so this node can follow them
                pending.pop()
                topological_ordering.append(node)

        self.grad = 1.0 # derivative of the first node is always 1

        for tensor in reversed(topological_ordering):
            tensor._apply_backward_pass()

        if verbose:
            for tensor in topological_ordering:
                print(tensor)

In [47]:
# Hand-built two-input neuron: o = tanh(w1*x1 + w2*x2 + b)

# first input and its weight
x1 = ZeroDimTensor(2.0, label="x1")
w1 = ZeroDimTensor(-3.0, label="w1")

# second input and its weight
x2 = ZeroDimTensor(0.0, label="x2")
w2 = ZeroDimTensor(1.0, label="w2")

# bias term
b = ZeroDimTensor(6.8814, label="b")

# forward pass, one operation per node so each intermediate can be inspected
w1x1 = w1 * x1
w2x2 = w2 * x2
w1x1_plus_w2x2 = w1x1 + w2x2
n = w1x1_plus_w2x2 + b
o = n.tanh()

# name the intermediate nodes so the printed graph is readable
w1x1.label = "w1x1"
w2x2.label = "w2x2"
w1x1_plus_w2x2.label = "w1x1_plus_w2x2"
n.label = "n"
o.label = "o"

# backward pass from the output; prints every node with its gradient
o.backward()

ZeroDimTensor(label='x2', val=0.0, grad=0.4999813233768232, op='')
ZeroDimTensor(label='w2', val=1.0, grad=0.0, op='')
ZeroDimTensor(label='w2x2', val=0.0, grad=0.4999813233768232, op='*')
ZeroDimTensor(label='w1', val=-3.0, grad=0.9999626467536464, op='')
ZeroDimTensor(label='x1', val=2.0, grad=-1.4999439701304698, op='')
ZeroDimTensor(label='w1x1', val=-6.0, grad=0.4999813233768232, op='*')
ZeroDimTensor(label='w1x1_plus_w2x2', val=-6.0, grad=0.4999813233768232, op='+')
ZeroDimTensor(label='b', val=6.8814, grad=0.4999813233768232, op='')
ZeroDimTensor(label='n', val=0.8814000000000002, grad=0.4999813233768232, op='+')
ZeroDimTensor(label='o', val=0.7071199874301226, grad=1.0, op='tanh')


In [76]:
class Perceptron:
    """A single neuron computing ``tanh(sum_i(x_i * w_i) + bias)``."""

    def __init__(self, num_inputs):
        """Create ``num_inputs`` weights and one bias, each drawn uniformly from [-1, 1]."""
        self.weight = [ZeroDimTensor(random.uniform(-1,1), label="weight") for _ in range(num_inputs)] # random weights
        self.bias = ZeroDimTensor(random.uniform(-1,1), label="bias")

    def __call__(self, x):
        """Run the forward pass for one input vector.

        Args:
            x: indexable sequence of inputs, one per weight. Elements are
                multiplied with the weights, so plain numbers and
                ZeroDimTensor values both work.

        Returns:
            The tanh-activated output node.

        Raises:
            IndexError: if ``x`` has more elements than there are weights.
        """
        # calculate w_i * x_i + b
        raw_activation = 0.0
        for i in range(len(x)):
            raw_activation += x[i] * self.weight[i]
        # Use this neuron's own bias. The previous code read a notebook-level
        # global named `b`, which ignored self.bias and failed with NameError
        # whenever that global had not been defined.
        raw_activation += self.bias

        return raw_activation.tanh() # a perceptron is just a series of mathematical transformations represented as a tensor graph


In [84]:
# Forward pass of one perceptron that has a weight for every input value
x = [9.0, 10.0]
neuron = Perceptron(num_inputs=len(x))
neuron(x)

ZeroDimTensor(label='', val=0.9999999999999355, grad=0.0, op='tanh')

In [89]:
class Layer:
    """A group of perceptrons that are all fed the same input vector."""

    def __init__(self, num_inputs, num_outputs):
        """Build ``num_outputs`` perceptrons, each expecting ``num_inputs`` inputs."""
        perceptrons = []
        for _ in range(num_outputs):
            # every perceptron draws its own random weights and bias
            perceptrons.append(Perceptron(num_inputs))
        self.layer_of_perceptrons = perceptrons

    def __call__(self, x):
        """Forward pass: return a list with one output per perceptron, in layer order."""
        outputs = []
        for perceptron in self.layer_of_perceptrons:
            outputs.append(perceptron(x))
        return outputs

In [88]:
# A layer of 3 perceptrons, each taking one weight per input value
x = [9.0, 10.0]
layer = Layer(num_inputs=len(x), num_outputs=3)
layer(x) # forward pass: one output per perceptron, each with its own random weights and bias

[ZeroDimTensor(label='', val=0.9999999992128678, grad=0.0, op='tanh'),
 ZeroDimTensor(label='', val=0.9999999996028481, grad=0.0, op='tanh'),
 ZeroDimTensor(label='', val=0.9999999999999996, grad=0.0, op='tanh')]

In [91]:
class MultiLayerPerceptron:
    """A stack of layers in which each layer's outputs feed the next layer."""

    def __init__(self, num_inputs, num_outputs):
        """Build the network.

        Args:
            num_inputs: number of values in the network's input vector.
            num_outputs: sequence with the number of perceptrons in each
                successive layer, e.g. ``[4, 4, 1]``. Any iterable of ints is
                accepted (list, tuple, ...).
        """
        # sizes = [network input size, perceptrons in layer 1,
        #          perceptrons in layer 2, ..., perceptrons in the last layer]
        # list(...) lets callers pass a tuple or other iterable; concatenating
        # a list with a non-list directly would raise TypeError.
        sizes = [num_inputs] + list(num_outputs)

        # create layers such that the number of outputs for each layer correctly matches the number of inputs for the next one
        self.layers = [Layer(sizes[i - 1], sizes[i]) for i in range(1, len(sizes))]

    def __call__(self, x):
        """Forward pass: run ``x`` through every layer in order and return the last output."""
        for layer in self.layers:
            x = layer(x) # compute forward pass at each layer, passing the output array as the input for the next layer
        return x
    

In [None]:
# Three input values feeding two hidden layers of 4 perceptrons and a single output perceptron
x = [1.0, 2.0, -3.0]
nn = MultiLayerPerceptron(num_inputs=len(x), num_outputs=[4, 4, 1])