In [87]:
import math 

In [88]:
class Value:
    """A scalar node in a dynamically built computation graph with reverse-mode autodiff.

    Arithmetic on ``Value`` objects returns new ``Value`` objects that remember
    their operands and how to push gradients back to them. Calling
    ``backward()`` on a result fills in ``grad`` for every node that
    contributed to it.

    Plain ints/floats may appear on either side of ``+``, ``-``, ``*`` and
    ``/``; exponents of ``**`` must be plain ints/floats.
    """

    def __init__(self, data, _children = (), _op = ""):
        """Create a node holding ``data``.

        Args:
            data: the scalar stored in this node.
            _children: the nodes this one was computed from (internal use).
            _op: label of the operation that produced this node (internal use).
        """
        self.data = data
        self.grad = 0.0 # d(root)/d(self); zero until backward() reaches this node
        self._backward = lambda: None # leaf nodes have nothing to propagate
        self._prev = set(_children) # operands, deduplicated (a + a has one child)
        self._op = _op # last operation

    def __repr__(self):
        return f"Value(data={self.data})" # display Value object nicely

    @staticmethod
    def _wrap(other):
        """Return ``other`` unchanged if it is a Value, otherwise wrap the number in one."""
        return other if isinstance(other, Value) else Value(other)

    def __add__(self, other): # self + other
        other = Value._wrap(other)
        out = Value(self.data + other.data, (self, other), "+")

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1, so the upstream gradient passes through.
            # += accumulates contributions when a node is used more than once.
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        return out

    def __radd__(self, other): # other + self, with a non-Value on the left
        return self + other

    def __neg__(self): # -self
        return self * -1

    def __sub__(self, other): # self - other = self + (-other)
        return self + (-other)

    def __rsub__(self, other): # other - self, with a non-Value on the left
        return other + (-self)

    def __mul__(self, other): # self * other
        other = Value._wrap(other)
        out = Value(self.data * other.data, (self, other), "*")

        def _backward():
            # d(a * b)/da = b and d(a * b)/db = a, each scaled by the upstream gradient.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def __rmul__(self, other): # other * self, with a non-Value on the left
        return self * other

    def exp(self):
        """Return e ** self as a new Value."""
        out = Value(math.exp(self.data), (self, ), "exp")

        def _backward():
            self.grad += out.data * out.grad # d/dx(e^x) = e^x, and out.data already holds e^x
        out._backward = _backward
        return out

    def __pow__(self, other): # self ** other
        """Raise this Value to a constant int/float power.

        Raises:
            AssertionError: if ``other`` is not an int or float.
        """
        assert isinstance(other, (int, float)), "only support int/float power"
        out = Value(self.data ** other, (self, ), "pow")

        def _backward():
            self.grad += other * (self.data ** (other - 1)) * out.grad # power rule
        out._backward = _backward
        return out

    def pow(self, other):
        """Method spelling of ``self ** other``, kept for existing callers."""
        return self ** other

    def __truediv__(self, other): # self / other = self * other**-1
        return self * other ** -1

    def __rtruediv__(self, other): # other / self, with a non-Value on the left
        return other * self ** -1

    def tanh(self):
        """Return tanh(self) as a new Value."""
        # math.tanh saturates to +/-1 for large inputs, whereas building tanh
        # from math.exp(2 * x) raises OverflowError there.
        t = math.tanh(self.data)
        out = Value(t, (self, ), "tanh")

        def _backward():
            self.grad += (1 - t**2) * out.grad # d/dx tanh(x) = 1 - tanh(x)^2
        out._backward = _backward
        return out

    def backward(self):
        """Back-propagate from this node, accumulating ``grad`` on every ancestor.

        Sets ``self.grad`` to 1.0 and then runs each node's local backward step
        in reverse topological order, so a node's gradient is complete before
        it is pushed on to its operands. Gradients are accumulated, not reset:
        zero them first when reusing nodes across several backward passes.
        """
        topological_sort = []
        visited = set()
        # Iterative post-order DFS: an explicit stack keeps long operation
        # chains from hitting Python's recursion limit.
        stack = [(self, False)]
        while stack:
            node, children_done = stack.pop()
            if children_done:
                topological_sort.append(node)
                continue
            if node in visited:
                continue
            visited.add(node)
            stack.append((node, True)) # revisit once every operand is emitted
            for child in node._prev:
                if child not in visited:
                    stack.append((child, False))

        self.grad = 1.0 # base case: d(self)/d(self)
        for node in reversed(topological_sort):
            node._backward()

In [89]:
# Sanity check for Value: build f = (a * b) * (a + b) and print every node's
# value next to the gradient that backward() assigned to it.
a, b = Value(5.0), Value(-9.0)
d, e = a * b, a + b
f = d * e

f.backward()

names = ["a", "b", "d", "e", "f"]
for name, x in zip(names, (a, b, d, e, f)):
    print(f"{name}: value = {x.data:.1f}, grad = {x.grad:.1f}")

a: value = 5.0, grad = -9.0
b: value = -9.0, grad = -65.0
d: value = -45.0, grad = -4.0
e: value = -4.0, grad = -45.0
f: value = 180.0, grad = 1.0


In [90]:
import random 
class Neuron:
    """One tanh unit computing tanh(w . x + b) over its inputs."""

    def __init__(self, n_inputs):
        """Create ``n_inputs`` weights and one bias, each drawn via random.uniform(-1, 1)."""
        self.w = []
        for _ in range(n_inputs):
            self.w.append(Value(random.uniform(-1, 1)))
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return the activation for input vector ``x`` (paired with the weights via zip)."""
        pre_activation = 0  # same starting value the built-in sum() uses
        for weight, feature in zip(self.w, x):
            pre_activation = pre_activation + weight * feature
        pre_activation = pre_activation + self.b  # w . x + b
        return pre_activation.tanh()

    def parameters(self):
        """Return the weights followed by the bias as one new list."""
        return [*self.w, self.b]

class Layer:
    """A group of neurons that all receive the same input vector."""

    def __init__ (self, n_inputs, n_outputs):
        """Build ``n_outputs`` neurons, each taking ``n_inputs`` inputs."""
        self.neurons = []
        for _ in range(n_outputs):
            self.neurons.append(Neuron(n_inputs))

    def __call__(self, x):
        """Return a list with each neuron's output for ``x``, in neuron order."""
        return [neuron(x) for neuron in self.neurons]

    def parameters(self):
        """Return every neuron's parameters flattened into one list."""
        return [p for neuron in self.neurons for p in neuron.parameters()]

class MLP:
    """A multi-layer perceptron: a stack of Layer objects applied one after another."""

    def __init__(self, n_inputs, n_outputs):
        """Build the layer stack.

        Args:
            n_inputs: size of the input vector.
            n_outputs: sequence with the number of neurons in each successive layer.
        """
        # Prepend the input size so consecutive entries give each layer's (fan-in, fan-out).
        # list() lets callers pass a tuple as well as a list.
        layer_size = [n_inputs] + list(n_outputs)
        self.layers = [
            Layer(layer_size[i], layer_size[i + 1])
            for i in range(len(layer_size) - 1)
        ]

    def __call__(self, input): # input is input/data vector x
        """Run ``input`` through every layer in order and return the last layer's output.

        With no layers the input is returned unchanged.
        """
        x = input
        for layer in self.layers:
            x = layer(x) # each layer consumes the previous layer's output
        return x

    def parameters(self):
        """Return the parameters of all layers flattened into one list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

In [91]:
# Example: a network with three inputs, hidden layers of 5 and 3 neurons, and
# one output neuron, evaluated on a single sample.
x = [1.0, 4.0, -3.0]
n = MLP(len(x), [5, 3, 1])
n(x)

[Value(data=-0.3224122345161351)]

In [92]:
# Toy training set: four samples, each a list of three float features.
x_data = [
    [1.0, 4.0, -3.0],
    [2.0, -1.0, 1.0], 
    [0.5, 3.0, -2.0],
    [4.0, 0.5, -2.0],
]

# One target per row of x_data, in the same order.
labels = [1.0, 1.0, -1.0, -1.0] # Desired targets; the two classes of this (assumed) binary task are encoded as +1.0 and -1.0

In [None]:
# Plain gradient descent on the summed squared error over the whole dataset.
num_epochs = 20
step_size = 0.01

for k in range(num_epochs):
    # Forward pass. n(x) returns the list of output-layer activations; the
    # network has a single output neuron, so take element 0 to get a scalar.
    pred = [n(x)[0] for x in x_data]

    # Squared error per sample, written as err * err so that only
    # subtraction and multiplication are required of the predictions.
    errors = [y_pred - y_true for y_true, y_pred in zip(labels, pred)]
    loss = sum(err * err for err in errors)

    for p in n.parameters(): # zero out gradients left over from the previous epoch
        p.grad = 0.0

    loss.backward() # backward pass

    for p in n.parameters(): # step against the gradient to reduce the loss
        p.data += step_size * (-p.grad)

    print(f"Epoch {k}: {loss.data}")