In [1]:
import random
import math

class Value:
    def __init__(self, data, _children=(), _op="", label=""):
        self.data = data
        self.grad = 0.0
        self._prev = set(_children)
        self._backward = lambda: None
        self._op = _op
        self.label = label
        
    def __repr__(self):
        return f"Value(data={self.data})"
    
    def __add__(self, other):
        is_instance = isinstance(other, Value)
        if not is_instance:
            other = Value(other)
        out = Value(self.data + other.data, (self, other), "+")
        
        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward
        
        return out
    
    def __radd__(self, other):
        return self + other
    
    def __mul__(self, other):
        is_instance = isinstance(other, Value)
        if not is_instance:
            other = Value(other)
        out = Value(self.data * other.data, (self, other), "*")
        
        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        
        return out
    
    def __pow__(self,other):
        out = Value(self.data ** other, (self, ), "pow")
        
        def _backward():
            self.grad += (other * self.data**(other-1)) * out.grad
        
        out._backward = _backward
        #Pravim *out.grad zashtoto e chain rule i ni trqbva da umnojim po negoviqt gradient, demek local gradient
        # i out.grad
        return out
    
    def __rmul__(self, other):
        return self * other
    
    def tanh(self):
        tanh = (math.exp(2*self.data) - 1 ) / (math.exp(2*self.data) + 1)
        
        out = Value(tanh, (self, ), "tanh")
        
        def _backward():
            self.grad += (1 - tanh ** 2) * out.grad
        out._backward = _backward
        
        return out
    
    def __neg__(self):
        return self * -1
    
    def __sub__(self, other):
        return self + (-other)
    
    def __rmul__(self, other):
        return self * other
    
    
    def exp(self):
        expon_data = math.exp(self.data)
        out = Value(expon_data, (self, ), "exp")
        
        def _backward():
            self.grad += out.data * out.grad

        out._backward = _backward()
        
        return out
    
    def relu(self):
        out = Value(0 if self.data < 0 else self.data, (self, ), "relu")
        
        def _backward():
            self.grad += (out.data > 0) * out.grad
        out._backward = _backward
        return out
    
    def backward(self):
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
            for child in v._prev:
                build_topo(child)
            topo.append(v)
        build_topo(self)
        self.grad = 1
        for node in reversed(topo):
            node._backward()

In [2]:
class Neuron:
    def __init__(self, n_inputs, nonlin=True):
        self.weights = [Value(random.uniform(-1,1)) for _ in range(n_inputs)]
        self.bias = Value(0)
        self.nonlin = nonlin
    
    def parameters(self):
        return self.weights + [self.bias]
        
    def __call__(self, x):
        # tanh(Input * weights + bias)
        #print(self.nonlin)
        act = sum((wi*xi for wi,xi in zip(self.weights, x)), self.bias)
        return act.relu() if self.nonlin else act

In [3]:
class Layer():

    def __init__(self, nin, nout, **kwargs):
        self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]

    def __call__(self, x):
        out = [n(x) for n in self.neurons]
        return out[0] if len(out) == 1 else out

    def parameters(self):
        return [p for n in self.neurons for p in n.parameters()]

In [4]:
class MLP():

    def __init__(self, n_inputs, n_outputs):
        sz = [n_inputs] + n_outputs
        self.layers = [Layer(sz[i], sz[i+1], nonlin=i!=len(n_outputs)-1) for i in range(len(n_outputs))]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]

In [5]:
mlp = MLP(3, [4,4,1])

In [6]:
xs = [
    [2.0, 3.0, -1.0],
    [4.0, 6.0, -2.2],
    [8.3, 1.4, -1.9],
    [1.1, 5.8, 1.0],
]

In [7]:
ys = [1.0, -3.0, -1.0, 1.0]

In [8]:
def train():
    for epoch in range(3000):
        ypred = [mlp(x) for x in xs]
        loss = sum((yp - y) ** 2 for yp, y in zip(ypred, ys))
        for n in mlp.parameters():
            n.grad = 0.0
        loss.backward()
        for p in mlp.parameters():
            p.data += -0.0002 * p.grad
        if(epoch % 200 == 0):
            print(loss)
    return ypred

In [9]:
y_predicted = train()

Value(data=18.096881163539607)
Value(data=3.5530841425642383)
Value(data=2.173890091189958)
Value(data=1.0223679349405288)
Value(data=0.23208099076841546)
Value(data=0.10575184424636486)
Value(data=0.004856782119797147)
Value(data=0.0007539036988299458)
Value(data=0.00010720966882858047)
Value(data=1.4858642771425692e-05)
Value(data=7.114519429936635e-06)
Value(data=5.849400184690581e-07)
Value(data=1.5404427335475734e-08)
Value(data=4.054781726725657e-09)
Value(data=4.1942462701935236e-10)


In [10]:
rounded_values = [round(val.data) for val in y_predicted]
rounded_values

[1, -3, -1, 1]

In [11]:
ys

[1.0, -3.0, -1.0, 1.0]