### Scalar Tensor

Defining a `Tensor` class which wraps a scalar value and supports the basic operations of addition and multiplication and respective gradients for backpropagation.  

Also add support for the hyperbolic tangent which will be used as activation fuction.

In [1]:
import math
from collections import deque

class Tensor:
    def __init__(self, value, _inputs=()):
        self.value = value
        self.grad = 0.0
        self._inputs = _inputs
        self._backward = lambda: None

    def __repr__(self):
        return f"Tensor( Value: {self.value}, Grad: {self.grad} )"

    def __add__(self, other):
        other = other if isinstance(other, Tensor) else Tensor(other)
        result = Tensor(self.value + other.value, (self, other))

        def _backward():
            self.grad +=  result.grad
            other.grad += result.grad

        result._backward = _backward
            
        return result
        
    def __mul__(self, other):
        other = other if isinstance(other, Tensor) else Tensor(other)
        result = Tensor(self.value * other.value, (self, other))

        def _backward():
            self.grad += result.grad * other.value
            other.grad += result.grad * self.value
        result._backward = _backward

        return result
    
    def tanh(self):
        
        result = Tensor(math.tanh(self.value), (self,))

        def _backward():
            self.grad += result.grad * (1 - result.value ** 2)
        result._backward = _backward

        return result
    
    def backward(self):
        # topological sort
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._inputs:
                    build_topo(child)
                topo.append(v)
        build_topo(self)

        # backward pass
        self.grad = 1
        for v in reversed(topo):
            v._backward()
    

A simple example `x1 * w1 + x2 * w2 + b`:



In [2]:
# Inputs
x1 = Tensor(2.0)
x2 = Tensor(0.0)

# Weights
w1 = Tensor(-3.0)
w2 = Tensor(1.0)

# Bias
b = Tensor(6.88137358701954)

# x1w1 + x2w2 + b
y = (x1 * w1 + x2 * w2 + b).tanh()

print("y", y.value)

y.backward()

print("x1:", x1)
print("w1:", w1)
print("x2:", x2)
print("w2:", w2)

y 0.7071067811865458
x1: Tensor( Value: 2.0, Grad: -1.5000000000000073 )
w1: Tensor( Value: -3.0, Grad: 1.0000000000000049 )
x2: Tensor( Value: 0.0, Grad: 0.5000000000000024 )
w2: Tensor( Value: 1.0, Grad: 0.0 )


Verifying results above with PyTorch:

In [3]:
import torch

x1 = torch.Tensor([2.0]).double()                       ; x1.requires_grad = True
x2 = torch.Tensor([0.0]).double()                       ; x2.requires_grad = True
w1 = torch.Tensor([-3.0]).double()                      ; w1.requires_grad = True
w2 = torch.Tensor([1.0]).double()                       ; w2.requires_grad = True
b = torch.Tensor([6.88137358701954]).double()           ; b.requires_grad = True

y = torch.tanh(x1 * w1 + x2 * w2 + b)

print(y.item())

y.backward()

print("x1:", x1.grad.item())
print("w1:", w1.grad.item())
print("x2:", x2.grad.item())
print("w2:", w2.grad.item())

0.7071066904050358
x1: -1.5000003851533106
w1: 1.0000002567688737
x2: 0.5000001283844369
w2: 0.0


### Neuron & Multilayer Perceptron (MLP)

Now let's use the `Tensor` class to model an artificial `Neuron` and a multilayer percpetron (`MLP`)

![neuron model](./neuron_model.jpg)

In [4]:
import random

class Neuron:
    def __init__(self, nin):
        self.weights = [Tensor(random.uniform(-1, 1)) for _ in range(nin)]
        self.bias = Tensor(random.uniform(-1, 1))

    def __call__(self, inputs):
        return sum([w * i for w, i in zip(self.weights, inputs)], self.bias).tanh()
    
    def parameters(self):
        return self.weights + [self.bias]
    
class Layer:
    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        results = [neuron(x) for neuron in self.neurons]
        return results[0] if len(results) == 1 else results
    
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]
    
class MLP:
    def __init__(self, nin, nouts):
        dimensions = [nin] + nouts
        self.layers = [Layer(dimensions[i], dimensions[i+1]) for i in range(len(nouts))]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x
    
    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]

Applying it on a simple example:

In [5]:
n = MLP(3, [4, 4, 1])

# Input values
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [-1.0, 1.0, -1.0]
]

# Target values
ys = [1.0, -1.0, -1.0, 1.0]

In [7]:
for k in range(10):

    # forward pass
    ypred = [n(x) for x in xs]
    loss = sum([(prediction + target * -1) * (prediction + target * -1) for target, prediction in zip(ys, ypred)], Tensor(0))

    # backward pass
    for p in n.parameters():
        p.grad = 0
    loss.backward()

    # update parameters
    for p in n.parameters():
        p.value += -0.05 * p.grad 

    print(k, loss)

ypred

0 Tensor( Value: 0.0025013354302307255, Grad: 1 )
1 Tensor( Value: 0.00248839315820144, Grad: 1 )
2 Tensor( Value: 0.0024755830264202986, Grad: 1 )
3 Tensor( Value: 0.0024629030309516005, Grad: 1 )
4 Tensor( Value: 0.002450351208047471, Grad: 1 )
5 Tensor( Value: 0.002437925633147754, Grad: 1 )
6 Tensor( Value: 0.0024256244199096226, Grad: 1 )
7 Tensor( Value: 0.002413445719265785, Grad: 1 )
8 Tensor( Value: 0.002401387718510419, Grad: 1 )
9 Tensor( Value: 0.0023894486404117572, Grad: 1 )


[Tensor( Value: 0.9660081266522845, Grad: -0.06798374669543095 ),
 Tensor( Value: -0.9946950235905366, Grad: 0.010609952818926738 ),
 Tensor( Value: -0.9723795825618414, Grad: 0.055240834876317146 ),
 Tensor( Value: 0.9789531248741878, Grad: -0.04209375025162432 )]