In [1]:
import numpy as np
import random
import math

In [2]:
# the very basic value class upon which activations are performed, and can backpropagate through to their parents
# we store each value as a member of a DAG with the gradient propagating through its parents
class Value():
    def __init__(self, data, _op='', _parents=(), ):
        self._op = _op
        self._backprop = lambda: None
        self.grad = 0.0
        self.data = data
        self._parents = _parents       
    def __repr__(self):
        return f"Value(data={self.data})"        
    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, _op='+', _parents=(self, other))
        def _backprop():
            delta = out.grad
            self.grad += 1.0*delta
            other.grad += 1.0*delta
        out._backprop = _backprop
        return out
    def __radd__(self, other):
        return self + other
    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, _op='*', _parents=(self, other))
        def _backprop():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backprop = _backprop
        return out
    def __rmul__(self, other):
        return self * other
    def __neg__(self):
        return (-1)*self;      
    def __sub__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        return self + (-other)
    def __rsub__(self, other):
        return (-self) + (other)
    def __pow__(self, other):
        assert isinstance(other, (int, float))
        out = Value(self.data**other, _parents=(self, ), _op=f"^{other}")
        def _backprop():
            self.grad += other*(self.data**(other - 1))*out.grad
        out._backprop = _backprop
        return out
    def __truediv__(self, other):
        return self*(other**-1)
    #unipolar
    def sigmoid(self):
        x = self.data
        sig = 1 / (1 + math.exp(-x))
        out = Value(sig, _op='sig', _parents=(self, ))
        # the derivative of o = sigmoid is o*(1 - o)
        def _backprop():
            delta = out.grad * (1 - sig) * sig
            self.grad += delta
        out._backprop = _backprop        
        return out  
    def tanh(self):
        x = self.data
        t = (math.exp(2*x) - 1) / (math.exp(2*x) + 1)
        out = Value(t, _op='tanh', _parents=(self, ))
        def _backprop():
            delta = out.grad * (1 - t**2)
            self.grad += delta
        out._backprop = _backprop
        return out
    def backprop(self):

        # toposort
        topo = []
        visited = set()
        def topo_sort(v):
            if v not in visited:
                visited.add(v)
                for parent in v._parents:
                    topo_sort(parent)
                topo.append(v)
        topo_sort(self)
        self.grad = 1
        for node in reversed(topo):
            node._backprop()
        

In [3]:
from graphviz import Digraph

def trace(root):
  # builds a set of all nodes and edges in a graph
  nodes, edges = set(), set()
  def build(v):
    if v not in nodes:
      nodes.add(v)
      for child in v._parents:
        edges.add((child, v))
        build(child)
  build(root)
  return nodes, edges

def draw_dot(root):
  dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR = left to right

  nodes, edges = trace(root)
  for n in nodes:
    uid = str(id(n))
    # for any value in the graph, create a rectangular ('record') node for it
    dot.node(name = uid, label = "{ data %.4f | grad %.4f }" % ( n.data, n.grad), shape='record')
    if n._op:
      # if this value is a result of some operation, create an op node for it
      dot.node(name = uid + n._op, label = n._op)
      # and connect this node to it
      dot.edge(uid + n._op, uid)

  for n1, n2 in edges:
    # connect n1 to the op node of n2
    dot.edge(str(id(n1)), str(id(n2)) + n2._op)

  return dot

In [28]:
# Pass in the dimension of input and weights initialized accordingly, single bias
class Neuron():
    def __init__(self, idim, activation='tanh'):
        self.weights = [Value(random.uniform(-1, 1)) for _ in range(idim)]
        self.bias = Value(random.uniform(-1, 1))
        self.activation = activation
    def __repr__(self):
        return f"weights: {self.weights} \n bias: {self.bias} \n activation: {self.activation}"
    def parameters(self):
        return [self.bias] + self.weights
    def forward(self, x):
        net = sum((wi*xi for wi, xi in zip(self.weights, x)), self.bias)
        activation_func = getattr(net, self.activation, None)
        if activation_func is None:
            raise ValueError("Invalid activation")
        out = activation_func()
        return out

class Layer():
    def __init__(self, idim, odim, activation):
        self.neurons = [Neuron(idim, activation) for _ in range(odim)]
    def __repr__(self):
        final = f""
        for n in self.neurons:
            final += f"neuron: {repr(n)}\n" 
        return final
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]
    def forward(self, x):
        outvec = [n.forward(x) for n in self.neurons]
        return outvec[0] if len(outvec) == 1 else outvec

#expects an 2D array representing data points, this is a scalar valued library after all
class MLP():
    def __init__(self, idim, odims, learning_rate, x_in, y_target, activation='tanh', epochs = 20):
        dims = [idim] + odims
        self.layers = [Layer(dims[i], dims[i+1], activation) for i in range(len(odims))]
        self.x_in = x_in
        self.y_target = y_target
        self.eta = learning_rate
        self.epochs = epochs
        self.preds = None
    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]
    def forward(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x
    def train(self):
        for i in range(self.epochs):
            # make prediction
            ypred = [self.forward(x) for x in self.x_in]
            
            #calculate loss - using a simple squared loss here
            loss = sum((yp - yt)**2 for yp, yt in zip(ypred, self.y_target))
            # minimize the loss using backprop and bump parameters

            loss.backprop()
            #update
            for p in self.parameters():
                p.data -= self.eta*p.grad
                p.grad = 0.0            
            print(f"Epoch {i}: Loss : {loss.data}")
            self.preds = ypred

In [26]:
xt = [[2,0,3,0,-1,0],
      [3,0,-1.0,0,5],
      [0.5,1.0,1.0],
      [1.0,1.0,-1.0]]
yt = [1.0, -1.0, -1.0, 1.0]
mlp = MLP(idim=3, odims=[4, 4, 1], learning_rate=0.1, x_in=xt, y_target=yt,activation='tanh', epochs=1000)

In [27]:
mlp.train()

Epoch 0: Loss : 4.021491520943238
Epoch 1: Loss : 3.815834088673458
Epoch 2: Loss : 3.584942988680645
Epoch 3: Loss : 3.324742707812087
Epoch 4: Loss : 3.107897407706607
Epoch 5: Loss : 2.994085530490622
Epoch 6: Loss : 2.930887234290701
Epoch 7: Loss : 2.8813408712324713
Epoch 8: Loss : 2.839489260194895
Epoch 9: Loss : 2.802365327902342
Epoch 10: Loss : 2.7676869218630658
Epoch 11: Loss : 2.7333943322826384
Epoch 12: Loss : 2.6972564694442234
Epoch 13: Loss : 2.65637559999179
Epoch 14: Loss : 2.6063587981812244
Epoch 15: Loss : 2.539699318243554
Epoch 16: Loss : 2.442535776416516
Epoch 17: Loss : 2.2895750882344728
Epoch 18: Loss : 2.046234140566972
Epoch 19: Loss : 1.7037077084434369
Epoch 20: Loss : 1.318468053968852
Epoch 21: Loss : 0.9783347231539403
Epoch 22: Loss : 0.7328358799819346
Epoch 23: Loss : 0.5687562400407039
Epoch 24: Loss : 0.4576807828670676
Epoch 25: Loss : 0.3794231814416531
Epoch 26: Loss : 0.322194836986225
Epoch 27: Loss : 0.27912643633901196
Epoch 28: Loss : 

In [10]:
mlp.preds

[Value(data=0.9519890821879483),
 Value(data=-0.9736512099673912),
 Value(data=-0.9472615145436858),
 Value(data=0.9529255850336709)]