In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
class Value:
  """Scalar node of a dynamically built computation graph with reverse-mode autodiff.

  Every arithmetic operation returns a new Value that remembers its operands
  (`_prev`), the operation that produced it (`_op`) and a `_backward` closure
  that adds this node's contribution to the gradients of its operands.

  Attributes:
    data: the wrapped number.
    grad: derivative of the node `backward()` was called on with respect to
      this node; starts at 0.0 and is accumulated with `+=`.
    label: optional human-readable name.
  """

  #constructor
  def __init__(self, data, _children=(),_op='', label=''):
    self.data = data
    self._prev = set(_children)
    self._op = _op
    self.label = label
    self.grad = 0.0
    # leaf nodes have nothing to propagate, hence the no-op default
    self._backward = lambda: None

  #representation
  def __repr__(self):
    return f"Value(data={self.data})"

  #addition
  def __add__(self, other):
    """Return self + other; plain numbers are wrapped in a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self,other), '+')

    #backprop in addition: the local derivative is 1 for both operands
    def _backward():
      self.grad += 1.0*out.grad
      other.grad += 1.0*out.grad
    out._backward = _backward
    return out

  #multiplication
  def __mul__(self, other):
    """Return self * other; plain numbers are wrapped in a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    #backprop in multi: each operand receives the other operand's value
    def _backward():
      self.grad += other.data*out.grad
      other.grad += self.data*out.grad
    out._backward = _backward
    return out

  #subtraction
  def __sub__(self, other):
    """Return self - other, expressed as self + (-other)."""
    return self + (-other)

  def __rmul__(self,other):
    """Handle number * Value."""
    return self * other

  def __rsub__(self,other):
    """Handle number - Value."""
    return other + (-self)

  def __radd__(self, other):
    """Handle number + Value (this is also what lets sum() start from 0)."""
    return self + other

  def __neg__(self):
    """Return -self, expressed as self * -1."""
    return self*(-1)

  #tanh
  def tanh(self):
    """Return tanh(self) as a new Value."""
    # math.tanh saturates at +/-1 for large |x|; the explicit
    # (e^(2x)-1)/(e^(2x)+1) form raises OverflowError in math.exp instead.
    t = math.tanh(self.data)
    out = Value(t, (self, ), 'tanh')

    #backprop in tanh: d/dx tanh(x) = 1 - tanh(x)^2
    def _backward():
      self.grad += (1-t**2)*out.grad
    out._backward = _backward
    return out

  #power
  def __pow__(self,other):
    """Return self ** other for a constant int/float exponent.

    Raises:
      AssertionError: if `other` is not an int or float.
    """
    assert isinstance(other, (int, float)), "only supporting int/float for now"
    out = Value(self.data**other, (self, ), f'**{other}')

    #backprop in pow: d/dx x^k = k * x^(k-1)
    def _backward():
      if other == 0:
        # x**0 is constant; skipping the formula avoids evaluating 0.0**-1
        local = 0.0
      else:
        local = other * (self.data**(other-1))
      self.grad += local * out.grad
    out._backward = _backward
    return out

  #exp
  def exp(self):
    """Return e ** self as a new Value."""
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')

    #backprop in exp: the derivative of e^x is e^x, i.e. out.data
    def _backward():
      self.grad += out.data * out.grad
    out._backward = _backward
    return out

  def __truediv__(self, other):
    """Return self / other, expressed as self * other**-1."""
    return self * other**-1

  def __rtruediv__(self, other):
    """Handle number / Value, expressed as number * self**-1."""
    return other * self**-1

  #using topological sort to get the order in which to apply backprop in and then using the _backward in each node to calc grad
  def backward(self):
    """Backpropagate from this node through every node it depends on.

    Sets this node's grad to 1.0 and then runs each node's `_backward` in
    reverse topological order. Gradients are accumulated with `+=` and are
    not reset here.
    """
    topo = []
    visited = {self}
    # Iterative post-order DFS: same order as the recursive formulation, but
    # graph depth is not limited by the interpreter's recursion limit.
    stack = [(self, iter(self._prev))]
    while stack:
      node, children = stack[-1]
      for child in children:
        if child not in visited:
          visited.add(child)
          stack.append((child, iter(child._prev)))
          break
      else:
        # every child is already emitted, so the node itself can be emitted
        topo.append(node)
        stack.pop()
    #setting out.grad to 1 to avoid 0 multi in _backward fn
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()




In [None]:
# Build d = a*b + c with the overloaded operators and inspect the result node.
a = Value(2.0)
b = Value(-3.0)
c = Value(10.0)
d = a*b + c
# operands of d (not echoed: a cell only displays its last expression)
d._prev
# operation that produced d
d._op

'+'

In [None]:
# Forward and backward pass through one tanh neuron: out = tanh(x1*w1 + x2*w2 + b)
#inputs
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')
#weights (each label matches the name of the variable it is attached to)
w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')
#bias
b = Value(5.0, label='b')
#cellbody-content: weighted inputs, their sum, then the bias
X1 = x1*w1; X1.label = 'x1*w1'
X2 = x2*w2; X2.label = 'x2*w2'
X1X2 = X1 + X2; X1X2.label = 'x1w1 + x2w2'
n = X1X2 +b; n.label = 'n'
#activation
out = n.tanh(); out.label = 'out'
#fills .grad on every node that out depends on
out.backward()

In [None]:
#results: gradient of out with respect to x1, w1, x2 and w2, in that order
print(*(node.grad for node in (x1, w1, x2, w2)))


-1.2599230248420783 0.8399486832280523 0.41997434161402614 0.0


In [None]:
import torch
# Same single-neuron computation in PyTorch, used as a reference for the Value class.
# torch.tensor with an explicit dtype builds float64 leaves directly; the legacy
# torch.Tensor(...) constructor creates float32 first, so values that are not
# exactly representable in float32 would lose precision before the upcast.
# requires_grad=True is needed because leaf tensors do not track gradients by default.
x1 = torch.tensor([2.0], dtype=torch.float64, requires_grad=True)
x2 = torch.tensor([0.0], dtype=torch.float64, requires_grad=True)
w1 = torch.tensor([-3.0], dtype=torch.float64, requires_grad=True)
w2 = torch.tensor([1.0], dtype=torch.float64, requires_grad=True)
b = torch.tensor([5.0], dtype=torch.float64, requires_grad=True)
n = x1*w1 + x2*w2 + b
out = torch.tanh(n)
out.backward()

In [None]:
#results in pytorch: gradients for x1, w1, x2, w2 followed by the output tensor
print(*(leaf.grad.item() for leaf in (x1, w1, x2, w2)), out)


-1.2599230248420783 0.8399486832280523 0.41997434161402614 0.0 tensor([-0.7616], dtype=torch.float64, grad_fn=<TanhBackward0>)


In [None]:
import random
class Neuron:
  """One tanh unit computing tanh(b + sum_i w[i]*x[i])."""

  def __init__(self,nin):
    #random initial weights (drawn first) and bias (drawn last), each via random.uniform(-1,1)
    weights = []
    for _ in range(nin):
      weights.append(Value(random.uniform(-1,1)))
    self.w = weights
    self.b = Value(random.uniform(-1,1))

  def __call__(self,x):
    #start from the bias and add the weighted inputs one by one, then apply the activation
    act = self.b
    for wi, xi in zip(self.w, x):
      act = act + wi*xi
    return act.tanh()

  def parameters(self):
    #all weights followed by the bias, as a new list
    return [*self.w, self.b]

#a layer is a list of neurons
class Layer:
  """nout neurons that each receive the same nin-sized input."""

  def __init__(self,nin, nout):
    neurons = []
    for _ in range(nout):
      neurons.append(Neuron(nin))
    self.neurons = neurons

  def __call__(self,x):
    #evaluate every neuron on x; a single-neuron layer returns the bare output instead of a list
    outs = [neuron(x) for neuron in self.neurons]
    if len(outs) == 1:
      return outs[0]
    return outs

  def parameters(self):
    #parameters of all neurons, flattened in neuron order
    return [p for neuron in self.neurons for p in neuron.parameters()]

#defining a multilayer perceptron
class MLP:
  """Multilayer perceptron: a chain of Layer objects applied one after another.

  Args:
    nin: number of inputs to the first layer.
    nouts: output size of each layer, in order. Any iterable of ints is
      accepted (list, tuple, ...), not only a list.
  """

  def __init__(self, nin , nouts):
    # unpacking instead of `[nin] + nouts` so that non-list iterables work too
    sz = [nin, *nouts]
    # consecutive size pairs (sz[i], sz[i+1]) give each layer's fan-in and fan-out
    self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]

  def __call__(self,x):
    #feed the output of each layer into the next one
    for layer in self.layers:
      x = layer(x)
    return x

  def parameters(self):
    #parameters of all layers, flattened in layer order
    return [p for layer in self.layers for p in layer.parameters()]


In [None]:
# Seed the global RNG so the randomly initialised weights, and therefore every
# number printed below, are the same on each Restart & Run All.
random.seed(42)
x = [2.0, 3.0, -1.0]
# 3 inputs -> two hidden layers of 4 neurons -> 1 output
n = MLP(3,[4,4,1])
n(x)

Value(data=-0.887858337045877)

In [None]:
#toy dataset: ys[i] is the desired output for the 3-feature input xs[i]
ys = [1.0, -1.0, -1.0, 1.0]

xs = [
    [2.0, 3.0, -1.0],   # target  1.0
    [3.0, -1.0, 0.5],   # target -1.0
    [0.5, 1.0, 1.0],    # target -1.0
    [1.0, 1.0, -1.0],   # target  1.0
]

In [None]:
#alpha is the learning rate; it does not change between iterations
alpha = 0.05
for i in range(20):
  #forward pass: one prediction per training input
  ypred = [n(x) for x in xs]
  #sum of squared errors against the targets
  loss = sum((yout-ygt)**2 for ygt, yout in zip(ys,ypred))
  #clear the gradients left over from the previous iteration, since backward() accumulates
  for p in n.parameters():
    p.grad = 0.0
  #backprop
  loss.backward()
  #gradient descent update: step each parameter against its gradient
  for p in n.parameters():
    p.data -= alpha*p.grad
  #loss reported here was measured before this iteration's update
  print(i,loss.data)

0 0.005964289499783313
1 0.00586482969638784
2 0.005768801058019966
3 0.005676022458715054
4 0.005586325593568633
5 0.005499553848669623
6 0.005415561289428992
7 0.005334211753015049
8 0.0052553780325484195
9 0.0051789411423609644
10 0.005104789655027485
11 0.005032819102081133
12 0.004962931431350638
13 0.004895034514743188
14 0.0048290417010549746
15 0.004764871409050915
16 0.004702446756621412
17 0.004641695222318143
18 0.004582548335998985
19 0.004524941395685565


In [None]:
# Predictions from the last loop iteration (computed before that iteration's
# parameter update); compare them with the targets in ys.
ypred

[Value(data=0.969770328718326),
 Value(data=-0.971719434547973),
 Value(data=-0.96898590154409),
 Value(data=0.9569948411865711)]

### Thank you Andrej Karpathy for this awesome walkthrough on how to make neural networks from scratch!

https://www.youtube.com/pop-up-player/VMj-3S1tku0?autoplay=1&start=5703
