<a href="https://colab.research.google.com/github/YassGan/MicroGrad/blob/main/micrograd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1386]:
import math
import numpy as np
import matplotlib.pyplot as plt


In [1387]:
def f(x):
  """Evaluate the quadratic 3*x**2 - 4*x + 1 at x."""
  squared_term = 3 * x**2
  linear_term = 4 * x
  # Same left-to-right evaluation as the one-line form: (3x^2 - 4x) + 1
  return squared_term - linear_term + 1

In [1388]:
# Sample points from -6 (inclusive) to 6 (exclusive) in steps of 0.25.
xs = np.arange(-6, 6, 0.25)

# f evaluated at every sample point. A comprehension keeps the loop variable
# out of the notebook's global namespace, unlike a top-level `for` loop.
xy = [f(x) for x in xs]



In [1389]:
class Value:
  """A scalar that remembers how it was computed so gradients can be backpropagated.

  Attributes (all set in __init__):
    data      -- the wrapped number
    grad      -- accumulated derivative of the backpropagated output w.r.t. this value
    label     -- optional display name
    _prev     -- set of Value operands this node was computed from
    _op       -- string naming the operation that produced this node ("" for leaves)
    _backward -- closure that adds this node's gradient contribution to its operands
  """

  def __init__(self, data, _children=(), _op="", label=""):
    self.data = data
    self.grad = 0.0
    self.label = label
    self._prev = set(_children)
    self._op = _op
    # Leaves have nothing to propagate; operations overwrite this closure.
    self._backward = lambda: None

  def __repr__(self):
    return f'Value  {self.data}'

  def __add__(self, other):  # self + other
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')

    def _backward():
      # d(a+b)/da = d(a+b)/db = 1; += so a node used several times accumulates.
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    return out

  def __radd__(self, other):  # other + self, e.g. 1 + Value(2)
    return self + other

  def __neg__(self):  # -self
    return self * -1

  def __sub__(self, other):  # self - other
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data - other.data, (self, other), '-')

    def _backward():
      # d(a-b)/da = 1, d(a-b)/db = -1
      self.grad += 1.0 * out.grad
      other.grad -= 1.0 * out.grad
    out._backward = _backward
    return out

  def __rsub__(self, other):  # other - self, e.g. 5 - Value(2)
    return Value(other) - self

  def __mul__(self, other):  # self * other
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    def _backward():
      # d(a*b)/da = b, d(a*b)/db = a
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
    return out

  def __rmul__(self, other):  # other * self
    return self * other

  def __pow__(self, other):
    """Raise to a constant int/float power (the exponent is not a graph node)."""
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
      # d(x**k)/dx = k * x**(k-1)
      self.grad += other * (self.data**(other-1)) * out.grad
    out._backward = _backward
    return out

  def tanh(self):
    """Return tanh(self) as a new Value.

    Uses math.tanh rather than (e^{2x}-1)/(e^{2x}+1): the explicit form calls
    math.exp(2*x), which raises OverflowError for large positive x.
    """
    t = math.tanh(self.data)
    out = Value(t, (self,), 'tanh')

    def _backward():
      # d tanh(x)/dx = 1 - tanh(x)**2
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    return out

  def backward(self):
    """Backpropagate from this node through every Value it depends on.

    Sets this node's grad to 1 and then runs each node's _backward closure in
    reverse topological order. Gradients are accumulated with +=, and grads
    already stored on other nodes are not cleared here.
    """
    topo = []
    visited = set()

    def build_topo(v):
      # Depth-first walk: a node is appended only after all of its operands.
      if v not in visited:
        visited.add(v)
        for child in v._prev:
          build_topo(child)
        topo.append(v)

    build_topo(self)
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

In [1390]:
from graphviz import Digraph

def trace(root):
  """Collect every node reachable from root via _prev, plus the (operand, result) edges.

  Returns a pair of sets: (nodes, edges).
  """
  seen_nodes = set()
  seen_edges = set()
  pending = [root]
  while pending:
    current = pending.pop()
    if current in seen_nodes:
      continue
    seen_nodes.add(current)
    for operand in current._prev:
      # Record the edge even when the operand was already visited.
      seen_edges.add((operand, current))
      pending.append(operand)
  return seen_nodes, seen_edges

def draw_dot(root):
  """Build a left-to-right Graphviz Digraph of the computation graph ending at root.

  Every value becomes a record-shaped node showing its label, data and grad.
  Every value produced by an operation also gets a separate op node feeding it.
  """
  graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right

  all_nodes, all_edges = trace(root)
  for node in all_nodes:
    node_id = str(id(node))
    # rectangular ('record') node for the value itself
    record_label = "{ %s | data %.4f | grad %.4f }" % (node.label, node.data, node.grad)
    graph.node(name=node_id, label=record_label, shape='record')
    if not node._op:
      continue
    # the value is the result of an operation: add an op node and link it in
    op_id = node_id + node._op
    graph.node(name=op_id, label=node._op)
    graph.edge(op_id, node_id)

  for src, dst in all_edges:
    # operands point at the op node of the value they produced
    graph.edge(str(id(src)), str(id(dst)) + dst._op)

  return graph

In [1391]:
# Leaf values of the example expression L = (a*b + c) * f
a = Value(2.0, label='a')
b = Value(-3.0, label='b')
c = Value(10.0, label='c')
# NOTE: this rebinds the name `f`, shadowing the polynomial f(x) defined earlier.
f = Value(-2.0, label='f')

# Intermediate results, labelled after construction
e = a*b
e.label = 'e'
d = e + c
d.label = 'd'
L = d * f
L.label = 'L'


In [1392]:
# Manual backpropagation through L = (a*b + c) * f, applying the chain rule by hand
L.grad = 1                    # dL/dL

# L = d * f
d.grad = f.data * L.grad
f.grad = d.data * L.grad

# d = e + c: addition passes the gradient through unchanged
c.grad = e.grad = d.grad * 1

# e = a * b
a.grad = b.data * e.grad
b.grad = a.data * e.grad


In [1393]:
# draw_dot(L)

In [1394]:
# Nudge `a` a small step along its gradient.
a.data += 0.01 * a.grad

# Forward pass first, so every node holds values for the nudged input.
e.data = a.data * b.data
d.data = e.data + c.data
L.data = d.data * f.data

# Then recompute the gradients by hand from the refreshed values.
# (Doing this before the forward pass would make f.grad use a stale d.data.)
f.grad = d.data * L.grad
d.grad = f.data * L.grad
e.grad = d.grad * 1
c.grad = d.grad * 1
a.grad = b.data * e.grad
b.grad = a.data * e.grad

In [1395]:
#inputs x1, x2
x1=Value(2.0, label='x1')
x2=Value(0.0, label='x2')

#weights w1, w2
w1=Value(-3.0, label='w1')
w2=Value(1.0, label='w2')

#bias of the neuron
b=Value(6.8813735870195432, label='b')

#x1*w1 + x2*w2 +b
x1w1=x1*w1; x1w1.label="x1*w1"
x2w2=x2*w2; x2w2.label="x2*w2"
x1w1x2w2=x1w1+x2w2;x1w1x2w2.label=" x1*w1 + x2*w2 "
n=x1w1x2w2+b; n.label='n'
o=n.tanh(); o.label='o'



In [1396]:
# Manual backpropagation through the neuron o = tanh(n).
# Seed the output and the tanh input too, so no node in the graph is left at grad 0.
o.grad = 1.0   # do/do
n.grad = 0.5   # do/dn = 1 - o.data**2, which is ~0.5 for this input

# n = x1w1x2w2 + b and x1w1x2w2 = x1w1 + x2w2: additions pass n.grad through unchanged
x1w1x2w2.grad = 0.5
b.grad = 0.5
x1w1.grad = 0.5
x2w2.grad = 0.5

# products: each factor's gradient is the other factor times the upstream gradient
x1.grad = w1.data * x1w1.grad
w1.grad = x1.data * x1w1.grad
x2.grad = w2.data * x2w2.grad
w2.grad = x2.data * x2w2.grad


In [1397]:
import torch
import random

In [1398]:
# The same single-neuron example rebuilt with PyTorch tensors.
# NOTE: torch.Tensor(...) builds a float32 tensor, so each literal is rounded to
# single precision before .double() widens it; the bias therefore differs
# slightly from the Python float used in the Value-based version.
x1 = torch.Tensor([2.0]).double().requires_grad_(True)
x2 = torch.Tensor([0.0]).double().requires_grad_(True)
w1 = torch.Tensor([-3.0]).double().requires_grad_(True)
w2 = torch.Tensor([1.0]).double().requires_grad_(True)
b = torch.Tensor([6.8813735870195432]).double().requires_grad_(True)

# forward pass: pre-activation, then tanh
n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

print(o.data.item())

0.7071066904050358


In [1399]:
class Neuron:
  """A single tanh unit: tanh(sum_i w_i * x_i + b)."""

  def __init__(self, nin):
    # nin weights first, then the bias, each drawn uniformly from [-1, 1]
    weights = []
    for _ in range(nin):
      weights.append(Value(random.uniform(-1,1)))
    self.w = weights
    self.b = Value(random.uniform(-1,1))

  def __call__(self, x):
    """Return the activation for inputs x (paired with the weights via zip)."""
    weighted_inputs = (wi * xi for wi, xi in zip(self.w, x))
    # start the sum from the bias so the result is w . x + b
    pre_activation = sum(weighted_inputs, self.b)
    return pre_activation.tanh()

  def parameters(self):
    """Return the weights followed by the bias, as one list."""
    return [*self.w, self.b]




class Layer:
  """A group of nout neurons that all receive the same nin inputs."""

  def __init__(self, nin, nout):
    self.neurons = [Neuron(nin) for _ in range(nout)]

  def __call__(self, x):
    """Apply every neuron to x; a single-neuron layer returns the bare output."""
    outs = []
    for neuron in self.neurons:
      outs.append(neuron(x))
    if len(outs) == 1:
      return outs[0]
    return outs

  def parameters(self):
    """Return the parameters of all neurons, flattened in neuron order."""
    params = []
    for neuron in self.neurons:
      params.extend(neuron.parameters())
    return params



class MLP:
  """A chain of Layers: nin inputs followed by one layer per entry of nouts."""

  def __init__(self, nin, nouts):
    sizes = [nin] + nouts
    # consecutive size pairs give each layer's (inputs, outputs)
    self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]

  def __call__(self, x):
    """Feed x through the layers in order and return the last layer's output."""
    activation = x
    for layer in self.layers:
      activation = layer(activation)
    return activation

  def parameters(self):
    """Return the parameters of all layers, flattened in layer order."""
    params = []
    for layer in self.layers:
      params.extend(layer.parameters())
    return params

# Smoke test: build a 3-input MLP with layer sizes 4, 4, 1 and run one forward pass.
# The weights come from random.uniform, so the displayed value changes between runs.
x=[2.0,3.0,-1.0]
n=MLP(3,[4,4,1])
n(x)

Value  -0.9392080733690684

In [1400]:
# Four training examples with three features each
xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1, 1, -1],
]

# Desired output for each example
ys = [1, -1, -1, 1]

# Fresh, randomly initialised network; this rebinds `n` from the earlier cell
n = MLP(3, [4, 4, 1])

# Predictions of the untrained network
ypred = list(map(n, xs))
print(ypred)


[Value  -0.34266683775384454, Value  0.45789913225001505, Value  0.1262043668227203, Value  -0.15722196838506536]


In [1401]:
# Forward pass over the whole dataset
ypred = [n(sample) for sample in xs]

# Sum of squared errors; start=Value(0) keeps the running total a Value
loss = sum(
  ((pred - target)**2 for target, pred in zip(ys, ypred)),
  start=Value(0),
)

loss

Value  6.5357230769828245

In [1402]:
# Backpropagate from the loss; gradients accumulate into every Value in its graph.
loss.backward()

In [1403]:
# Gradient stored on the first weight of the first neuron in the first layer.
n.layers[0].neurons[0].w[0].grad

0.19121395295887167

In [1404]:
# Current value of that same weight.
n.layers[0].neurons[0].w[0].data

0.4442913474129153

In [1405]:
# Every weight and bias of the network as one flat list.
n.parameters()

[Value  0.4442913474129153,
 Value  -0.4128558654549841,
 Value  0.6849550761744967,
 Value  -0.14538904556798427,
 Value  -0.8569567812120065,
 Value  -0.9306617806235871,
 Value  -0.49809199573099905,
 Value  0.11004853305427753,
 Value  0.021365583053692072,
 Value  0.16472685280968768,
 Value  0.9517833061521068,
 Value  0.7893860363510743,
 Value  0.2455918680599143,
 Value  0.674452485165574,
 Value  -0.4025757912203176,
 Value  -0.7700095861837262,
 Value  0.013736615386383155,
 Value  0.19288972012438954,
 Value  0.6290415458906455,
 Value  0.3184842864540891,
 Value  -0.1844744664041329,
 Value  -0.4348086782466942,
 Value  -0.6280000516254765,
 Value  0.17155411863939962,
 Value  0.3398982742838643,
 Value  -0.24420509126362688,
 Value  -0.6575222445742874,
 Value  -0.3629966018911839,
 Value  0.5066294336672645,
 Value  0.41810797467256555,
 Value  0.7582700650521106,
 Value  -0.8558570070018721,
 Value  -0.7522020546466819,
 Value  -0.13596315037573037,
 Value  0.5095346826

In [1406]:
# Re-run the forward pass so the loss gets a fresh graph
ypred = [n(sample) for sample in xs]

# Sum of squared errors; start=Value(0) keeps the running total a Value
loss = sum(
  ((pred - target)**2 for target, pred in zip(ys, ypred)),
  start=Value(0),
)

loss

Value  6.5357230769828245

In [1407]:
# Gradient descent: 50 steps with a fixed step size of 0.1
for k in range(50):
  # forward pass: predictions and summed squared error
  ypred = [n(sample) for sample in xs]
  loss = sum(
    ((pred - target)**2 for target, pred in zip(ys, ypred)),
    start=Value(0),
  )

  # backward pass: clear the parameters' old gradients, then backpropagate
  for p in n.parameters():
    p.grad = 0
  loss.backward()

  # update: move each parameter against its gradient
  for p in n.parameters():
    p.data -= 0.1 * p.grad

  print(k, loss.data)

0 6.5357230769828245
1 3.2282958579045973
2 1.802645895404729
3 0.7620652291820904
4 0.0717466287739903
5 0.058570184201350406
6 0.04932934521249152
7 0.04249592488815938
8 0.03724617969122736
9 0.03309361748571302
10 0.029731826465630863
11 0.02695816107165272
12 0.024633303568429632
13 0.022658383796849542
14 0.020961345596458486
15 0.019488471942637448
16 0.018198926796004527
17 0.017061131933771526
18 0.016050298095265824
19 0.015146703700775548
20 0.014334470227940469
21 0.013600675075955566
22 0.012934698420889783
23 0.012327735264181855
24 0.01177242602548759
25 0.011262573476553364
26 0.010792923419158669
27 0.010358993012631282
28 0.009956935129766819
29 0.009583430243508462
30 0.009235599557876756
31 0.008910934681881961
32 0.008607240295132622
33 0.00832258709727298
34 0.008055272958321825
35 0.0078037906544908995
36 0.0075668009269440105
37 0.007343109869572515
38 0.007131649857963749
39 0.006931463391060077
40 0.0067416893410372405
41 0.006561551204140868
42 0.006390347021

In [1408]:
# Predictions from the last forward pass of the training loop (computed before its final update).
ypred

[Value  0.9687320541636545,
 Value  -0.9760660542310255,
 Value  -0.954605843986986,
 Value  0.9577614967991464]