### Create first model uasing torch

In [305]:
import torch

In [306]:
#inputs
x1 = Value(2.0,label='x1')
x2 = Value(0.0, label='x2')
#Weights w1,w2
w1= Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')
#bias of neuron
b = Value(6.88137, label='b')

In [307]:
from torch import Tensor
x1 = Tensor([2.0]).double(); x1.requires_grad = True
x2 = Tensor([0.0]).double(); x2.requires_grad = True

w1 = Tensor([-3.0]).double(); w1.requires_grad = True
w2 = Tensor([1.0]).double(); w2.requires_grad = True

b = Tensor([6.88137]).double()

n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

o.backward()
print('*******************************')
print(f'o.data : {o.data.item()}')

print(f'x2 : {x2.grad.item()}')
print(f'w2 : {w2.grad.item()}')
print(f'x1 : {x1.grad.item()}')
print(f'w1 : {w1.grad.item()}')


*******************************
o.data : 0.7071050214706146
x2 : 0.5000024886110417
w2 : 0.0
x1 : -1.500007465833125
w1 : 1.0000049772220834


In [308]:
import math
class Value:
    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        self.grad = 0
        self._backward = lambda: None
        # print(f'type of _children : {type(_children)}')
        self._prev= set(_children)
        self._op = _op
        self.label= label
    
    def __repr__(self) -> str:
        return f"Value(data={self.data})"
    
    def __add__(self, other):
        """
            Assumng other either of Value type or number and convert it to Value post typecheck
        """
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data  + other.data, (self, other), '+')
        
        def _backward():
            self.grad += 1.0* out.grad
            other.grad += 1.0* out.grad
        out._backward = _backward
    
        return out
    
    def __neg__(self):
        return self *(-1)
    
    def __sub__(self, other):
        return self + (-other)
    
    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')
        
        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data *  out.grad
        out._backward = _backward
        return out
    
    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only int and flost are supported"
        out = Value(self.data**other, (self,), f'**{other}')
        
        def _backward():
            self.grad += other * self.data **(other -1) * out.grad
        out._backward =_backward
        return out
    
    def __rmul__(self, other):
        return self * other
    
    def __truediv__(self, other):
        return self* other**-1
    
    def tanh(self):
        x = self.data
        t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
        out= Value(t,(self,), 'tanx')
        
        def _backward():
            self.grad += (1- t**2 )* out.grad
        out._backward = _backward 
        return out
    
    def exp(self):
        x = self.data
        t = math.exp(x)
        out= Value(t,(self,), 'exp')
        
        def _backward():
            self.grad += out.data * out.grad
        out._backward = _backward 
        return out

    def backward(self):
        topological_sorted = []
        visited = set()
        def sort_topological_order(v):
            if v not in visited:
                visited.add(v)
                for child  in v._prev:
                    sort_topological_order(child)
                topological_sorted.append(v)
        sort_topological_order(self)
        self.grad = 1.0
        for node in reversed(topological_sorted):
            node._backward()

#### torch giving the same result as out micrograd

In [309]:
from graphviz import Digraph
def trace(root):
    nodes, edges = set(), set()
    def build(v):
        if v not in nodes:
            nodes.add(v)
            for child in v._prev:
                edges.add((child, v))
                build(child)
    build(root)
    return nodes, edges

def draw_dot(root, format='svg', rankdir='LR'):
    """
    format: png | svg | ...
    rankdir: TB (top to bottom graph) | LR (left to right)
    """
    assert rankdir in ['LR', 'TB']
    nodes, edges = trace(root)
    dot = Digraph(format=format, graph_attr={'rankdir': rankdir}) #, node_attr={'rankdir': 'TB'})
    
    for n in nodes:
        dot.node(name=str(id(n)), label = "{%s |data %.4f | grad %.4f }" % (n.label,n.data, n.grad), shape='record')
        if n._op:
            dot.node(name=str(id(n)) + n._op, label=n._op)
            dot.edge(str(id(n)) + n._op, str(id(n)))
    
    for n1, n2 in edges:
        dot.edge(str(id(n1)), str(id(n2)) + n2._op)
    
    return dot

In [310]:
import random
from typing import Any
class Neuron:
    def __init__(self, nin):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1,1))
    
    def __call__(self, x):
        # w*x +b
        act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
        out = act.tanh()
        return out
    
    def parameters(self):
        return self.w + [self.b]

class Layer:
    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]
    
    def __call__(self, x):
        outs = [n(x) for n in self.neurons]
        return outs[0] if len(outs) == 1 else outs
    
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]
        

class MLP:
    def __init__(self, nin, nouts):
        size = [nin] + nouts
        self.layers = [Layer(size[i], size[i+1]) for i in range(len(nouts))]
    
    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x
    
    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]
        


In [311]:
x = [2.0, 3.0, -1.0]
n = MLP(3, [4,4,1])
n(x)
len(n.parameters())

41

In [312]:
#inputs
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]

# desired targets
ys = [1.0, -1.0, -1.0, 1.0]


In [318]:
for k in range(100):
    ##forward pass
    ypred = [n(x) for x in xs]
    loss = sum([(yout - ygt)**2 for ygt, yout in zip(ys,ypred)], Value(0.0))
    
    ## backward pass
    for p in n.parameters():
        p.grad = 0
    loss.backward()
    
    #update
    for p in n.parameters():
        p.data += -0.05 * p.grad
    
    print(f'step : {k} | loss : {loss.data}')
    

step : 0 | loss : 0.005244972867696429
step : 1 | loss : 0.005215521714547824
step : 2 | loss : 0.005186379025010501
step : 3 | loss : 0.005157540063384557
step : 4 | loss : 0.005129000189666092
step : 5 | loss : 0.005100754857152364
step : 6 | loss : 0.005072799610118431
step : 7 | loss : 0.005045130081562789
step : 8 | loss : 0.0050177419910193005
step : 9 | loss : 0.004990631142433717
step : 10 | loss : 0.004963793422101856
step : 11 | loss : 0.004937224796668101
step : 12 | loss : 0.00491092131118165
step : 13 | loss : 0.004884879087208851
step : 14 | loss : 0.004859094320999526
step : 15 | loss : 0.0048335632817058695
step : 16 | loss : 0.0048082823096516295
step : 17 | loss : 0.004783247814650577
step : 18 | loss : 0.004758456274372029
step : 19 | loss : 0.004733904232752246
step : 20 | loss : 0.004709588298450148
step : 21 | loss : 0.004685505143345901
step : 22 | loss : 0.004661651501080942
step : 23 | loss : 0.004638024165638316
step : 24 | loss : 0.004614619989961542
step : 2

In [319]:
ypred

[Value(data=0.9775453380370166),
 Value(data=-0.9781493380706406),
 Value(data=-0.9629489118786985),
 Value(data=0.9688213455285437)]