In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def f(x):
    return 3*x**2 - 4*x + 5

In [3]:
f(3.0)

20.0

In [4]:
xs = np.arange(-5, 5, 0.25)
ys = f(xs)
ys

array([100.    ,  91.6875,  83.75  ,  76.1875,  69.    ,  62.1875,
        55.75  ,  49.6875,  44.    ,  38.6875,  33.75  ,  29.1875,
        25.    ,  21.1875,  17.75  ,  14.6875,  12.    ,   9.6875,
         7.75  ,   6.1875,   5.    ,   4.1875,   3.75  ,   3.6875,
         4.    ,   4.6875,   5.75  ,   7.1875,   9.    ,  11.1875,
        13.75  ,  16.6875,  20.    ,  23.6875,  27.75  ,  32.1875,
        37.    ,  42.1875,  47.75  ,  53.6875])

In [5]:
plt.plot(xs, ys)

[<matplotlib.lines.Line2D at 0x75d025a1d010>]

In [6]:
h = 0.0000001
x = 3
f(x+h)

20.00000140000003

In [7]:
h = 0.0000001  #small increment
#slope = (y(added increment) - y(initial))/ amt of increment in x
(f(x + h) - f(x))/h

14.000000305713911

In [8]:
h = 0.00000001
x = -3
(f(x+h) - f(x))/h

-22.00000039920269

In [9]:
h = 0.00000001
x = 2/3
(f(x+h) - f(x))/h

0.0

In [10]:
# lets get more complex
a = 2.0
b = -3.0
c = 10.0
d = a*b + c
print(d)

4.0


In [11]:
h = 0.0001

#inputs
a = 2.0
b = -3.0
c = 10.0

d1 = a*b + c
#a += h
#b+=h
c+=h
d2 = a*b +c
print("d1: ",d1)
print("d2: ",d2)
print('slope: ',( (d2) - (d1) )/h)

d1:  4.0
d2:  4.0001
slope:  0.9999999999976694


In [421]:
# value object
# takes a single scalar value that it wraps and keep tracks off
class Value:
    def __init__(self,data, _children=(),_op='', label=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op
        self.label= label
        
    
    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self,other), '+')
        
        def _backward():
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward =  _backward   #we try to save these fxn not call the fxn bcz these fxn's return none, just want to store the fxn
        return out

    
    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self,other), '*')

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out._backward = _backward
        return out

    def __pow__(self, other):
        assert isinstance(other,(int,float)),"only support int/float powers for now"
        out = Value(self.data**other, (self, ), f'**{other}')

        def _backward():
            self.grad += (other * self.data**(other-1)) * out.grad
        out._backward = _backward
        return out

   
    def tanh(self):
        n = self.data
        t = (math.exp(2*n)-1) / (math.exp(2*n) +1)
        out = Value(t,(self,),'tanh')

        def _backward():
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward
            
        return out 

    def exp(self):
        x = self.data
        out = Value(math.exp(x),(self, ), 'exp')

        def _backward():
            self.grad += out.data * out.grad

        out._backward= _backward
        return out

    def backward(self):
        # made a graph of forward prop
        topo = []
        visited =set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        
        # set grad for output
        self.grad = 1.0

        #call _backward() in reverse of topo[]
        for node in reversed(topo):
            node._backward()
        
    def __neg__(self):
        return self * -1

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self,other):
        return self - other

    def __radd__(self,other):
        return self + other  
        
    def __rmul__(self, other):   #other * self
        return self * other

    def __truediv__(self,other):   # a / b 
        return self * other**-1

    def __rtruediv__(self,other):
        return other * self**-1

    def __repr__(self):       # wrapper function
        return f"Value(data={self.data})"
    

            
# a = Value(2.0, label = 'a')
# b = Value(-3.0, label='b')
# c = Value(10,label='c')
# #a, b
# #a + b #add two value objects #if we use this #python will internally call a.__add__(b)
# e = a*b; e.label = 'e'
# d = e+c; d.label = 'd'
# f = Value(-2.0,label='f')
# L=d*f
# L

In [100]:
a = Value(2.0)
# how to handle non Value object
a + 1  #for other -> check instance and make it Value

Value(data=3.0)

In [99]:
a = Value(2.0)
2 * a
#what it looks like
# 2.__mul__(a)  #2 can't multiply Value
# so python can't do it so it check is there an rmul in Value
# so to tackle it we use rmul 

Value(data=4.0)

In [103]:
a =Value(2.0)
b = Value(4.0)
a / b

Value(data=0.5)

In [13]:
d._prev

{Value(data=-6.0), Value(data=10)}

In [18]:
def lol():
    h = 0.001

    a = Value(2.0,label='a')
    b= Value(-3.0,label='b')
    c= Value(10.0,label='c')
    e=a*b; e.label='e'
    d=e+c; d.label='d'
    f=Value(-2.0,label='f')
    L=d*f; L.label='L'
    L1=L.data

    a = Value(2.0,label='a')
    b= Value(-3.0,label='b')
    c= Value(10.0,label='c')
    e=a*b; e.label='e'
    d=e+c; d.label='d'
    f=Value(-2.0 + h,label='f')
    L=d*f; L.label='L'
    L2=L.data

    print((L2-L1)/h)

lol()
    

3.9999999999995595


In [85]:
# input x1,x2
x1 = Value(2.0)
x2 = Value(0.0)
# weights 
w1 = Value(-3.0)
w2 = Value(1.0)
#bias of the neuron
b = Value(6.8813735870195432)
#neuron = x1w1 + x2w2 + b
x1w1 = x1*w1
x2w2 = x2*w2
x1w1x2w2 = x1w1 + x2w2
n = x1w1x2w2 + b
#activation fxn
o = n.tanh()  #exp(2z)-1 / exp(2z)+1

In [86]:
o.backward()

In [88]:
print('Grad: ')
print(o.grad)
print(n.grad)
print(x1w1x2w2.grad)
print(b.grad)
print(x1w1.grad)
print(x2w2.grad)
print(x1.grad)
print(x2.grad)
print('')
print('Data: ')
print(o.data)
print(n.data)
print(x1w1x2w2.data)
print(b.data)
print(x1w1.data)
print(x2w2.data)
print(x1.data)
print(x2.data)

Grad: 
1.0
0.4999999999999999
0.4999999999999999
0.4999999999999999
0.4999999999999999
0.4999999999999999
-1.4999999999999996
0.4999999999999999

Data: 
0.7071067811865476
0.8813735870195432
-6.0
6.881373587019543
-6.0
0.0
2.0
0.0


In [76]:
topo = []
visited =set()
def build_topo(v):
    if v not in visited:
        visited.add(v)
        for child in v._prev:
            build_topo(child)
        topo.append(v)
build_topo(o)
topo

[Value(data=6.881373587019543),
 Value(data=0.0),
 Value(data=1.0),
 Value(data=0.0),
 Value(data=-3.0),
 Value(data=2.0),
 Value(data=-6.0),
 Value(data=-6.0),
 Value(data=0.8813735870195432),
 Value(data=0.7071067811865476)]

In [70]:
o.grad= 1.0
o._backward()

In [71]:
n.grad

0.4999999999999999

In [72]:
x1w1x2w2.grad

0.0

In [73]:
n._backward()
x1w1x2w2.grad

0.4999999999999999

In [74]:
b.grad

0.4999999999999999

In [75]:
x1w1x2w2._backward()
x1w1._backward()
x2w2._backward()
x1.grad

-1.4999999999999996

In [41]:
x1.grad = w1.data * x1w1.grad  #chain rule
w1.grad = x1.data * x1w1.grad

In [40]:
x2.grad = w2.data * x2w2.grad
w2.grad = x2.data * x2w2.grad

In [39]:
x1w1.grad = 0.5
x2w2.grad=0.5

In [38]:
x1w1x2w2.grad=0.5  #for (+) same as n.grad,  n = x1w1x2w2 + b
b.grad=0.5

In [37]:
n.grad = 0.5  

In [36]:
o.grad=1.0

In [29]:
# o = tanh()
# do/dn = 1 - o**2
1 - o.data**2  #n.grad   #go up to see how manual backward is going 

0.4999999999999999

In [93]:
# bug
a = Value(3.0)
b = a + a  #2a #grad = 2.0  
#here self and other is exactly the same so we are overriding with this statement
# self.grad = 1.0 * out.grad   #a.grad
# other.grad = 1.0 * out.grad  #a.grad

b.backward()

print(b.data)
print(a.data)
print('')
print(b.grad)
print(a.grad)

# we can do #self.grad += 1.0 * out.grad
            #other.grad += 1.0 * out.grad

6.0
3.0

1.0
2.0


In [105]:
# input x1,x2
x1 = Value(2.0)
x2 = Value(0.0)
# weights 
w1 = Value(-3.0)
w2 = Value(1.0)
#bias of the neuron
b = Value(6.8813735870195432)
#neuron = x1w1 + x2w2 + b
x1w1 = x1*w1
x2w2 = x2*w2
x1w1x2w2 = x1w1 + x2w2
n = x1w1x2w2 + b
#activation fxn
#o = n.tanh()  #exp(2z)-1 / exp(2z)+1
e = (2*n).exp()
o = (e - 1) / (e + 1)
o.backward()

In [106]:
print('Grad: ')
print(o.grad)
print(n.grad)
print(x1w1x2w2.grad)
print(b.grad)
print(x1w1.grad)
print(x2w2.grad)
print(x1.grad)
print(x2.grad)
print('')
print('Data: ')
print(o.data)
print(n.data)
print(x1w1x2w2.data)
print(b.data)
print(x1w1.data)
print(x2w2.data)
print(x1.data)
print(x2.data)

Grad: 
1.0
0.5
0.5
0.5
0.5
0.5
-1.5
0.5

Data: 
0.7071067811865477
0.8813735870195432
-6.0
6.881373587019543
-6.0
0.0
2.0
0.0


In [116]:
import torch

x1 = torch.Tensor([2.0]).double()                    ; x1.requires_grad = True
x2 = torch.Tensor([0.0]).double()                    ; x2.requires_grad = True
w1 = torch.Tensor([-3.0]).double()                    ; w1.requires_grad = True
w2 = torch.Tensor([1.0]).double()                    ; w2.requires_grad = True
b = torch.Tensor([6.8813735870195432]).double()                    ; b.requires_grad = True
n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

print(o.data.item())
o.backward()

print('____')
print('x2', x2.grad.item())
print('w2', w2.grad.item())
print('x1', x1.grad.item())
print('w1', w1.grad.item())
print('b', b.grad.item())

0.7071066904050358
____
x2 0.5000001283844369
w2 0.0
x1 -1.5000003851533106
w1 1.0000002567688737
b 0.5000001283844369


In [111]:
torch.tensor([[1,2,3],[4,5,6]]).shape

torch.Size([2, 3])

In [113]:
torch.tensor([2.0])

tensor([2.])

In [114]:
torch.tensor([2.0]).dtype

torch.float32

In [115]:
torch.tensor([2.0]).double().dtype

torch.float64

In [117]:
o

tensor([0.7071], dtype=torch.float64, grad_fn=<TanhBackward0>)

In [118]:
o.item()

0.7071066904050358

In [120]:
x2.grad

tensor([0.5000], dtype=torch.float64)

In [121]:
x2.grad.item()

0.5000001283844369

# neural network

In [125]:
import random

class Neuron:
    def __init__(self,nin):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]  #range(2) #[Value(-0.69), Value(0.37)]
        self.b = Value(random.uniform(-1,1))


    def __call__(self, x):
        # w * x + b
        return 0.0

x = [2.0,3.0]
n = Neuron(2)
n(x)

0.0

In [126]:
import random

class Neuron:
    def __init__(self,nin):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]  #range(2) #[Value(-0.69), Value(0.37)]
        self.b = Value(random.uniform(-1,1))


    def __call__(self, x):
        # w * x + b
        print (list(zip(self.w,x)))

x = [2.0,3.0]
n = Neuron(2)
n(x)

[(Value(data=0.8707629915435444), 2.0), (Value(data=0.599540236653215), 3.0)]


In [141]:
import random

class Neuron:
    def __init__(self,nin):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]  #range(2) #[Value(-0.69), Value(0.37)]
        self.b = Value(random.uniform(-1,1))


    def __call__(self, x):
        # w * x + b
        
        act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
        out = act.tanh()
        return out

x = [2.0, 3.0]
n = Neuron(2)
n(x)

Value(data=-0.5794388038824165)

In [143]:
import random

class Neuron:
    def __init__(self,nin):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]  #range(2) #[Value(-0.69), Value(0.37)]
        self.b = Value(random.uniform(-1,1))


    def __call__(self, x):
        # w * x + b
        
        act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
        out = act.tanh()
        return out

class Layer:
    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        outs = [n(x) for n in self.neurons]
        return outs

x = [2.0, 3.0]
n = Layer(2,3)
n(x)

[Value(data=0.38553385547933594),
 Value(data=0.91534168928875),
 Value(data=-0.9863564437059853)]

In [145]:
import random

class Neuron:
    def __init__(self,nin):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]  #range(2) #[Value(-0.69), Value(0.37)]
        self.b = Value(random.uniform(-1,1))


    def __call__(self, x):
        # w * x + b
        
        act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
        out = act.tanh()
        return out

class Layer:
    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        outs = [n(x) for n in self.neurons]
        return outs

class MLP:
    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

x = [2.0, 3.0, -1.0]
n = MLP(3,[4,4,1])
n(x)

[Value(data=0.15893821953697215)]

In [322]:
import random

class Neuron:
    def __init__(self,nin):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]  #range(2) #[Value(-0.69), Value(0.37)]
        self.b = Value(random.uniform(-1,1))


    def __call__(self, x):
        # w * x + b
        
        act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
        out = act.tanh()
        return out

    def parameters(self):
        return self.w + [self.b]

class Layer:
    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        outs = [n(x) for n in self.neurons]
        return outs[0] if len(outs)== 1 else outs

    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]
        # params = []
        # for neuron in self.neurons:
        #     ps = neuron.parameters()
        #     params.extend(ps)
        # return params
        

class MLP:
    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]



In [323]:
x = [2.0, 3.0, -1.0]
n = MLP(3,[4,4,1])
n(x)

Value(data=0.96615215985935)

In [324]:
len(n.parameters())

41

In [325]:
n.parameters()

[Value(data=-0.4904489811131403),
 Value(data=0.8312531004422432),
 Value(data=-0.13946289619646435),
 Value(data=-0.14561578053502),
 Value(data=0.5226617483723242),
 Value(data=-0.31302393606954104),
 Value(data=0.2725267912948033),
 Value(data=-0.6403994444964625),
 Value(data=0.5838188767842292),
 Value(data=0.45752598342108164),
 Value(data=-0.9055779638605794),
 Value(data=0.08619192596984293),
 Value(data=0.7784247874029366),
 Value(data=0.6544559038562645),
 Value(data=-0.4540720650459096),
 Value(data=0.8207099492216854),
 Value(data=0.5497328787222144),
 Value(data=-0.5325873969693267),
 Value(data=-0.6083734617266312),
 Value(data=-0.6848413973111032),
 Value(data=0.9946235729623591),
 Value(data=-0.8520280615571196),
 Value(data=-0.8110113266352534),
 Value(data=-0.5976437041242857),
 Value(data=0.8832933689060389),
 Value(data=0.6797848298714642),
 Value(data=-0.24217058490228927),
 Value(data=-0.32910136486272656),
 Value(data=0.5651328122597488),
 Value(data=0.8293468605

In [326]:
xs = [
    [2.0,3.0,-1.0],
    [3.0,-1.0,0.5],
    [0.5,1.0,1.0],
    [1.0,1.0,-1.0],
]
ys = [1.0,-1.0,-1.0,1.0] #desired targets
ypred = [n(x) for x in xs]
ypred

[Value(data=0.96615215985935),
 Value(data=0.7677344821311892),
 Value(data=0.960264561032948),
 Value(data=0.9712892379022011)]

In [179]:
loss = sum([(yout - ygt)**2 for ygt, yout in zip(ys, ypred)])


#sum() underhood:
# total = 0
# for x in loss:
#     total = total + x  # ← this is where it fails
#int + Value  →  Value.__radd__(int)
#so that's why also __rsum__ is important
loss

Value(data=4.875714920658209)

In [180]:
loss.backward()

In [181]:
n.layers[0].neurons[0].w[0].grad

-0.07282151852685531

In [182]:
n.layers[0].neurons[0].w[0].data

0.8770711230548858

In [186]:
for p in n.parameters():
    p.data += -0.1 * p.grad

In [187]:
n.layers[0].neurons[0].w[0].data

0.8843532749075713

In [373]:
#final
xs = [
    [2.0,3.0,-1.0],
    [3.0,-1.0,0.5],
    [0.5,1.0,1.0],
    [1.0,1.0,-1.0],
]
ys = [1.0,-1.0,-1.0,1.0] #desired targets
n = MLP(3,[4,4,1])

In [420]:
#final
for k in range(20):
    
    # forward pass
    ypred = [n(x) for x in xs]
    loss = sum([(yout - ygt)**2 for ygt, yout in zip(ys, ypred)])

    #backward pass
    for p in n.parameters():
        p.grad=0.0
    loss.backward()
    
    #upgrade    
    for p in n.parameters():
        p.data += -0.01 * p.grad

    print(k,loss.data)

0 0.0035649437684168294
1 0.003561286448930545
2 0.0035576364366014016
3 0.0035539937097874177
4 0.0035503582469315976
5 0.0035467300265612308
6 0.003543109027287735
7 0.00353949522780611
8 0.0035358886068945503
9 0.0035322891434140915
10 0.0035286968163081205
11 0.0035251116046021128
12 0.0035215334874030644
13 0.003517962443899225
14 0.0035143984533596634
15 0.0035108414951338747
16 0.0035072915486514017
17 0.0035037485934214098
18 0.003500212609032401
19 0.0034966835751516967


In [407]:
loss.backward()

In [305]:
for p in n.parameters():
    p.data += -0.01 * p.grad

In [404]:
ypred

[Value(data=0.8124145462310607),
 Value(data=-0.8839709903388817),
 Value(data=-0.8010888152373391),
 Value(data=0.7787660186259736)]

In [308]:
n.parameters()

[Value(data=0.38065582365351036),
 Value(data=0.9019557540224278),
 Value(data=-0.8117925204487119),
 Value(data=-0.6283424886959319),
 Value(data=-0.529147711888393),
 Value(data=-0.2803609975072665),
 Value(data=-0.7886564837908899),
 Value(data=0.3313274593849759),
 Value(data=-0.4191802123886141),
 Value(data=0.6342356106294995),
 Value(data=-0.24220273866871428),
 Value(data=-0.17106509048647034),
 Value(data=-0.5901693513574958),
 Value(data=-0.15902011954483758),
 Value(data=1.1175858587966523),
 Value(data=0.3300491558412355),
 Value(data=1.1222062850071608),
 Value(data=-0.17070147704597877),
 Value(data=-0.49550382167408613),
 Value(data=-0.5894393611189681),
 Value(data=-0.3814071125559665),
 Value(data=1.0572553242960419),
 Value(data=0.7311064404516243),
 Value(data=0.12551727708663307),
 Value(data=-0.8334521433370706),
 Value(data=0.36067816129829816),
 Value(data=-0.7727997882115829),
 Value(data=-0.4995713821587709),
 Value(data=-0.6648057906003461),
 Value(data=-0.189