In [19]:
import torch
import random
import math

In [6]:
x1 = torch.Tensor([2.0]).double()                ; x1.requires_grad = True
x2 = torch.Tensor([0.0]).double()                ; x2.requires_grad = True
w1 = torch.Tensor([-3.0]).double()               ; w1.requires_grad = True
w2 = torch.Tensor([1.0]).double()                ; w2.requires_grad = True
b = torch.Tensor([6.8813735870195432]).double()  ; b.requires_grad = True
n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

print(o.data.item())
o.backward()

print('---')
print('x2', x2.grad.item())
print('w2', w2.grad.item())
print('x1', x1.grad.item())
print('w1', w1.grad.item())

0.7071066904050358
---
x2 0.5000001283844369
w2 0.0
x1 -1.5000003851533106
w1 1.0000002567688737


In [20]:
class Value:
  
  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"
  
  def __add__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')
    
    def _backward():
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    
    return out

  def __mul__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')
    
    def _backward():
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
      
    return out
  
  def __pow__(self, other):
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
        self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out
  
  def __rmul__(self, other): # other * self
    return self * other

  def __truediv__(self, other): # self / other
    return self * other**-1

  def __neg__(self): # -self
    return self * -1

  def __sub__(self, other): # self - other
    return self + (-other)

  def __radd__(self, other): # other + self
    return self + other

  def tanh(self):
    x = self.data
    t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
    out = Value(t, (self, ), 'tanh')
    
    def _backward():
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    
    return out
  
  def exp(self):
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')
    
    def _backward():
      self.grad += out.data * out.grad # NOTE: in the video I incorrectly used = instead of +=. Fixed here.
    out._backward = _backward
    
    return out
  
  
  def backward(self):
    
    topo = []
    visited = set()
    def build_topo(v):
      if v not in visited:
        visited.add(v)
        for child in v._prev:
          build_topo(child)
        topo.append(v)
    build_topo(self)
    
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

a = Value(2.0, label='a')
b = Value(-3.0, label='b')
c = Value(10.0, label='c')
e = a*b; e.label = 'e'
d = e + c; d.label = 'd'
f = Value(-2.0, label='f')
L = d * f; L.label = 'L'
L

Value(data=-8.0)

In [322]:
class Neuron:
    
    def __init__(self, nin):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)] #random weights
        self.b = Value(random.uniform(-1, 1)) #Random Bias
        
    def __call__(self, x):
        # w * x + b;
        act = sum((wi * xi for wi, xi in zip(self.w, x)), self.b)
        out = act.tanh()
        return out
    
    def parameters(self):
        return self.w + [self.b]
        
    
class Layer:
    
    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        outs = [n(x) for n in self.neurons]
        return outs[0] if len(outs) ==1 else outs
    
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters() ]

class MLP:
    
    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))]
        
    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x
    
    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]


In [435]:
    
x = [2.0, 3.0, -1.0]
n = MLP(3, [4, 4, 1])
n(x)

Value(data=-0.3656556529471058)

In [436]:
xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0] # desired targets

In [437]:
for k in range(2188):
    
    #forward Pass
    ypred = [n(x) for x in xs]
    loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))
    
    # backward pass
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    #Update
    for p in n.parameters():
        p.data += -0.08* p.grad
    
    print(k, loss.data)
    

0 4.206963042436066
1 2.840023505533006
2 0.6016994458732916
3 0.08089116500889398
4 0.06666271796786574
5 0.05720270919806129
6 0.05026814070776445
7 0.04489325303411776
8 0.04057478766593609
9 0.03701611334067481
10 0.034027134851921444
11 0.03147860395920177
12 0.029278730852954207
13 0.027360121419650076
14 0.025671973811227284
15 0.024175164275708733
16 0.02283901949194281
17 0.021639122931673475
18 0.020555781273228435
19 0.019572926489944492
20 0.01867731373174683
21 0.01785792491807131
22 0.017105518397734304
23 0.01641228422539313
24 0.01577157704073472
25 0.015177706787846485
26 0.014625773102355343
27 0.014111533052102634
28 0.013631294624567547
29 0.013181830282846852
30 0.012760306304586343
31 0.012364224636346155
32 0.011991374748693278
33 0.011639793539825348
34 0.011307731759941757
35 0.010993625751688253
36 0.010696073550061566
37 0.010413814577102163
38 0.010145712316319626
39 0.009890739469225805
40 0.009647965189125076
41 0.00941654406105492
42 0.009195706555733063


357 0.0010489398469888782
358 0.0010459511311845106
359 0.0010429792830733654
360 0.0010400241608658168
361 0.001037085624352502
362 0.0010341635348824004
363 0.0010312577553412891
364 0.0010283681501305469
365 0.0010254945851463188
366 0.001022636927759005
367 0.0010197950467931277
368 0.0010169688125074578
369 0.0010141580965755473
370 0.0010113627720664746
371 0.0010085827134260561
372 0.0010058177964581707
373 0.0010030678983065423
374 0.0010003328974367346
375 0.0009976126736184877
376 0.0009949071079082548
377 0.0009922160826321053
378 0.0009895394813688656
379 0.0009868771889334845
380 0.000984229091360772
381 0.0009815950758892433
382 0.000978975030945374
383 0.0009763688461279908
384 0.0009737764121929254
385 0.0009711976210379718
386 0.0009686323656880071
387 0.0009660805402803777
388 0.00096354204005049
389 0.0009610167613176572
390 0.0009585046014711199
391 0.0009560054589563445
392 0.0009535192332614548
393 0.0009510458249039377
394 0.0009485851354175024
395 0.000946137067

732 0.0005052019548078142
733 0.000504502692007483
734 0.0005038053562795931
735 0.0005031099396886572
736 0.0005024164343426429
737 0.0005017248323926301
738 0.0005010351260325582
739 0.000500347307498912
740 0.000499661369070439
741 0.0004989773030678543
742 0.0004982951018535863
743 0.0004976147578314334
744 0.0004969362634463704
745 0.00049625961118418
746 0.0004955847935712481
747 0.0004949118031742511
748 0.0004942406325999062
749 0.0004935712744946713
750 0.0004929037215445301
751 0.0004922379664746876
752 0.0004915740020492901
753 0.0004909118210712409
754 0.0004902514163818666
755 0.0004895927808606778
756 0.0004889359074251637
757 0.0004882807890304635
758 0.00048762741866918225
759 0.00048697578937108744
760 0.0004863258942029179
761 0.0004856777262681128
762 0.0004850312787065427
763 0.0004843865446943247
764 0.0004837435174435367
765 0.0004831021902020116
766 0.0004824625562530815
767 0.00048182460891536545
768 0.00048118834154251136
769 0.0004805537475229982
770 0.0004799

1078 0.00034133391631991055
1079 0.0003410139890239818
1080 0.0003406946599545562
1081 0.00034037592743812984
1082 0.00034005778980743695
1083 0.00033974024540140183
1084 0.00033942329256513135
1085 0.000339106929649858
1086 0.00033879115501295014
1087 0.00033847596701782506
1088 0.0003381613640340016
1089 0.0003378473444369967
1090 0.0003375339066083343
1091 0.0003372210489355247
1092 0.0003369087698119989
1093 0.00033659706763714155
1094 0.00033628594081619136
1095 0.0003359753877602749
1096 0.00033566540688635725
1097 0.0003353559966171968
1098 0.0003350471553813485
1099 0.0003347388816131286
1100 0.00033443117375258025
1101 0.0003341240302454468
1102 0.00033381744954315
1103 0.0003335114301027761
1104 0.0003332059703870284
1105 0.0003329010688642178
1106 0.00033259672400822327
1107 0.0003322929342984875
1108 0.0003319896982199718
1109 0.00033168701426314264
1110 0.0003313848809239307
1111 0.0003310832967037328
1112 0.0003307822601093581
1113 0.0003304817696530307
1114 0.00033018182

1428 0.00025693597993712083
1429 0.00025675454870279543
1430 0.00025657337335436734
1431 0.0002563924533515224
1432 0.0002562117881554875
1433 0.0002560313772289874
1434 0.0002558512200362714
1435 0.0002556713160430731
1436 0.0002554916647166323
1437 0.0002553122655256879
1438 0.00025513311794044907
1439 0.0002549542214326224
1440 0.0002547755754753751
1441 0.00025459717954337015
1442 0.0002544190331127095
1443 0.0002542411356609746
1444 0.0002540634866671903
1445 0.000253886085611842
1446 0.00025370893197686293
1447 0.00025353202524561755
1448 0.0002533553649029123
1449 0.00025317895043498295
1450 0.00025300278132948814
1451 0.00025282685707551515
1452 0.00025265117716356334
1453 0.0002524757410855384
1454 0.0002523005483347636
1455 0.0002521255984059515
1456 0.00025195089079521724
1457 0.0002517764250000756
1458 0.00025160220051940546
1459 0.0002514282168534955
1460 0.0002512544735039933
1461 0.0002510809699739254
1462 0.000250907705767688
1463 0.0002507346803910392
1464 0.0002505618

1786 0.0002050568083811702
1787 0.00020494121491123208
1788 0.0002048257516917158
1789 0.00020471041850279252
1790 0.0002045952151251119
1791 0.00020448014133984705
1792 0.00020436519692863434
1793 0.0002042503816736143
1794 0.0002041356953574099
1795 0.0002040211377631355
1796 0.00020390670867439706
1797 0.00020379240787526286
1798 0.000203678235150305
1799 0.00020356419028457644
1800 0.00020345027306359928
1801 0.00020333648327337865
1802 0.0002032228207004081
1803 0.0002031092851316372
1804 0.00020299587635451343
1805 0.00020288259415694303
1806 0.000202769438327299
1807 0.00020265640865444858
1808 0.00020254350492770248
1809 0.00020243072693686294
1810 0.00020231807447218249
1811 0.00020220554732438412
1812 0.00020209314528465944
1813 0.00020198086814466354
1814 0.00020186871569650695
1815 0.0002017566877327703
1816 0.00020164478404647977
1817 0.000201533004431132
1818 0.0002014213486806787
1819 0.00020130981658952538
1820 0.00020119840795252642
1821 0.00020108712256499772
1822 0.0

2167 0.00016878646986668075
2168 0.0001687081519571463
2169 0.00016862990672938765
2170 0.00016855173408234533
2171 0.00016847363391513177
2172 0.00016839560612706223
2173 0.00016831765061762742
2174 0.00016823976728649908
2175 0.00016816195603354688
2176 0.00016808421675881848
2177 0.00016800654936254707
2178 0.00016792895374515753
2179 0.00016785142980723923
2180 0.00016777397744958698
2181 0.00016769659657316484
2182 0.0001676192870791243
2183 0.00016754204886879813
2184 0.00016746488184370134
2185 0.00016738778590551999
2186 0.00016731076095614583
2187 0.00016723380689762432


In [438]:
ypred

[Value(data=0.9943166940615314),
 Value(data=-0.9935588993786726),
 Value(data=-0.9939195176584392),
 Value(data=0.9924850949581984)]