# Running Unit Tests

In [1]:
%run -m pytest tests

platform linux -- Python 3.12.3, pytest-7.4.0, pluggy-1.0.0
rootdir: /root/ml/Deep-Learning-Fundamentals/with_autograd
plugins: anyio-4.3.0
collected 60 items

tests/test_tensor_add.py [32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                            [  6%][0m
tests/test_tensor_concat.py [32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                     [ 20%][0m
tests/test_tensor_exp_log_sqrt_abs.py [32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                               [ 26%][0m
tests/test_tensor_matmul.py [32m.[0m[32m                                            [ 28%][0m
tests/test_tensor_maximum.py [32m.[0m[32m.[0m[32m                                          [ 31%][0m
tests/test_tensor_minimum.py [32m.[0m[32m.[0m[32m                                          [ 35%][0m
tests/test_tensor_mul.py [32m.[0m[32m.[0m[32m.[0m[32m                                             [ 40%][0m
t

# Trying out the AutoGrad Library

In [None]:
from autograd.tensor import Tensor

x = Tensor([10, -10, 10, -5, 6, 3, 1], requires_grad=True)

# Gradient descent on sum(x * x): every pass takes a step of 0.1 * gradient
# and prints the loss that was computed *before* that step was applied.
for i in range(100):
    # Presumably resets the gradient stored on x by the previous pass.
    x.zero_grad()

    y = (x * x).sum()
    y.backward()

    # Step against the gradient, scaled by a fixed step size of 0.1.
    x -= Tensor(0.1) * x.grad

    print(i, y)

0 Tensor(371, requires_grad=True)
1 Tensor(237.43999942302702, requires_grad=True)
2 Tensor(151.96159816589358, requires_grad=True)
3 Tensor(97.25541946008306, requires_grad=True)
4 Tensor(62.2434662559669, requires_grad=True)
5 Tensor(39.83581809034227, requires_grad=True)
6 Tensor(25.494923349694062, requires_grad=True)
7 Tensor(16.31675062036672, requires_grad=True)
8 Tensor(10.442720706534683, requires_grad=True)
9 Tensor(6.683341117982204, requires_grad=True)
10 Tensor(4.2773382543086145, requires_grad=True)
11 Tensor(2.73749638778152, requires_grad=True)
12 Tensor(1.751997638055377, requires_grad=True)
13 Tensor(1.1212785002338401, requires_grad=True)
14 Tensor(0.7176182411490816, requires_grad=True)
15 Tensor(0.45927567775639117, requires_grad=True)
16 Tensor(0.293936435562398, requires_grad=True)
17 Tensor(0.18811932151559232, requires_grad=True)
18 Tensor(0.12039636365269454, requires_grad=True)
19 Tensor(0.07705367187336241, requires_grad=True)
20 Tensor(0.049314348947758796,

In [None]:
from autograd.tensor import Tensor

x = Tensor([10, -10, 10, -5, 6, 3, 1], requires_grad=True)

# Goal: minimize the sum of squares of x.
# This variant avoids in-place tensor updates: after each step it builds a
# brand-new Tensor from the raw data, so no zero_grad() call is made.
for i in range(100):
    sum_of_squares = (x * x).sum()  # a 0-dimensional tensor
    sum_of_squares.backward()

    # Take a step of 0.1 * gradient, working on the underlying raw data.
    updated = x.data - (0.1 * x.grad).data
    x = Tensor(updated, requires_grad=True)

    print(i, sum_of_squares)

0 Tensor(371, requires_grad=True)
1 Tensor(237.43999942302702, requires_grad=True)
2 Tensor(151.96159816589358, requires_grad=True)
3 Tensor(97.25541946008306, requires_grad=True)
4 Tensor(62.2434662559669, requires_grad=True)
5 Tensor(39.83581809034227, requires_grad=True)
6 Tensor(25.494923349694062, requires_grad=True)
7 Tensor(16.31675062036672, requires_grad=True)
8 Tensor(10.442720706534683, requires_grad=True)
9 Tensor(6.683341117982204, requires_grad=True)
10 Tensor(4.2773382543086145, requires_grad=True)
11 Tensor(2.73749638778152, requires_grad=True)
12 Tensor(1.751997638055377, requires_grad=True)
13 Tensor(1.1212785002338401, requires_grad=True)
14 Tensor(0.7176182411490816, requires_grad=True)
15 Tensor(0.45927567775639117, requires_grad=True)
16 Tensor(0.293936435562398, requires_grad=True)
17 Tensor(0.18811932151559232, requires_grad=True)
18 Tensor(0.12039636365269454, requires_grad=True)
19 Tensor(0.07705367187336241, requires_grad=True)
20 Tensor(0.049314348947758796,

## Linear Function

In [None]:
import numpy as np
from autograd.tensor import Tensor

num_samples = 100

# Noise-free targets generated from a known linear rule:
#   y = x @ [-1, 3, -2] + 5
x_data = Tensor(np.random.randn(num_samples, 3))
coef = Tensor(np.array([-1, 3, -2], dtype=np.float32))
y_data = x_data @ coef + 5

# Trainable parameters, randomly initialised.
w = Tensor(np.random.randn(3), requires_grad=True)
b = Tensor(np.random.randn(), requires_grad=True)

learning_rate = 0.001
batch_size = 32

# Start offset of every mini-batch. The last batch may be shorter when
# batch_size does not divide num_samples evenly.
batch_starts = range(0, num_samples, batch_size)

for i in range(100):
    epoch_loss = 0
    for start in batch_starts:
        end = start + batch_size

        w.zero_grad()
        b.zero_grad()

        x = x_data[start:end]
        y = y_data[start:end]

        # Sum of squared residuals over the batch.
        y_pred = x @ w + b
        residual = y - y_pred
        loss = (residual * residual).sum()
        loss.backward()
        epoch_loss += loss.data

        # Plain gradient-descent update of both parameters.
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad

    # Average over the batches actually processed. The earlier divisor,
    # int(100 / batch_size) + 1, over-counted by one whenever batch_size
    # divided the sample count evenly (e.g. batch_size = 25 gave 5, not 4).
    epoch_loss /= len(batch_starts)
    print(i, epoch_loss)

0 786.7650813452802
1 543.7256015781683
2 376.11975094004083
3 260.4281905255251
4 180.49663587387624
5 125.21981251640051
6 86.95669815140874
7 60.44521065398406
8 42.05832066283996
9 29.293743263783085
10 20.42357788253729
11 14.253556064947162
12 9.957465225060048
13 6.963178371604893
14 4.874135943863509
15 3.4152039153444593
16 2.3953074548671625
17 1.6816187056664333
18 1.1817094820157363
19 0.8311998292917383
20 0.5852017946029597
21 0.4123867093337502
22 0.2908675188912589
23 0.20533811350240644
24 0.14508396510238553
25 0.1025973327499724
26 0.07261234202655385
27 0.05143196448173064
28 0.036458129809649464
29 0.0258633240243061
30 0.018360853465994205
31 0.013043968134478959
32 0.00927309285605605
33 0.006596706260904831
34 0.004695771871451877
35 0.0033446742371781445
36 0.002383731888757077
37 0.0016998382925324505
38 0.001212814662087772
39 0.000865781028236956
40 0.0006183563884730197
41 0.00044185254531179974
42 0.00031587437203364695
43 0.0002259128719943797
44 0.000161

In [None]:
# Inspect the fitted parameters. The data were generated with coefficients
# [-1, 3, -2] and offset 5, so a good fit should land close to those values.
w, b

(Tensor([-1.00000003  2.99999973 -1.9999999 ], requires_grad=True),
 Tensor(5.000000021758216, requires_grad=True))

In [None]:
import numpy as np 

from autograd import Tensor, Parameter, Module, SGD

# Synthetic regression data: 100 random rows of 3 features each, with targets
# produced by the fixed linear rule y = x @ [-1, 3, -2] + 5 (no noise added).
x_data = Tensor(np.random.randn(100, 3))
coef = Tensor(np.array([-1, +3, -2], dtype=np.float32))
y_data = x_data @ coef + 5

class Model(Module):
    """Linear model computing ``inputs @ w + b``."""

    def __init__(self) -> None:
        # Presumably a length-3 weight vector (one weight per input feature)
        # and a scalar bias; confirm against autograd.Parameter.
        self.w = Parameter(3)
        self.b = Parameter()

    def predict(self, inputs: Tensor) -> Tensor:
        """Return the model output ``inputs @ w + b`` for ``inputs``."""
        weighted = inputs @ self.w
        return weighted + self.b

optimizer = SGD(lr=0.001)
batch_size = 32
model = Model()

for epoch in range(100):
    epoch_loss = 0.0

    for start in range(0, 100, batch_size):
        # Rows that make up this mini-batch (same as x_data[start:start + batch_size]).
        batch = slice(start, start + batch_size)

        model.zero_grad()

        # Squared-error loss, summed over the batch.
        errors = model.predict(x_data[batch]) - y_data[batch]
        loss = (errors * errors).sum()
        loss.backward()

        epoch_loss += loss.data
        optimizer.step(model)

    # Total (not averaged) loss accumulated over this epoch's batches.
    print(epoch, epoch_loss)

0 3582.855569507088
1 2392.4095982811878
2 1627.1856504611626
3 1123.4185952486048
4 784.9507772203214
5 553.6704163408913
6 393.4527229595214
7 281.2399923116268
8 201.96213197173972
9 145.5655127148063
10 105.22639126244647
11 76.24696845257729
12 55.35551318821869
13 40.25218793911052
14 29.30822794086628
15 21.36318636049009
16 15.586267398963278
17 11.380340369882326
18 8.31484112720268
19 6.078488608506819
20 4.445746130581633
21 3.2529043460958387
22 2.380950620489958
23 1.743254107896132
24 1.276686240891906
25 0.9352020892309951
26 0.6851907067538727
27 0.5021008589263555
28 0.3679886024798856
29 0.2697325983828235
30 0.19773388507061435
31 0.14496770544080345
32 0.106291537076724
33 0.07793971891423174
34 0.05715415387985299
35 0.04191429734477293
36 0.030739660915478202
37 0.02254529394954162
38 0.016536000062235608
39 0.012128883735985675
40 0.008896625748550109
41 0.0065259307502281115
42 0.0047870830502403944
43 0.003511637672831036
44 0.002576070092157559
45 0.0018897920

# Fizz Buzz

In [None]:
"""
Print the numbers 1 to 100, except
* if the number is divisible by 3, print "fizz"
* if the number is divisible by 5, print "buzz"
* if the number is divisible by 15, print "fizzbuzz"
"""
from typing import List

import numpy as np

from autograd import Tensor, Parameter, Module, tanh, SGD

def binary_encode(x: int, num_bits: int = 10) -> List[int]:
    """Return the lowest ``num_bits`` bits of ``x``, least-significant bit first.

    Args:
        x: Integer to encode.
        num_bits: How many bits to emit. Defaults to 10, the input width
            that FizzBuzzModel's first layer is built for. Any bits of ``x``
            above this width are silently dropped.

    Returns:
        A list of ``num_bits`` values, each 0 or 1, where element ``i`` is
        bit ``i`` of ``x``.
    """
    # Shift binds tighter than `&`; the parentheses only make that explicit.
    return [(x >> i) & 1 for i in range(num_bits)]

def fizz_buzz_encode(x: int) -> List[int]:
    """One-hot encode the fizz-buzz class of ``x``.

    The four positions mean, in order: plain number, "fizz" (divisible by 3),
    "buzz" (divisible by 5), "fizzbuzz" (divisible by 15).

    Args:
        x: Integer to classify.

    Returns:
        A list of four ints with a single 1 at the position of the class.
    """
    by_three = x % 3 == 0
    by_five = x % 5 == 0

    # 0 = plain, 1 = fizz, 2 = buzz, 3 = fizzbuzz (divisible by both 3 and 5,
    # i.e. by 15).
    label = int(by_three) + 2 * int(by_five)

    return [1 if slot == label else 0 for slot in range(4)]

# Train only on 101..1023; the numbers 1..100 are used for evaluation below.
train_numbers = range(101, 1024)
x_train = Tensor([binary_encode(n) for n in train_numbers])
y_train = Tensor([fizz_buzz_encode(n) for n in train_numbers])

class FizzBuzzModel(Module):
    """Two-layer network: 10 inputs -> ``num_hidden`` tanh units -> 4 outputs."""

    def __init__(self, num_hidden: int = 50) -> None:
        # Hidden layer parameters.
        self.w1 = Parameter(10, num_hidden)
        self.b1 = Parameter(num_hidden)

        # Output layer parameters.
        self.w2 = Parameter(num_hidden, 4)
        self.b2 = Parameter(4)

    def predict(self, inputs: Tensor) -> Tensor:
        """Map a (batch_size, 10) input batch to (batch_size, 4) raw scores."""
        hidden = tanh(inputs @ self.w1 + self.b1)  # (batch_size, num_hidden)
        return hidden @ self.w2 + self.b2          # (batch_size, 4)

optimizer = SGD(lr=0.001)
batch_size = 32
model = FizzBuzzModel()

# Start row of each mini-batch. The array is reshuffled every epoch, so the
# batches are visited in a different order while their contents stay fixed.
starts = np.arange(0, x_train.shape[0], batch_size)

for epoch in range(5000):
    epoch_loss = 0.0
    np.random.shuffle(starts)

    for start in starts:
        # Rows that make up this mini-batch.
        batch = slice(start, start + batch_size)

        model.zero_grad()

        # Squared-error loss against the one-hot targets, summed over the batch.
        errors = model.predict(x_train[batch]) - y_train[batch]
        loss = (errors * errors).sum()
        loss.backward()

        epoch_loss += loss.data
        optimizer.step(model)

    print(epoch, epoch_loss)

# Evaluate on 1..100, which were excluded from the training range.
num_correct = 0
for x in range(1, 101):
    # Output text for each class index; index 0 is the number itself.
    labels = [str(x), "fizz", "buzz", "fizzbuzz"]

    inputs = Tensor([binary_encode(x)])
    predicted = model.predict(inputs)[0]

    # The class with the largest score is the prediction.
    predicted_idx = np.argmax(predicted.data)
    actual_idx = np.argmax(fizz_buzz_encode(x))
    num_correct += int(predicted_idx == actual_idx)

    print(x, labels[predicted_idx], labels[actual_idx], predicted)

print(num_correct, "/ 100")

0 131926.96455964775
1 8546.372731780399
2 2386.96031620019
3 1995.8705861391927
4 1444.4643568772917
5 1064.1561690140593
6 1018.9440692105245
7 985.8370722751757
8 872.5889253932788
9 830.9016938486973
10 759.3606446440942
11 759.6503980435675
12 737.9940701633993
13 717.2277332881305
14 697.7834815076342
15 689.6655668518575
16 671.7366880368523
17 653.812726870379
18 651.7219835468923
19 639.8357735513773
20 640.0392061904276
21 643.707840077508
22 640.0101541841854
23 622.8779626385281
24 621.7838316159451
25 614.4099617887321
26 624.339241309195
27 616.709414269811
28 610.5127775894616
29 607.989450469646
30 614.9038038762417
31 611.976277672214
32 603.6004175419239
33 608.2519003025145
34 606.2938873575783
35 600.8289191132098
36 597.8581101645098
37 599.1021682356156
38 594.3039713256558
39 598.1325223747016
40 594.1106883970361
41 598.7480360060526
42 597.4992917228658
43 597.1263257479719
44 591.1408428218389
45 600.2510016754917
46 590.0287891069943
47 589.2951433604992
48 5