### NumPy implementation of a network (cubic polynomial fit to sin(x))

In [1]:
import numpy as np
import math

# Fit the cubic b_0 + b_1 x + b_2 x^2 + b_3 x^3 to sin(x) using
# hand-derived gradients and plain gradient descent on NumPy arrays.

# Training data: 2000 evenly spaced samples of sin(x) on [-pi, pi].
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# Coefficients start from independent standard-normal draws (b_0 drawn first).
b_0, b_1, b_2, b_3 = (np.random.randn() for _ in range(4))

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: evaluate the cubic at every sample point.
    y_pred = b_0 + b_1 * x + b_2 * x ** 2 + b_3 * x ** 3

    # Loss is the sum of squared residuals; report it every 100th step.
    residual = y_pred - y
    loss = np.square(residual).sum()
    if (t + 1) % 100 == 0:
        print(t, loss)

    # Backward pass: d(loss)/d(y_pred), then chain through each power of x.
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Gradient-descent step applied to all four coefficients.
    b_0, b_1, b_2, b_3 = (
        b_0 - learning_rate * grad_a,
        b_1 - learning_rate * grad_b,
        b_2 - learning_rate * grad_c,
        b_3 - learning_rate * grad_d,
    )

print(f'Result: y = {b_0} + {b_1} x + {b_2} x^2 + {b_3} x^3')

99 2800.2875573666956
199 1886.966966311541
299 1273.5940082337368
399 861.3209317258795
499 583.9780909088296
599 397.2398709524092
699 271.3919345046298
799 186.49976701136524
899 129.17934647607726
999 90.43731303516178
1099 64.22548904653358
1199 46.47281874744557
1299 34.43657898860908
1399 26.2672395115711
1499 20.716390438977474
1599 16.94054068469028
1699 14.369208118558294
1799 12.616157495374054
1899 11.41961909015965
1999 10.6019887366157
Result: y = -0.03305496460451579 + 0.8290814763331356 x + 0.005702531431264186 x^2 + -0.08939608498162624 x^3


### Torch with manual backprop

In [2]:
import torch
import math

# Fit the cubic b_0 + b_1 x + b_2 x^2 + b_3 x^3 to sin(x) with PyTorch
# tensors, computing the gradients by hand (no autograd).

dtype = torch.float
# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training data: 2000 evenly spaced samples of sin(x) on [-pi, pi].
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Scalar (0-dim) coefficients initialised from a standard normal distribution.
b_0 = torch.randn((), device=device, dtype=dtype)
b_1 = torch.randn((), device=device, dtype=dtype)
b_2 = torch.randn((), device=device, dtype=dtype)
b_3 = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6

for t in range(2000):
    # Forward pass: evaluate the cubic at every sample point.
    y_pred = b_0 + b_1 * x + b_2 * x ** 2 + b_3 * x ** 3

    # Sum of squared errors, pulled out as a Python float for printing.
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop by hand: d(loss)/d(y_pred), then chain through each power of x.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_b_0 = grad_y_pred.sum()
    grad_b_1 = (grad_y_pred * x).sum()
    grad_b_2 = (grad_y_pred * x ** 2).sum()
    grad_b_3 = (grad_y_pred * x ** 3).sum()

    # Gradient-descent step on each coefficient.
    b_0 -= learning_rate * grad_b_0
    b_1 -= learning_rate * grad_b_1
    b_2 -= learning_rate * grad_b_2
    b_3 -= learning_rate * grad_b_3

print(f'Result: y = {b_0.item()} + {b_1.item()} x + {b_2.item()} x^2 + {b_3.item()} x^3')

99 27.092018127441406
199 21.743900299072266
299 17.961654663085938
399 15.286208152770996
499 13.393621444702148
599 12.054801940917969
699 11.107683181762695
799 10.437664031982422
899 9.963652610778809
999 9.628307342529297
1099 9.391057014465332
1199 9.223207473754883
1299 9.104451179504395
1399 9.020431518554688
1499 8.960987091064453
1599 8.918926239013672
1699 8.889167785644531
1799 8.868112564086914
1899 8.853214263916016
1999 8.842673301696777
Result: y = -0.005341586656868458 + 0.8569357395172119 x + 0.000921514758374542 x^2 + -0.093358114361763 x^3


### Torch with AutoGrad

In [4]:
import torch
import math

# Fit the cubic b_0 + b_1 x + b_2 x^2 + b_3 x^3 to sin(x), letting
# torch.autograd compute the gradients instead of deriving them by hand.

dtype = torch.float
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using {device.type} accelerator for this training")
# Every tensor created below without an explicit device lands on `device`.
torch.set_default_device(device)

# Training data: 2000 evenly spaced samples of sin(x) on [-pi, pi].
x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)

# Scalar coefficients tracked by autograd (b_0 is drawn first).
b_0, b_1, b_2, b_3 = (
    torch.randn((), dtype=dtype, requires_grad=True) for _ in range(4)
)

learning_rate = 1e-6

for t in range(2000):
    # Forward pass and sum-of-squared-errors loss (kept as a tensor).
    y_pred = b_0 + b_1 * x + b_2 * x ** 2 + b_3 * x ** 3
    residual = y_pred - y
    loss = residual.pow(2).sum()
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Autograd fills in .grad on each coefficient.
    loss.backward()

    # Update the weights outside of graph recording, then drop each gradient
    # so the next backward() starts from scratch rather than accumulating.
    with torch.no_grad():
        for coeff in (b_0, b_1, b_2, b_3):
            coeff -= learning_rate * coeff.grad
            coeff.grad = None

print(f'Result: y = {b_0.item()} + {b_1.item()} x + {b_2.item()} x^2 + {b_3.item()} x^3')

Using cuda accelerator for this training
99 711.9589233398438
199 477.0148620605469
299 320.7103576660156
399 216.68212890625
499 147.41824340820312
599 101.28123474121094
699 70.53512573242188
799 50.035804748535156
899 36.36149597167969
999 27.23501205444336
1099 21.140478134155273
1199 17.068132400512695
1299 14.345427513122559
1399 12.52393913269043
1499 11.304506301879883
1599 10.487556457519531
1699 9.93985366821289
1799 9.572383880615234
1899 9.325641632080078
1999 9.159823417663574
Result: y = 0.01048079039901495 + 0.8415372371673584 x + -0.0018081108573824167 x^2 + -0.09116780757904053 x^3
