### Numpy implementation of a network

In [1]:
import numpy as np
import math

# Training data: 2000 evenly spaced samples of sin(x) over [-pi, pi].
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# Coefficients of the cubic, each drawn from a standard normal distribution.
b_0, b_1, b_2, b_3 = (np.random.randn() for _ in range(4))

learning_rate = 1e-6
for step in range(2000):
    # Forward pass: y = b_0 + b_1 x + b_2 x^2 + b_3 x^3
    y_pred = b_0 + b_1 * x + b_2 * x ** 2 + b_3 * x ** 3

    # Sum-of-squares loss; the residual is reused by the backward pass below.
    residual = y_pred - y
    loss = np.square(residual).sum()
    if (step + 1) % 100 == 0:
        print(step, loss)

    # Backward pass: gradient of the loss with respect to each coefficient.
    grad_y_pred = 2.0 * residual
    grad_b_0 = grad_y_pred.sum()
    grad_b_1 = (grad_y_pred * x).sum()
    grad_b_2 = (grad_y_pred * x ** 2).sum()
    grad_b_3 = (grad_y_pred * x ** 3).sum()

    # Plain gradient-descent step.
    b_0 -= learning_rate * grad_b_0
    b_1 -= learning_rate * grad_b_1
    b_2 -= learning_rate * grad_b_2
    b_3 -= learning_rate * grad_b_3

print(f'Result: y = {b_0} + {b_1} x + {b_2} x^2 + {b_3} x^3')

99 2800.2875573666956
199 1886.966966311541
299 1273.5940082337368
399 861.3209317258795
499 583.9780909088296
599 397.2398709524092
699 271.3919345046298
799 186.49976701136524
899 129.17934647607726
999 90.43731303516178
1099 64.22548904653358
1199 46.47281874744557
1299 34.43657898860908
1399 26.2672395115711
1499 20.716390438977474
1599 16.94054068469028
1699 14.369208118558294
1799 12.616157495374054
1899 11.41961909015965
1999 10.6019887366157
Result: y = -0.03305496460451579 + 0.8290814763331356 x + 0.005702531431264186 x^2 + -0.08939608498162624 x^3


### Torch with manual backprop

In [2]:
import torch
import math

dtype = torch.float
# Use the GPU when one is available, otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Training data: 2000 evenly spaced samples of sin(x) over [-pi, pi].
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Scalar (0-dim) coefficients of the cubic, randomly initialised.
b_0 = torch.randn((), device=device, dtype=dtype)
b_1 = torch.randn((), device=device, dtype=dtype)
b_2 = torch.randn((), device=device, dtype=dtype)
b_3 = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6

for t in range(2000):
    # Forward pass: y = b_0 + b_1 x + b_2 x^2 + b_3 x^3
    y_pred = b_0 + b_1 * x + b_2 * x ** 2 + b_3 * x ** 3

    # Sum-of-squares loss, converted to a Python float with .item().
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backward pass: hand-derived gradients of the loss w.r.t. each coefficient.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_b_0 = grad_y_pred.sum()
    grad_b_1 = (grad_y_pred * x).sum()
    grad_b_2 = (grad_y_pred * x ** 2).sum()
    grad_b_3 = (grad_y_pred * x ** 3).sum()

    # Gradient-descent update.
    b_0 -= learning_rate * grad_b_0
    b_1 -= learning_rate * grad_b_1
    b_2 -= learning_rate * grad_b_2
    b_3 -= learning_rate * grad_b_3

print(f'Result: y = {b_0.item()} + {b_1.item()} x + {b_2.item()} x^2 + {b_3.item()} x^3')

99 27.092018127441406
199 21.743900299072266
299 17.961654663085938
399 15.286208152770996
499 13.393621444702148
599 12.054801940917969
699 11.107683181762695
799 10.437664031982422
899 9.963652610778809
999 9.628307342529297
1099 9.391057014465332
1199 9.223207473754883
1299 9.104451179504395
1399 9.020431518554688
1499 8.960987091064453
1599 8.918926239013672
1699 8.889167785644531
1799 8.868112564086914
1899 8.853214263916016
1999 8.842673301696777
Result: y = -0.005341586656868458 + 0.8569357395172119 x + 0.000921514758374542 x^2 + -0.093358114361763 x^3


### Torch with AutoGrad

In [4]:
import torch
import math

dtype = torch.float
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using {device.type} accelerator for this training")
# Tensors created below without an explicit device land on `device`.
torch.set_default_device(device)

# Training data: 2000 evenly spaced samples of sin(x) over [-pi, pi].
x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)

# Scalar coefficients; requires_grad=True lets autograd track them.
b_0 = torch.randn((), dtype=dtype, requires_grad=True)
b_1 = torch.randn((), dtype=dtype, requires_grad=True)
b_2 = torch.randn((), dtype=dtype, requires_grad=True)
b_3 = torch.randn((), dtype=dtype, requires_grad=True)
coefficients = (b_0, b_1, b_2, b_3)

learning_rate = 1e-6

for step in range(2000):
    # Forward pass: y = b_0 + b_1 x + b_2 x^2 + b_3 x^3
    y_pred = b_0 + b_1 * x + b_2 * x ** 2 + b_3 * x ** 3

    loss = (y_pred - y).pow(2).sum()
    if (step + 1) % 100 == 0:
        print(step, loss.item())

    # Autograd fills in .grad on every coefficient.
    loss.backward()

    # The update itself must not be recorded in the autograd graph.
    with torch.no_grad():
        for coeff in coefficients:
            coeff -= learning_rate * coeff.grad
            # Drop the gradient so the next backward() starts from scratch.
            coeff.grad = None

print(f'Result: y = {b_0.item()} + {b_1.item()} x + {b_2.item()} x^2 + {b_3.item()} x^3')

Using cuda accelerator for this training
99 711.9589233398438
199 477.0148620605469
299 320.7103576660156
399 216.68212890625
499 147.41824340820312
599 101.28123474121094
699 70.53512573242188
799 50.035804748535156
899 36.36149597167969
999 27.23501205444336
1099 21.140478134155273
1199 17.068132400512695
1299 14.345427513122559
1399 12.52393913269043
1499 11.304506301879883
1599 10.487556457519531
1699 9.93985366821289
1799 9.572383880615234
1899 9.325641632080078
1999 9.159823417663574
Result: y = 0.01048079039901495 + 0.8415372371673584 x + -0.0018081108573824167 x^2 + -0.09116780757904053 x^3


In [2]:
import torch
import math

class LegendrePolynomial3(torch.autograd.Function):
    """Custom autograd Function evaluating P3(x) = 0.5 * (5x^3 - 3x).

    Both the forward computation and its derivative are written out by hand.
    """

    @staticmethod
    def forward(ctx, input):
        """Return 0.5 * (5 * input^3 - 3 * input), stashing `input` for backward."""
        ctx.save_for_backward(input)
        cubic_term = 5 * input ** 3
        linear_term = 3 * input
        return 0.5 * (cubic_term - linear_term)

    @staticmethod
    def backward(ctx, grad_output):
        """Scale `grad_output` by dP3/dx = 1.5 * (5 * input^2 - 1)."""
        (saved_input,) = ctx.saved_tensors
        slope_factor = 5 * saved_input ** 2 - 1
        return grad_output * 1.5 * slope_factor
    
dtype = torch.float
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Training data: 2000 evenly spaced samples of sin(x) over [-pi, pi].
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Model: y = b_0 + b_1 * P3(b_2 + b_3 * x). The coefficients start from fixed
# values instead of random draws.
b_0 = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b_1 = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
b_2 = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b_3 = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6

# Custom Functions are invoked through .apply; the alias is the same on every
# iteration, so it is bound once before the loop.
P3 = LegendrePolynomial3.apply

for t in range(2000):
    # Forward pass through the custom autograd Function.
    y_pred = b_0 + b_1 * P3(b_2 + b_3 * x)

    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Autograd calls LegendrePolynomial3.backward as part of this.
    loss.backward()

    # The update itself must not be recorded in the autograd graph.
    with torch.no_grad():
        b_0 -= learning_rate * b_0.grad
        b_1 -= learning_rate * b_1.grad
        b_2 -= learning_rate * b_2.grad
        b_3 -= learning_rate * b_3.grad

    # Drop the gradients so the next backward() starts from scratch.
    b_0.grad = None
    b_1.grad = None
    b_2.grad = None
    b_3.grad = None

# The argument of P3 is b_2 + b_3 * x, so the report prints it in that form.
print(f'Result: y = {b_0.item()} + {b_1.item()} * P3({b_2.item()} + {b_3.item()} x)')

99 209.95834350585938
199 144.66018676757812
299 100.70249938964844
399 71.03520202636719
499 50.978511810302734
599 37.40313720703125
699 28.20686912536621
799 21.973186492919922
899 17.745729446411133
999 14.877889633178711
1099 12.931766510009766
1199 11.610918998718262
1299 10.714248657226562
1399 10.105475425720215
1499 9.692106246948242
1599 9.411375045776367
1699 9.220745086669922
1799 9.091285705566406
1899 9.003361701965332
1999 8.94364070892334
Result: y = 1.2777713782885503e-11 + -2.208526849746704 * P3(-2.5764071431844116e-10 x^2 + 0.2554861009120941 x)


### Using the nn-package

In [3]:
import torch
import math

# Training data: 2000 evenly spaced samples of sin(x) over [-pi, pi].
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Feature matrix whose columns are (x, x^2, x^3); broadcasting the exponent
# vector against the unsqueezed input gives shape (2000, 3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# A linear layer over the three power features, followed by a flatten so the
# prediction has the same 1-D shape as 'y'.
affine = torch.nn.Linear(3, 1)
to_vector = torch.nn.Flatten(0, 1)
model = torch.nn.Sequential(affine, to_vector)

# Summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for step in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if (step + 1) % 100 == 0:
        print(step, loss.item())

    # Clear the previous gradients, then backpropagate the current loss.
    model.zero_grad()
    loss.backward()

    # Manual gradient-descent step over every parameter of the model.
    with torch.no_grad():
        for weight in model.parameters():
            weight -= learning_rate * weight.grad

# The first module of the Sequential holds the bias and the three weights.
linear_layer = model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 562.4680786132812
199 384.49505615234375
299 264.01129150390625
399 182.36000061035156
499 126.96601867675781
599 89.3446044921875
699 63.76543045043945
799 46.35431671142578
899 34.489593505859375
999 26.395336151123047
1099 20.86693572998047
1199 17.086641311645508
1299 14.498771667480469
1399 12.725177764892578
1499 11.508222579956055
1599 10.672234535217285
1699 10.097349166870117
1799 9.701560974121094
1899 9.428776741027832
1999 9.240561485290527
Result: y = -0.018001634627580643 + 0.8454814553260803 x + 0.003105581272393465 x^2 + -0.09172883629798889 x^3


### Using the optim package

In [5]:
import torch
import math

# Training data: 2000 evenly spaced samples of sin(x) over [-pi, pi].
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Feature matrix whose columns are (x, x^2, x^3); broadcasting the exponent
# vector against the unsqueezed input gives shape (2000, 3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# A linear layer over the three power features, followed by a flatten so the
# prediction has the same 1-D shape as 'y'.
affine = torch.nn.Linear(3, 1)
to_vector = torch.nn.Flatten(0, 1)
model = torch.nn.Sequential(affine, to_vector)

# Summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction='sum')

# RMSprop takes over the parameter update that was previously done by hand.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for step in range(2000):
    y_pred = model(xx)

    loss = loss_fn(y_pred, y)
    if (step + 1) % 100 == 0:
        print(step, loss.item())

    # Reset gradients, backpropagate, then let the optimizer update the weights.
    optimizer.zero_grad()

    loss.backward()
    optimizer.step()

# The first module of the Sequential holds the bias and the three weights.
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 1111.3438720703125
199 474.86346435546875
299 268.02484130859375
399 159.9213409423828
499 91.55714416503906
599 50.200653076171875
699 26.54151153564453
799 14.580799102783203
899 10.010010719299316
999 9.00098991394043
1099 8.908093452453613
1199 8.90410041809082
1299 8.907456398010254
1399 8.908346176147461
1499 8.907268524169922
1599 8.907672882080078
1699 8.910737991333008
1799 8.918737411499023
1899 8.927391052246094
1999 8.921027183532715
Result: y = 0.0004957019118592143 + 0.8572407364845276 x + 0.0004965616390109062 x^2 + -0.09283050894737244 x^3


### Using custom Module subclass

In [6]:
import torch
import math

class Polynomial3(torch.nn.Module):
    """Cubic polynomial y = b_0 + b_1 x + b_2 x^2 + b_3 x^3 with learnable coefficients."""

    def __init__(self):
        """Register the four scalar coefficients as randomly initialised parameters."""
        super().__init__()
        self.b_0 = torch.nn.Parameter(torch.randn(()))
        self.b_1 = torch.nn.Parameter(torch.randn(()))
        self.b_2 = torch.nn.Parameter(torch.randn(()))
        self.b_3 = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """Evaluate the polynomial element-wise on `x`."""
        constant_term = self.b_0
        linear_term = self.b_1 * x
        quadratic_term = self.b_2 * x ** 2
        cubic_term = self.b_3 * x ** 3
        return constant_term + linear_term + quadratic_term + cubic_term

    def string(self):
        """Return the fitted polynomial as a human-readable equation."""
        c_0 = self.b_0.item()
        c_1 = self.b_1.item()
        c_2 = self.b_2.item()
        c_3 = self.b_3.item()
        return f'y = {c_0} + {c_1} x + {c_2} x^2 + {c_3} x^3'
    
# Training data: 2000 evenly spaced samples of sin(x) over [-pi, pi].
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# The custom module holds the four coefficients as parameters.
model = Polynomial3()

# Summed squared error, minimised with plain SGD over the module's parameters.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)

for step in range(2000):
    # Forward pass and loss.
    y_pred = model(x)

    loss = criterion(y_pred, y)
    if (step + 1) % 100 == 0:
        print(step, loss.item())

    # Reset gradients, backpropagate, then apply the optimizer step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 2410.904296875
199 1597.928955078125
299 1060.134765625
399 704.3670654296875
499 469.008544921875
599 313.30206298828125
699 210.28753662109375
799 142.1312255859375
899 97.03614807128906
999 67.19802856445312
1099 47.454002380371094
1199 34.3887825012207
1299 25.74268341064453
1399 20.020708084106445
1499 16.233705520629883
1599 13.72713851928711
1699 12.067984580993652
1799 10.969642639160156
1899 10.242546081542969
1999 9.761138916015625
Result: y = 0.004970307927578688 + 0.8272252082824707 x + -0.0008574603707529604 x^2 + -0.08913204818964005 x^3
