In [2]:
import torch
import math


# Fit a third-order polynomial  y = a + b*x + c*x^2 + d*x^3  to sin(x) on
# [-pi, pi] using plain gradient descent with manually derived gradients
# (no autograd is used here).

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Create input and target data: 2000 evenly spaced points in [-pi, pi] and
# their sine. The data itself is deterministic; only the weights are random.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them once instead
# of re-evaluating them in every forward and backward pass.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize the four scalar (0-dim) weights
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute the sum-of-squared-errors loss; report it every 100th step
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop by hand: d(loss)/d(y_pred) = 2 * (y_pred - y); the chain rule
    # then gives each weight's gradient as that residual times the matching
    # power of x, summed over all samples.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights in place using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 5092.60888671875
199 3440.08740234375
299 2327.009033203125
399 1576.5833740234375
499 1070.1724853515625
599 728.0962524414062
699 496.7945556640625
799 340.23455810546875
899 234.15354919433594
999 162.19882202148438
1099 113.3390121459961
1199 80.12462615966797
1299 57.520668029785156
1399 42.12027359008789
1499 31.615726470947266
1599 24.442556381225586
1699 19.53852081298828
1799 16.18195915222168
1899 13.881965637207031
1999 12.304146766662598
Result: y = -0.048535119742155075 + 0.8205710649490356 x + 0.00837311614304781 x^2 + -0.08818555623292923 x^3
