In [1]:
import torch
import math

# Fit a third-order polynomial y = a + b x + c x^2 + d x^3 to sin(x) on
# [-pi, pi] with plain gradient descent, using autograd for the gradients.

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Create Tensors to hold the inputs and the targets.
# By default requires_grad=False, so autograd does not compute gradients
# with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Create random 0-dimensional Tensors for the four polynomial weights.
# requires_grad=True asks autograd to compute gradients with respect to
# these Tensors during the backward pass.
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)
params = (a, b, c, d)

learning_rate = 1e-6
# Single source of truth for the iteration count, so the loop bound and the
# "last iteration" check below cannot drift apart.
num_steps = 20000
for t in range(num_steps):
    # Forward pass: compute predicted y using operations on Tensors.
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Loss is the Euclidean (L2) norm of the residual: the square root of the
    # sum of squared errors. There is no division by the number of samples,
    # so this is neither the MSE nor the RMSE.
    # loss is a 0-dimensional Tensor; loss.item() gets the Python scalar.
    loss = ((y_pred - y) ** 2).sum().sqrt()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass. This call computes the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call a.grad, b.grad, c.grad and d.grad hold the gradient of
    # the loss with respect to a, b, c, d respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because the weights have requires_grad=True, but the update itself must
    # not be tracked by autograd.
    with torch.no_grad():
        for weight in params:
            # In-place update: keeps a, b, c, d bound to the same leaf Tensors.
            weight -= learning_rate * weight.grad
        if t == num_steps - 1:
            print(f' the gradient in last itteration are {a.grad}, {b.grad}, {c.grad}, {d.grad}')
        # Manually clear the gradients after updating the weights; otherwise
        # the next backward() call would accumulate into the old values.
        for weight in params:
            weight.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 355.8032531738281
199 331.9528503417969
299 308.64984130859375
399 285.9933776855469
499 264.1024475097656
599 243.1175537109375
699 223.201171875
799 204.53431701660156
899 187.30654907226562
999 171.69764709472656
1099 157.8492431640625
1199 145.8312225341797
1299 135.6141357421875
1399 127.06130981445312
1499 119.9486312866211
1599 114.00627136230469
1699 108.96340942382812
1799 104.58219146728516
1899 100.67333984375
1999 97.09760284423828
2099 93.75861358642578
2199 90.59331512451172
2299 87.56261444091797
2399 84.64397430419922
2499 81.82567596435547
2599 79.10304260253906
2699 76.47573852539062
2799 73.94607543945312
2899 71.51778411865234
2999 69.19527435302734
3099 66.9831771850586
3199 64.8858642578125
3299 62.90715789794922
3399 61.050113677978516
3499 59.31679153442383
3599 57.708065032958984
3699 56.22353744506836
3799 54.86146926879883
3899 53.6187744140625
3999 52.491119384765625
4099 51.47301483154297
4199 50.55807113647461
4299 49.73918151855469
4399 49.008754730224