In [3]:
import numpy as np
import math
import torch

In [5]:
# Fit the cubic y = a + b x + c x^2 + d x^3 to sin(x) on [-pi, pi] using
# plain NumPy and hand-written gradient descent.

# Build the training data: 2000 evenly spaced points and their sine.
# Nothing here is random -- only the initial weights below are.
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# The powers of x never change, so compute them once instead of on every step.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights from a seeded generator so that re-running the
# cell on a fresh kernel reproduces the same numbers.
SEED = 0
rng = np.random.default_rng(SEED)
a = rng.standard_normal()
b = rng.standard_normal()
c = rng.standard_normal()
d = rng.standard_normal()

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    # y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute the sum-of-squares loss and print it every 100th step
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backprop: gradients of the loss with respect to a, b, c, d.
    # d(loss)/d(y_pred) = 2 * (y_pred - y); the chain rule then multiplies by
    # d(y_pred)/d(weight), i.e. 1, x, x^2 and x^3 respectively.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights with one gradient-descent step
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 546.998090832642
199 385.4349773442141
299 272.5371720273177
399 193.59143668101188
499 138.35127180806447
599 99.67435512228504
699 72.57824096530611
799 53.584590585952135
899 40.26335876263124
999 30.91568743078087
1099 24.35310907505869
1199 19.743682916391744
1299 16.504688845439517
1399 14.227734164517825
1499 12.62644429836372
1599 11.499901664220722
1699 10.707074403987598
1799 10.14892004892265
1899 9.755852876110064
1999 9.4789622653894
Result: y = 0.02649613999999088 + 0.8509739550982013 x + -0.004571025047671128 x^2 + -0.09251009944457447 x^3


In [4]:

# Fit the cubic y = a + b x + c x^2 + d x^3 to sin(x) on [-pi, pi] using
# PyTorch tensors and hand-written gradient descent (no autograd).

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Seed PyTorch's random number generator so that re-running the cell on a
# fresh kernel reproduces the same initial weights and therefore the same fit.
SEED = 0
torch.manual_seed(SEED)

# Build the training data: 2000 evenly spaced points and their sine.
# Nothing here is random -- only the initial weights below are.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change, so compute them once instead of on every step.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights (0-dimensional tensors)
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute the sum-of-squares loss (as a Python float) and print it
    # every 100th step
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop: gradients of the loss with respect to a, b, c, d.
    # d(loss)/d(y_pred) = 2 * (y_pred - y); the chain rule then multiplies by
    # d(y_pred)/d(weight), i.e. 1, x, x^2 and x^3 respectively.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 930.4994506835938
199 656.8201293945312
299 464.5826721191406
399 329.492431640625
499 234.52163696289062
599 167.7293701171875
699 120.73744201660156
799 87.66439819335938
899 64.3800277709961
999 47.98188781738281
1099 36.430023193359375
1199 28.289945602416992
1299 22.552467346191406
1399 18.507461547851562
1499 15.654998779296875
1599 13.643047332763672
1699 12.223677635192871
1799 11.222152709960938
1899 10.515329360961914
1999 10.016410827636719
Result: y = 0.03611345961689949 + 0.850972592830658 x + -0.006230174098163843 x^2 + -0.0925099104642868 x^3
