In [1]:
# -*- coding: utf-8 -*-
import numpy as np
import math

# Fit the cubic y = a + b x + c x^2 + d x^3 to sin(x) on [-pi, pi] using
# hand-written gradient descent on the summed squared error (NumPy only).

# Tunable settings, gathered in one place.
seed = 0             # fixes the initial weights so every rerun prints the same numbers
num_samples = 2000   # number of grid points in [-pi, pi]
num_steps = 2000     # gradient-descent iterations
log_every = 100      # print the loss on the last step of every `log_every` steps
learning_rate = 1e-6

# Create input and target data. The grid is deterministic (evenly spaced);
# only the initial weights below are random.
x = np.linspace(-math.pi, math.pi, num_samples)
y = np.sin(x)

# The powers of x do not depend on the weights, so compute them once instead
# of on every iteration of the training loop.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights from a standard normal. A local seeded generator
# is used so NumPy's global random state is left untouched.
rng = np.random.default_rng(seed)
a, b, c, d = rng.standard_normal(4)

for t in range(num_steps):
    # Forward pass: compute predicted y
    # y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute and print loss (sum of squared residuals)
    residual = y_pred - y
    loss = np.square(residual).sum()
    if t % log_every == log_every - 1:
        print(t, loss)

    # Backprop by hand: d(loss)/d(y_pred) = 2 * residual, then the chain rule
    # through each term of the polynomial gives the gradient of each weight.
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights with a plain gradient-descent step
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 3020.537863260472
199 2028.3830364547546
299 1364.1161790635072
399 919.0633862567056
499 620.6633613965618
599 420.4386957794433
699 285.982529893925
799 195.61731968944775
899 134.83293184736974
999 93.91009490349839
1099 66.33387623757909
1199 47.73395768813066
1299 35.17636619014816
1399 26.689804439102968
1499 20.948667932686877
1599 17.060772771227136
1699 14.425103202542775
1799 12.636411518083825
1899 11.421189545414604
1999 10.594660932157918
Result: y = -0.030816971227592038 + 0.8270983622481537 x + 0.0053164403333743 x^2 + -0.08911400427214695 x^3


In [2]:
import torch
import math


# Fit the cubic y = a + b x + c x^2 + d x^3 to sin(x) on [-pi, pi] using
# hand-written gradient descent on the summed squared error, with PyTorch
# tensors (no autograd).

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Tunable settings, gathered in one place.
seed = 0             # fixes the initial weights so every rerun prints the same numbers
num_samples = 2000   # number of grid points in [-pi, pi]
num_steps = 2000     # gradient-descent iterations
log_every = 100      # print the loss on the last step of every `log_every` steps
learning_rate = 1e-6

# Create input and target data. The grid is deterministic (evenly spaced);
# only the initial weights below are random.
x = torch.linspace(-math.pi, math.pi, num_samples, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x do not depend on the weights, so compute them once instead
# of on every iteration of the training loop.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights from a standard normal. A local seeded generator
# (created on the same device as the tensors) is used so PyTorch's global
# random state is left untouched.
gen = torch.Generator(device=device).manual_seed(seed)
a = torch.randn((), generator=gen, device=device, dtype=dtype)
b = torch.randn((), generator=gen, device=device, dtype=dtype)
c = torch.randn((), generator=gen, device=device, dtype=dtype)
d = torch.randn((), generator=gen, device=device, dtype=dtype)

for t in range(num_steps):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute and print loss (sum of squared residuals, as a Python float)
    residual = y_pred - y
    loss = residual.pow(2).sum().item()
    if t % log_every == log_every - 1:
        print(t, loss)

    # Backprop by hand: d(loss)/d(y_pred) = 2 * residual, then the chain rule
    # through each term of the polynomial gives the gradient of each weight.
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent (in-place on the 0-d tensors)
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 210.83349609375
199 142.7140655517578
299 97.57928466796875
399 67.66878509521484
499 47.844276428222656
599 34.70277786254883
699 25.989967346191406
799 20.212160110473633
899 16.38010025024414
999 13.837969779968262
1099 12.151190757751465
1199 11.031715393066406
1299 10.288559913635254
1399 9.795083045959473
1499 9.467334747314453
1599 9.249578475952148
1699 9.10486125946045
1799 9.00864315032959
1899 8.944662094116211
1999 8.90209674835205
Result: y = -0.0032640877179801464 + 0.8482987284660339 x + 0.0005631088861264288 x^2 + -0.09212957322597504 x^3
