In [86]:
# numpy only

import numpy as np
import math

# Training data: 2000 evenly spaced inputs on [-pi, pi] and their sines.
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# Coefficients of a + b*x + c*x^2 + d*x^3, each drawn from a standard normal
# (drawn in the order a, b, c, d).
a, b, c, d = (np.random.randn() for _ in range(4))

# Gradient-descent settings.
lr = 1e-6
iters = 2000

# The powers of x never change inside the loop, so build them once up front.
x_sq = x**2
x_cu = x**3

for step in range(iters):
  # Forward pass: evaluate the cubic at every sample point.
  y_pred = a + b*x + c*x_sq + d*x_cu

  # Loss is the sum of squared residuals over all samples.
  residual = y_pred - y
  loss = np.square(residual).sum()

  # Backward pass: d(loss)/d(y_pred), then the chain rule for each coefficient.
  grad_y_pred = 2.0 * residual
  grad_a = grad_y_pred.sum()
  grad_b = (grad_y_pred * x).sum()
  grad_c = (grad_y_pred * x_sq).sum()
  grad_d = (grad_y_pred * x_cu).sum()

  # Step each coefficient against its gradient.
  a = a - lr * grad_a
  b = b - lr * grad_b
  c = c - lr * grad_c
  d = d - lr * grad_d

print(f'result: y={a} + {b}*x + {c}*x^2 + {d}*x^3')


# Spot check: compare the fitted cubic with the true sine at x = 2.
act = math.sin(2)
pred = a + b*2 + c*2**2 + d*2**3
print(f'actual sin(2)={act}, pred sin(2)={pred}')

result: y=-0.02069247170341442 + 0.8260516126036489*x + 0.0035697956930543823*x^2 + -0.08896511325039225*x^3
actual sin(2)=0.9092974268256817, pred sin(2)=0.9339690302729629


In [87]:
# tensors only

import torch
import math

# All tensors in this cell are created with this dtype.
dtype = torch.float

# Training data: 2000 evenly spaced inputs on [-pi, pi] and their sines.
x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)

# Zero-dimensional coefficient tensors for a + b*x + c*x^2 + d*x^3,
# drawn from a standard normal in the order a, b, c, d.
a, b, c, d = (torch.randn((), dtype=dtype) for _ in range(4))

lr = 1e-6
iters = 2000

# The powers of x never change inside the loop, so build them once up front.
x_sq = x**2
x_cu = x**3

for step in range(iters):
  # Forward pass: evaluate the cubic at every sample point.
  y_pred = a + b*x + c*x_sq + d*x_cu

  # Sum of squared residuals, pulled out as a Python float.
  residual = y_pred - y
  loss = residual.pow(2).sum().item()

  # Hand-written backward pass: d(loss)/d(y_pred), then one gradient per weight.
  grad_y_pred = 2.0 * residual
  grads = (
    grad_y_pred.sum(),
    (grad_y_pred * x).sum(),
    (grad_y_pred * x_sq).sum(),
    (grad_y_pred * x_cu).sum(),
  )

  # In-place gradient-descent step on each weight tensor.
  for weight, grad in zip((a, b, c, d), grads):
    weight.sub_(lr * grad)

print(f'result: y={a.item()} + {b.item()}*x + {c.item()}*x^2 + {d.item()}*x^3')

# Spot check: compare the fitted cubic with the true sine at x = 2.
act = math.sin(2)
pred = a + b*2 + c*2**2 + d*2**3
print(f'actual sin(2)={act}, pred sin(2)={pred}')

result: y=-0.01740901544690132 + 0.8315060138702393*x + 0.0030033451039344072*x^2 + -0.08974095433950424*x^3
actual sin(2)=0.9092974268256817, pred sin(2)=0.9396888613700867


In [88]:
# autograd

import torch
import math

# All tensors in this cell are created with this dtype.
dtype = torch.float

# Training data: 2000 evenly spaced inputs on [-pi, pi] and their sines.
x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)

# Zero-dimensional coefficients for a + b*x + c*x^2 + d*x^3. requires_grad=True
# asks autograd to record operations on them so backward() can fill in .grad.
a, b, c, d = (
  torch.randn((), dtype=dtype, requires_grad=True) for _ in range(4)
)

lr = 1e-6
iters = 2000

# x does not require grad, so its powers can be computed once outside the loop.
x_sq = x**2
x_cu = x**3

for i in range(iters):
  # Forward pass and sum-of-squares loss (kept as a tensor so it can be
  # back-propagated).
  y_pred = a + b*x + c*x_sq + d*x_cu
  loss = (y_pred - y).pow(2).sum()

  # Populate .grad on every tensor created with requires_grad=True.
  loss.backward()

  # The update itself must not be recorded by autograd.
  with torch.no_grad():
    for weight in (a, b, c, d):
      weight.sub_(lr * weight.grad)
      # Drop the gradient so the next backward() starts from scratch.
      weight.grad = None

print(f'result: y={a.item()} + {b.item()}*x + {c.item()}*x^2 + {d.item()}*x^3')

# Spot check: compare the fitted cubic with the true sine at x = 2.
act = math.sin(2)
pred = a + b*2 + c*2**2 + d*2**3
print(f'actual sin(2)={act}, pred sin(2)={pred}')

result: y=0.03807801380753517 + 0.8808224201202393*x + -0.006569087039679289*x^2 + -0.09675578027963638*x^3
actual sin(2)=0.9092974268256817, pred sin(2)=0.9994003176689148


In [89]:
# nn module

import torch
import math

# Training data: 2000 evenly spaced inputs on [-pi, pi] and their sines.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# The output y is modelled as a linear function of (x, x^2, x^3), so the cubic
# fit can be treated as a single linear layer applied to those three features.
p = torch.tensor([1, 2, 3])
# x.unsqueeze(-1) has shape [2000, 1]; raising it to p broadcasts to [2000, 3]
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
  torch.nn.Linear(3, 1),
  # collapse the [2000, 1] layer output to [2000] so it lines up with y
  torch.nn.Flatten(0, 1)
)

# summed (not averaged) squared error
loss_fn = torch.nn.MSELoss(reduction='sum')

lr = 1e-6
iters = 2000
for t in range(iters):
  # forward pass and loss
  y_pred = model(xx)
  loss = loss_fn(y_pred, y)

  # clear gradients left from the previous iteration, then back-propagate
  model.zero_grad()
  loss.backward()

  # manual gradient-descent step; no_grad keeps the update out of autograd
  with torch.no_grad():
    for param in model.parameters():
      param -= lr * param.grad

# The first module of the Sequential is the Linear layer holding the fitted
# bias and weights. Looked up once after training rather than on every
# iteration, so it is defined even if the loop body never runs.
linear_layer = model[0]

print(f'result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

result: y = -0.009385563433170319 + 0.842444121837616 x + 0.0016191651811823249 x^2 + -0.09129679948091507 x^3


In [91]:
# optim

import torch
import math

# Training data: 2000 evenly spaced inputs on [-pi, pi] and their sines.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# The output y is modelled as a linear function of (x, x^2, x^3), so the cubic
# fit can be treated as a single linear layer applied to those three features.
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)  # [2000, 1] broadcast against p -> [2000, 3]

# Linear layer followed by a flatten that turns its [2000, 1] output into [2000].
model = torch.nn.Sequential(torch.nn.Linear(3, 1), torch.nn.Flatten(0, 1))

# summed (not averaged) squared error
loss_fn = torch.nn.MSELoss(reduction='sum')

lr = 1e-3
iters = 2000
# The optimizer owns the update rule; it is handed the parameters to adjust.
optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
for step in range(iters):
  # Reset the gradients accumulated by the previous iteration.
  optimizer.zero_grad()

  # Forward pass and loss.
  y_pred = model(xx)
  loss = loss_fn(y_pred, y)

  # Back-propagate, then let the optimizer apply its update to the parameters.
  loss.backward()
  optimizer.step()

# The first module of the Sequential is the Linear layer with the fitted values.
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

Result: y = 0.0004977249191142619 + 0.8561056852340698 x + 0.000497750355862081 x^2 + -0.09396786242723465 x^3
