In [9]:
import numpy as np
import math

In [10]:
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

In [11]:
learning_rate = 1e-6
for t in range(2000):
  y_pred = a + b * x + c * x ** 2 + d * x ** 3
  loss = np.square(y_pred - y).sum()
  if t % 100 == 99:
    print(t, loss)

  grad_y_pred = 2.0 * (y_pred - y)
  grad_a = grad_y_pred.sum()
  grad_b = (grad_y_pred * x).sum()
  grad_c = (grad_y_pred * x ** 2).sum()
  grad_d = (grad_y_pred * x ** 3).sum()

  a -= learning_rate * grad_a
  b -= learning_rate * grad_b
  c -= learning_rate * grad_c
  d -= learning_rate * grad_d

print()
print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 1480.7908045791282
199 998.6533561746223
299 675.0210058873408
399 457.6060643320533
499 311.4251795960346
599 213.05390735222673
699 146.79640959744492
799 102.1277368765183
899 71.98489489962995
999 51.624327597490094
1099 37.85759293894258
1199 28.539701564280406
1299 22.226356464357522
1399 17.94417189304475
1499 15.036515086679833
1599 13.060001270826188
1699 11.7149415040493
1799 10.798566188239441
1899 10.173538517741479
1999 9.746740827567384

Result: y = -0.023627221986184947 + 0.8365523275020561 x + 0.0040760889512877945 x^2 + -0.09045874850366083 x^3


In [12]:
import torch
import math

dtype = torch.float
device = torch.device('cpu')

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6

for t in range(2000):
  y_pred = a + b * x + c * x ** 2 + d * x ** 3
  loss = (y_pred - y).pow(2).sum().item()
  if t % 100 == 99:
    print(t, loss)

  grad_y_pred = 2.0 * (y_pred - y)
  grad_a = grad_y_pred.sum()
  grad_b = (grad_y_pred * x).sum()
  grad_c = (grad_y_pred * x ** 2).sum()
  grad_d = (grad_y_pred * x ** 3).sum()

  a -= learning_rate * grad_a
  b -= learning_rate * grad_b
  c -= learning_rate * grad_c
  d -= learning_rate * grad_d

print()
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 4168.8291015625
199 2801.4033203125
299 1884.9813232421875
399 1270.352783203125
499 857.8120727539062
599 580.692138671875
699 394.3836669921875
799 269.0201110839844
899 184.5891876220703
999 127.67378997802734
1099 89.27054595947266
1199 63.332557678222656
1299 45.79645919799805
1399 33.92865753173828
1499 25.888349533081055
1599 20.43539047241211
1699 16.733219146728516
1799 14.216890335083008
1899 12.50466537475586
1999 11.338286399841309

Result: y = -0.03759971261024475 + 0.8912369608879089 x + 0.006486576981842518 x^2 + -0.09823715686798096 x^3


In [13]:
import torch
import math

dtype = torch.float
device = torch.device('cpu')

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
  y_pred = a + b * x + c * x ** 2 + d * x ** 3

  loss = (y_pred - y).pow(2).sum()
  if t % 100 == 99:
    print(t, loss.item())

  loss.backward()

  with torch.no_grad():
    a -= learning_rate * a.grad
    b -= learning_rate * b.grad
    c -= learning_rate * c.grad
    d -= learning_rate * d.grad

    a.grad = None
    b.grad = None
    c.grad = None
    d.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3') 

99 1604.6875
199 1083.2933349609375
299 732.9064331054688
399 497.23968505859375
499 338.5932312011719
599 231.69833374023438
699 159.60601806640625
799 110.93854522705078
899 78.05174255371094
999 55.80632019042969
1099 40.74352264404297
1199 30.533252716064453
1299 23.60500144958496
1399 18.898534774780273
1499 15.697835922241211
1599 13.51869010925293
1699 12.0333833694458
1799 11.01986026763916
1899 10.327446937561035
1999 9.853883743286133
Result: y = -0.025493798777461052 + 0.8359684944152832 x + 0.004398105666041374 x^2 + -0.09037570655345917 x^3


In [14]:
class LegendrePolyynomial3(torch.autograd.Function):

  @staticmethod
  def forward(ctx, input):
    ctx.save_for_backward(input)
    return 0.5 * (5 * input ** 3 - 2 * input)

  @staticmethod
  def backward(ctx, grad_output):
    input, = ctx.saved_tensors
    return grad_output * 1.5 * (5 * input ** 2 - 1)

dtype = torch.float
device = torch.device('cpu')

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
  P3 = LegendrePolyynomial3.apply

  y_pred = a + b * P3(c + d * x)
  loss = (y_pred - y).pow(2).sum()
  if t % 100 == 99:
    print(t, loss.item())

  loss.backward()

  with torch.no_grad():
    a -= learning_rate * a.grad
    b -= learning_rate * b.grad
    c -= learning_rate * c.grad
    d -= learning_rate * d.grad

    a.grad = None
    b.grad = None
    c.grad = None
    d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 531.5096435546875
199 476.79229736328125
299 427.54693603515625
399 383.27337646484375
499 343.5063781738281
599 307.81732177734375
699 275.8122253417969
799 247.13099670410156
899 221.4451141357422
999 198.45510864257812
1099 177.88998413085938
1199 159.50352478027344
1299 143.07290649414062
1399 128.39710998535156
1499 115.29411315917969
1599 103.60026550292969
1699 93.16829681396484
1799 83.86534881591797
1899 75.57231140136719
1999 68.18185424804688
Result: y = 2.3844886065660376e-09 + -3.1376540660858154 * P3(1.370906943520822e-09 + 0.2122785896062851 x)


In [27]:
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for t in range(2000):
  y_pred = model(xx)

  loss = loss_fn(y_pred, y)
  if t % 100 == 99:
    print(t, loss.item())

  model.zero_grad()
  loss.backward()

  with torch.no_grad():
    for param in model.parameters():
      param -= learning_rate * param.grad

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 445.8328857421875
199 298.6925048828125
299 201.13345336914062
399 136.43655395507812
499 93.5240478515625
599 65.05513000488281
699 46.1640510559082
799 33.62565612792969
899 25.30165672302246
999 19.77407455444336
1099 16.102466583251953
1199 13.662847518920898
1299 12.041399002075195
1399 10.963366508483887
1499 10.24638557434082
1599 9.769344329833984
1699 9.451814651489258
1799 9.240379333496094
1899 9.099529266357422
1999 9.005644798278809
Result: y = -0.0055151344276964664 + 0.8443938493728638 x + 0.0009514518897049129 x^2 + -0.09157413244247437 x^3


In [30]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(),
                                lr=learning_rate)
for t in range(2000):
  y_pred = model(xx)

  loss = loss_fn(y_pred, y)
  if t % 100 == 99:
    print(t, loss.item())

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 18338.501953125
199 6979.48876953125
299 2099.48486328125
399 399.1882019042969
499 50.950782775878906
599 19.09084129333496
699 14.854743003845215
799 13.089790344238281
899 11.672599792480469
999 10.313640594482422
1099 9.33665943145752
1199 8.908737182617188
1299 8.822389602661133
1399 8.817212104797363
1499 9.427308082580566
1599 8.817669868469238
1699 9.268943786621094
1799 9.080066680908203
1899 8.970853805541992
1999 8.88848876953125
Result: y = 0.0002743241493590176 + 0.8563225865364075 x + 0.00027432903880253434 x^2 + -0.09374881535768509 x^3


In [32]:
import torch
import math

class Polynomial3(torch.nn.Module):
  def __init__(self):
    super().__init__()
    self.a = torch.nn.Parameter(torch.randn(()))
    self.b = torch.nn.Parameter(torch.randn(()))
    self.c = torch.nn.Parameter(torch.randn(()))
    self.d = torch.nn.Parameter(torch.randn(()))

  def forward(self, x):
    return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3

  def string(self):
    return f'y = {self.a.item()} + {self.b.item()}x + {self.c.item()}x^2 + {self.d.item()}x^3'

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)

for t in range(2000):
  y_pred = model(x)
  loss = criterion(y_pred, y)
  if t % 100 == 99:
    print(t, loss.item())

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

print(f'Result: {model.string()}')

99 293.79248046875
199 202.18272399902344
299 140.1663818359375
399 98.13898468017578
499 69.62710571289062
599 50.26314163208008
699 37.09758758544922
799 28.1362361907959
899 22.02974510192871
999 17.863840103149414
1099 15.018523216247559
1199 13.072983741760254
1299 11.741119384765625
1399 10.828317642211914
1499 10.202011108398438
1599 9.77180290222168
1699 9.475953102111816
1799 9.272274017333984
1899 9.131895065307617
1999 9.035039901733398
Result: y = -0.012911541387438774 + 0.8486614227294922x + 0.0022274574730545282x^2 + -0.09218115359544754x^3


In [35]:
import random
import torch
import math

class DynamicNet(torch.nn.Module):
  def __init__(self):
    super().__init__()
    self.a = torch.nn.Parameter(torch.randn(()))
    self.b = torch.nn.Parameter(torch.randn(()))
    self.c = torch.nn.Parameter(torch.randn(()))
    self.d = torch.nn.Parameter(torch.randn(()))
    self.e = torch.nn.Parameter(torch.randn(()))

  def forward(self, x):
    y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
    
    for exp in range(4, random.randint(4, 6)):
      y = y + self.e * x ** exp
    return y

  def string(self):
    return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x) 
model = DynamicNet()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
    y_pred = model(x)

    loss = criterion(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

1999 1506.599365234375
3999 678.8826293945312
5999 351.9313659667969
7999 164.04525756835938
9999 84.21209716796875
11999 43.79298400878906
13999 25.377161026000977
15999 16.902233123779297
17999 13.199381828308105
19999 10.609216690063477
21999 9.70823860168457
23999 9.252126693725586
25999 8.9295015335083
27999 8.955061912536621
29999 8.673763275146484
Result: y = -0.006007363088428974 + 0.8541889190673828 x + 0.0005625412450172007 x^2 + -0.0931592732667923 x^3 + 9.66699662967585e-05 x^4 ? + 9.66699662967585e-05 x^5 ?
