In [3]:
import torch as tc
import math

In [19]:
# Fit the cubic a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi] using autograd
# for the gradients and a hand-written gradient-descent update.
dtype = tc.float
device = tc.device("cpu")

# Training data: 2000 evenly spaced points and their sine values.
x = tc.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = tc.sin(x)

# Polynomial coefficients as 0-dim tensors; requires_grad=True makes autograd
# record operations on them. torch is imported under the alias `tc`, so the
# alias is used here as everywhere else (the bare name `torch` is undefined).
a = tc.randn((), device=device, dtype=dtype, requires_grad=True)
b = tc.randn((), device=device, dtype=dtype, requires_grad=True)
c = tc.randn((), device=device, dtype=dtype, requires_grad=True)
d = tc.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6

for t in range(2000):
    # Forward pass: evaluate the polynomial and the summed squared error.
    y_pred = a + b*x + c*x**2 + d*x**3
    loss = (y_pred - y).pow(2).sum()

    # Report the loss once every 100 iterations.
    if t % 100 == 99:
        print(t, loss.item())

    # Backward pass: populates a.grad, b.grad, c.grad and d.grad.
    loss.backward()

    # The parameter update itself must not be tracked by autograd.
    with tc.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Drop the gradients so the next backward() does not accumulate into them.
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f"Results: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3")

99 2996.915283203125
199 1986.640380859375
299 1318.0299072265625
399 875.5083618164062
499 582.6061401367188
599 388.72296142578125
699 260.37506103515625
799 175.404052734375
899 119.1455078125
999 81.89423370361328
1099 57.22587203979492
1199 40.88872528076172
1299 30.067890167236328
1399 22.899829864501953
1499 18.151002883911133
1599 15.004512786865234
1699 12.919422149658203
1799 11.537494659423828
1899 10.62143611907959
1999 10.014086723327637
Results: y = 0.008110667578876019 + 0.8239452838897705 x + -0.00139922508969903 x^2 + -0.08866550773382187 x^3


In [32]:
# Same cubic fit to sin(x), now expressed as a linear layer over the
# features (x, x^2, x^3) and trained with a manual gradient-descent step.
x = tc.linspace(-math.pi, math.pi, 2000)
y = tc.sin(x)

# x.unsqueeze(-1) has shape (2000, 1); raising it to p (shape (3,)) broadcasts
# to a (2000, 3) matrix whose columns are x, x^2 and x^3.
p = tc.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# Linear(3, 1) yields shape (2000, 1); Flatten(0, 1) collapses that to (2000,)
# so predictions line up with y. torch is imported as `tc`, so the alias is
# used throughout (the bare name `torch` is undefined in this notebook).
model = tc.nn.Sequential(
    tc.nn.Linear(3, 1),
    tc.nn.Flatten(0, 1)
)

# Sum (not mean) of squared errors.
loss_fn = tc.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for t in range(2000):
    # Forward pass.
    y_pred = model(xx)

    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Clear gradients left from the previous iteration before backward().
    model.zero_grad()

    loss.backward()

    # Plain gradient-descent update, kept out of the autograd graph.
    with tc.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

# Handle on the trained linear layer (first module of the Sequential).
linear_layer = model[0]
        

99 374.52178955078125
199 252.982666015625
299 171.93011474609375
399 117.84918975830078
499 81.7451400756836
599 57.62850570678711
699 41.5095100402832
799 30.729320526123047
899 23.514833450317383
999 18.68340492248535
1099 15.445511817932129
1199 13.27396011352539
1299 11.816438674926758
1399 10.837404251098633
1499 10.17919635772705
1599 9.73630142211914
1699 9.438043594360352
1799 9.236984252929688
1899 9.101323127746582
1999 9.009705543518066


In [33]:
# Cubic fit to sin(x) again, but the parameter update is delegated to an
# RMSprop optimizer instead of being written by hand.
x = tc.linspace(-math.pi, math.pi, 2000)
y = tc.sin(x)

# Feature matrix with columns x, x^2, x^3 (shape (2000, 3) via broadcasting).
p = tc.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# torch is imported as `tc`; the bare name `torch` is undefined in this notebook.
model = tc.nn.Sequential(
    tc.nn.Linear(3, 1),
    tc.nn.Flatten(0, 1)
)

# Sum (not mean) of squared errors.
loss_fn = tc.nn.MSELoss(reduction='sum')

learning_rate = 1e-3
optimizer = tc.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
    # Forward pass.
    y_pred = model(xx)

    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Clear old gradients through the optimizer; it was built from
    # model.parameters(), so this covers exactly the same tensors.
    optimizer.zero_grad()

    loss.backward()

    # Let RMSprop apply the update using the freshly computed gradients.
    optimizer.step()

99 771.5487060546875
199 586.7696533203125
299 445.9894714355469
399 332.6747741699219
499 240.30636596679688
599 165.83172607421875
699 107.88685607910156
799 65.2943115234375
899 36.54120635986328
999 19.61551856994629
1099 11.726996421813965
1199 9.287557601928711
1299 8.935791015625
1399 8.90194320678711
1499 8.905007362365723
1599 8.907093048095703
1699 8.907391548156738
1799 8.927767753601074
1899 8.907142639160156
1999 8.907236099243164


In [38]:
class Net(tc.nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    torch is imported as ``tc`` in this notebook, so the base class is
    referenced through that alias.
    """

    def __init__(self, d_in, d_hidden, d_out):
        """Create the two linear layers.

        Args:
            d_in: number of input features.
            d_hidden: width of the hidden layer.
            d_out: number of output features.
        """
        super().__init__()
        self.ln1 = tc.nn.Linear(d_in, d_hidden)
        self.ln2 = tc.nn.Linear(d_hidden, d_out)

    def forward(self, x):
        """Return the network output for a batch ``x`` whose last dim is ``d_in``."""
        # clamp(min=0) is the ReLU non-linearity applied to the hidden layer.
        h_relu = self.ln1(x).clamp(min=0)
        y_pred = self.ln2(h_relu)
        return y_pred

# Train Net on random data: 64 samples, 1000 input features, 100 hidden
# units, 10 outputs.
train_size, d_in, d_hidden, d_out = 64, 1000, 100, 10

# Random inputs and targets. torch is imported as `tc`, so the alias is used
# here (the bare name `torch` is undefined in this notebook).
x = tc.randn(train_size, d_in)
y = tc.randn(train_size, d_out)

model = Net(d_in, d_hidden, d_out)

optimizer = tc.optim.SGD(model.parameters(), lr=1e-4)
# Sum (not mean) of squared errors.
loss_fn = tc.nn.MSELoss(reduction='sum')

for t in range(500):
    # Forward pass.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    # Report the loss once every 50 iterations.
    if t % 50 == 49:
        print(t, loss.item())

    # Clear old gradients through the optimizer; it was built from
    # model.parameters(), so this covers exactly the same tensors.
    optimizer.zero_grad()
    loss.backward()

    # Apply the SGD update.
    optimizer.step()

49 32.56114959716797
99 1.6436736583709717
149 0.14426842331886292
199 0.019672388210892677
249 0.003468212205916643
299 0.0007053564768284559
349 0.00015541346510872245
399 3.5889854189008474e-05
449 8.535595952707808e-06
499 2.0695404145953944e-06


[0;31mSignature:[0m       [0mmodel[0m[0;34m([0m[0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mType:[0m            Net
[0;31mString form:[0m    
Net(
  (ln1): Linear(in_features=1000, out_features=100, bias=True)
  (ln2): Linear(in_features=100, out_features=10, bias=True)
)
[0;31mDocstring:[0m       <no docstring>
[0;31mClass docstring:[0m
Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing to nest them in
a tree structure. You can assign the submodules as regular attributes::

    import torch.nn as nn
    import torch.nn.functional as F

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(1, 20, 5)
            self.conv2 = nn.Conv2d(20, 20, 5)

        def forward(self, x):
            x = F.relu(self.conv1(x))
            return F.relu(self.conv2(x))

Su