In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_soom
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset

In [2]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.f1 = nn.Linear(1, 10)
        self.f2 = nn.Linear(10, 20)
        self.f3 = nn.Linear(20, 20)
        self.f4 = nn.Linear(20, 10)
        self.f5 = nn.Linear(10, 1)

        self.activation = nn.ReLU()
        # self.activation = nn.Sigmoid()

    def forward(self, x):
        x = self.activation(self.f1(x))
        x = self.activation(self.f2(x))
        x = self.activation(self.f3(x))
        x = self.activation(self.f4(x))
        x = self.f5(x)
        
        return x


In [3]:
X = np.random.uniform(0, 1, size=(300, 1))
# y = X[:, 0] - X[:, 1]**2 + 2 * X[:, 2] * X[:, 3] + (1 / ((1 + X[:, 4]) ** 6))
y = np.sinc(X).sum(axis=1)

torch_data = TensorDataset(torch.Tensor(X), torch.Tensor(y))
data_loader = DataLoader(torch_data, batch_size=100)

In [4]:
model = Net()
loss_fn = nn.MSELoss()
opt = pytorch_soom.LM_cpd(model.parameters(), lr=1, model=model)

all_loss = {}
for epoch in range(100):
    print('epoch: ', epoch, end='')
    all_loss[epoch+1] = 0
    for batch_idx, (b_x, b_y) in enumerate(data_loader):
        pre = model(b_x)
        loss = loss_fn(pre, b_y)
        opt.zero_grad()
        loss.backward()

        # parameter update step based on optimizer
        opt.step(b_x, b_y)

        all_loss[epoch+1] += loss
    all_loss[epoch+1] /= len(data_loader)
    print(', loss: {}'.format(all_loss[epoch+1].detach().numpy().item()))

epoch:  0

  return F.mse_loss(input, target, reduction=self.reduction)


, loss: 0.15734048187732697
epoch:  1, loss: 0.1430911272764206
epoch:  2, loss: 0.1330896019935608
epoch:  3, loss: 0.12587898969650269
epoch:  4, loss: 0.12073305994272232
epoch:  5, loss: 0.11710748821496964
epoch:  6, loss: 0.11459001153707504
epoch:  7, loss: 0.11285939812660217
epoch:  8, loss: 0.11167964339256287
epoch:  9, loss: 0.11088114231824875
epoch:  10, loss: 0.1103433147072792
epoch:  11, loss: 0.10998222976922989
epoch:  12, loss: 0.1097400113940239
epoch:  13, loss: 0.10957714170217514
epoch:  14, loss: 0.10946730524301529
epoch:  15, loss: 0.10939282178878784
epoch:  16, loss: 0.10934203863143921
epoch:  17, loss: 0.109307199716568
epoch:  18, loss: 0.1092829704284668
epoch:  19, loss: 0.1092660129070282
epoch:  20, loss: 0.1092539057135582
epoch:  21, loss: 0.10924515873193741
epoch:  22, loss: 0.10923869162797928
epoch:  23, loss: 0.10923391580581665
epoch:  24, loss: 0.10923018306493759
epoch:  25, loss: 0.10922729969024658
epoch:  26, loss: 0.10922495275735855
ep

In [5]:
model = Net()
loss_fn = nn.MSELoss()
opt = optim.Adam(model.parameters())

all_loss = {}
for epoch in range(100):
    print('epoch: ', epoch, end = '')
    all_loss[epoch+1] = 0
    for batch_idx, (b_x, b_y) in enumerate(data_loader):
        pre = model(b_x)
        loss = loss_fn(pre, b_y)
        opt.zero_grad()
        loss.backward()

        # parameter update step based on optimizer
        opt.step()

        all_loss[epoch+1] += loss
    all_loss[epoch+1] /= len(data_loader)
    print(', loss: {}'.format(all_loss[epoch+1].detach().numpy().item()))


epoch:  0, loss: 0.2587035000324249
epoch:  1, loss: 0.2397034913301468
epoch:  2, loss: 0.22186093032360077
epoch:  3, loss: 0.20486371219158173
epoch:  4, loss: 0.18872815370559692
epoch:  5, loss: 0.17377306520938873
epoch:  6, loss: 0.16048216819763184
epoch:  7, loss: 0.14862339198589325
epoch:  8, loss: 0.13789282739162445
epoch:  9, loss: 0.12829919159412384
epoch:  10, loss: 0.11999199539422989
epoch:  11, loss: 0.1131119504570961
epoch:  12, loss: 0.1077706590294838
epoch:  13, loss: 0.10401351004838943
epoch:  14, loss: 0.1017664447426796
epoch:  15, loss: 0.10079535096883774
epoch:  16, loss: 0.10071191191673279
epoch:  17, loss: 0.10105209797620773
epoch:  18, loss: 0.101411372423172
epoch:  19, loss: 0.10156051069498062
epoch:  20, loss: 0.10146567970514297
epoch:  21, loss: 0.10121766477823257
epoch:  22, loss: 0.10093539953231812
epoch:  23, loss: 0.10070154815912247
epoch:  24, loss: 0.10054611414670944
epoch:  25, loss: 0.10045836120843887
epoch:  26, loss: 0.100410141

In [6]:
model = Net()
loss_fn = nn.MSELoss()
opt = pytorch_soom.Newton_cpd(model.parameters(), lr=1, model=model)

all_loss = {}
for epoch in range(100):
    print('epoch: ', epoch, end='')
    all_loss[epoch+1] = 0
    for batch_idx, (b_x, b_y) in enumerate(data_loader):
        pre = model(b_x)
        loss = loss_fn(pre, b_y)
        opt.zero_grad()
        loss.backward()

        # parameter update step based on optimizer
        opt.step(b_x, b_y)

        all_loss[epoch+1] += loss
    all_loss[epoch+1] /= len(data_loader)
    print(', loss: {}'.format(all_loss[epoch+1].detach().numpy().item()))

epoch:  0, loss: 0.5638055205345154
epoch:  1, loss: 0.35222554206848145
epoch:  2, loss: 0.2497107833623886
epoch:  3, loss: 0.26668545603752136
epoch:  4, loss: 0.24280951917171478
epoch:  5, loss: 0.1740289330482483
epoch:  6, loss: 0.1398189663887024
epoch:  7, loss: 0.12207093834877014
epoch:  8, loss: 0.11216113716363907
epoch:  9, loss: 0.10635871440172195
epoch:  10, loss: 0.10343404859304428
epoch:  11, loss: 0.10179099440574646
epoch:  12, loss: 0.10086837410926819
epoch:  13, loss: 0.10033486038446426
epoch:  14, loss: 0.10001718252897263
epoch:  15, loss: 0.09983772039413452
epoch:  16, loss: 0.09974392503499985
epoch:  17, loss: 0.09969896078109741
epoch:  18, loss: 0.09967195987701416
epoch:  19, loss: 0.09965749830007553
epoch:  20, loss: 0.10902784019708633
epoch:  21, loss: 0.10495344549417496
epoch:  22, loss: 0.10260497778654099
epoch:  23, loss: 0.10129103064537048
epoch:  24, loss: 0.10056231170892715
epoch:  25, loss: 0.10014406591653824
epoch:  26, loss: 0.099912