In [8]:
import torch.optim as optim
import torch

# Display every name in the torch.optim namespace; as the cell's last
# expression, the resulting list is rendered as the cell output.
dir(optim)

['ASGD',
 'Adadelta',
 'Adafactor',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_adafactor',
 '_functional',
 'lr_scheduler',
 'swa_utils']

In [9]:
# Two-element learnable tensor (the training loop unpacks it into the
# model's arguments via *params) handed to a plain SGD optimizer.
learning_rate = 1e-5
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

In [10]:
# Eleven paired samples: t_u is what the model receives, t_c is what the
# loss compares the model's predictions against.
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
)
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
)

# Rescaled copy of the inputs: one tenth of the raw values.
t_un = 0.1 * t_u

In [11]:
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``."""
    scaled = w * t_u
    return scaled + b

def loss_fn(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``."""
    residuals = t_p - t_c
    return (residuals ** 2).mean()

In [19]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Run ``n_epochs`` full-batch optimization steps and return ``params``.

    Each epoch evaluates ``model`` on ``t_u`` with ``params`` unpacked as
    its remaining arguments, scores the prediction against ``t_c`` with
    ``loss_fn``, backpropagates, and takes one ``optimizer`` step. The
    optimizer is expected to have been built over ``params`` by the caller.

    The loss computed before the step is printed on every 500th epoch.
    The returned object is the same ``params`` that was passed in.
    """
    for step in range(n_epochs):
        epoch = step + 1  # epochs are reported 1-based

        # Forward pass and loss.
        loss = loss_fn(model(t_u, *params), t_c)

        # Clear gradients left from the previous step, then backprop and update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 500 != 0:
            continue
        print(f'Epoch {epoch}, Loss {float(loss)}')

    return params

In [24]:
# Fresh parameters, this time driven by Adam with a step size of 0.1.
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)

# Note: the raw t_u is passed here, not the rescaled t_un.
training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    t_u=t_u,
    t_c=t_c,
)

Epoch 500, Loss 7.612898349761963
Epoch 1000, Loss 3.0866994857788086
Epoch 1500, Loss 2.928579807281494
Epoch 2000, Loss 2.9276442527770996
Epoch 2500, Loss 2.927645683288574
Epoch 3000, Loss 2.9276459217071533
Epoch 3500, Loss 2.927644968032837
Epoch 4000, Loss 2.927645683288574
Epoch 4500, Loss 2.927645206451416
Epoch 5000, Loss 2.927645206451416


tensor([  0.5368, -17.3048], requires_grad=True)

In [25]:
# Hold out 20% of the samples (rounded down) for validation.
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# NOTE: no seed is set in this cell, so the permutation (and therefore the
# split) can differ from run to run.
shuffled_indices = torch.randperm(n_samples)

# Slice by the training count instead of by -n_val: when n_val is 0,
# [:-0] is empty and [-0:] is the whole tensor, which would leave the
# training set empty and put every sample in validation.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices

(tensor([ 9,  5,  0, 10,  4,  3,  6,  8,  7]), tensor([1, 2]))

In [26]:
# Index inputs and targets with the same index tensor so that each
# input stays paired with its target in both subsets.
train_t_u, train_t_c = t_u[train_indices], t_c[train_indices]
val_t_u, val_t_c = t_u[val_indices], t_c[val_indices]

# Rescaled copies of the inputs: one tenth of the raw values.
train_t_un, val_t_un = 0.1 * train_t_u, 0.1 * val_t_u

In [33]:
def model2(t_u, w1, w2, b):
    """Quadratic model: ``w2 * t_u**2 + w1 * t_u + b``."""
    quadratic_term = w2 * t_u**2
    linear_term = w1 * t_u
    return quadratic_term + linear_term + b

In [34]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c):
    """Train ``model2`` on the training split while tracking validation loss.

    Each epoch computes the training loss with ``loss_fn``, computes the
    validation loss with gradient tracking disabled, then backpropagates
    the training loss only and takes one ``optimizer`` step. The optimizer
    is expected to have been built over ``params`` by the caller.

    Both losses (measured before the step) are printed for the first three
    epochs and on every 500th epoch. The returned object is the same
    ``params`` that was passed in.
    """
    for step in range(n_epochs):
        epoch = step + 1  # epochs are reported 1-based

        train_loss = loss_fn(model2(train_t_u, *params), train_t_c)

        # Validation is evaluation only, so it runs outside the autograd graph.
        with torch.no_grad():
            val_loss = loss_fn(model2(val_t_u, *params), val_t_c)
            # Sanity check that no graph was recorded for the validation loss.
            assert not val_loss.requires_grad

        # Clear gradients left from the previous step, then backprop and update.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        should_report = epoch <= 3 or epoch % 500 == 0
        if should_report:
            print(f'Epoch {epoch}, Training Loss {train_loss.item():.4f},'
                  f' Validation loss {val_loss.item():.4f}')

    return params

In [41]:
# Three parameters for model2, unpacked in order as w1, w2, b.
learning_rate = 1e-5
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

# Train and validate on the rescaled inputs (train_t_un / val_t_un).
training_loop(
    n_epochs=10000,
    optimizer=optimizer,
    params=params,
    train_t_u=train_t_un,
    val_t_u=val_t_un,
    train_t_c=train_t_c,
    val_t_c=val_t_c,
)

Epoch 1, Training Loss 700.2715, Validation loss 565.6469
Epoch 2, Training Loss 668.1007, Validation loss 537.6002
Epoch 3, Training Loss 637.4429, Validation loss 510.9091
Epoch 500, Training Loss 15.8082, Validation loss 1.7930
Epoch 1000, Training Loss 15.2907, Validation loss 1.8957
Epoch 1500, Training Loss 14.7932, Validation loss 1.9989
Epoch 2000, Training Loss 14.3149, Validation loss 2.1028
Epoch 2500, Training Loss 13.8552, Validation loss 2.2072
Epoch 3000, Training Loss 13.4133, Validation loss 2.3122
Epoch 3500, Training Loss 12.9885, Validation loss 2.4174
Epoch 4000, Training Loss 12.5802, Validation loss 2.5230
Epoch 4500, Training Loss 12.1876, Validation loss 2.6287
Epoch 5000, Training Loss 11.8103, Validation loss 2.7345
Epoch 5500, Training Loss 11.4476, Validation loss 2.8402
Epoch 6000, Training Loss 11.0989, Validation loss 2.9459
Epoch 6500, Training Loss 10.7637, Validation loss 3.0514
Epoch 7000, Training Loss 10.4415, Validation loss 3.1567
Epoch 7500, Tra

tensor([ 0.1014,  0.3790, -0.3619], requires_grad=True)