In [1]:
import torch

In [10]:
# Paired samples: t_u holds the raw model inputs and t_c the targets the
# model is fitted against (same ordering in both tensors).
t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4])

# Copy of the inputs rescaled by a factor of 0.1.
t_un = t_u * 0.1

In [2]:
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``.

    Args:
        t_u: input value(s); anything supporting ``*`` and ``+`` with ``w``/``b``.
        w: multiplicative weight.
        b: additive bias.

    Returns:
        ``w * t_u + b``, broadcast according to the operand types.
    """
    scaled = w * t_u
    return scaled + b

def loss_fn(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``.

    Args:
        t_p: predicted values (tensor).
        t_c: target values, broadcastable against ``t_p``.

    Returns:
        The mean of the element-wise squared differences.
    """
    residuals = t_p - t_c
    return residuals.pow(2).mean()

## Autograd

In [3]:
# Parameter vector unpacked by the model call as (w, b); requires_grad=True asks
# autograd to track operations on it so gradients can be computed.
params = torch.tensor([1.0, 0.0], requires_grad=True)

In [5]:
# No backward pass has run yet, so there is no gradient stored on the tensor.
params.grad is None

True

In [7]:
# Forward pass on the raw inputs, then backpropagate; backward() fills params.grad.
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()
# Displayed value: gradient of the loss, one entry per element of params.
params.grad


tensor([4517.2969,   82.6000])

In [8]:
# Reset the stored gradient in place (autograd adds to .grad instead of
# overwriting it, so it has to be cleared before the next backward pass).
if params.grad is not None:
    params.grad.zero_()
params.grad

tensor([0., 0.])

In [12]:
def training_loop(n_epochs, learning_rate, params, t_u, t_c, log_every=500):
    """Fit ``params`` with hand-written gradient descent.

    Each iteration clears the stored gradient, runs the forward pass,
    backpropagates the loss and updates ``params`` in place.

    Args:
        n_epochs: last epoch index; the loop runs for epochs ``0..n_epochs``
            inclusive, i.e. ``n_epochs + 1`` updates.
        learning_rate: step size applied to the gradient.
        params: tensor with ``requires_grad=True``, unpacked into the model's
            ``w`` and ``b``; modified in place.
        t_u: model inputs.
        t_c: targets.
        log_every: print the loss whenever the epoch index is a multiple of this.

    Returns:
        The same ``params`` tensor that was passed in, after training.
    """
    for epoch in range(n_epochs + 1):
        # Drop whatever gradient the previous iteration left behind.
        if params.grad is not None:
            params.grad.zero_()

        loss = loss_fn(model(t_u, *params), t_c)
        loss.backward()

        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            params -= learning_rate * params.grad

        if epoch % log_every == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
    return params

In [13]:
# Manual gradient descent on the rescaled inputs. A fresh parameter tensor is
# created for the call, so the module-level `params` is not touched.
training_loop(
    n_epochs=5000,
    learning_rate=1e-2,
    params=torch.tensor([1., 0.], requires_grad=True),
    t_u=t_un,
    t_c=t_c
)

Epoch 0, Loss 80.364342
Epoch 500, Loss 7.843369
Epoch 1000, Loss 3.825483
Epoch 1500, Loss 3.091630
Epoch 2000, Loss 2.957596
Epoch 2500, Loss 2.933116
Epoch 3000, Loss 2.928646
Epoch 3500, Loss 2.927829
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927648


tensor([  5.3671, -17.3012], requires_grad=True)

## Optimizer

In [15]:
from torch import optim
# List every name exposed by the torch.optim module.
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'Optimizer',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_multi_tensor',
 'functional',
 'lr_scheduler',
 'swa_utils']

In [16]:
# SGD optimizer over a single freshly initialised parameter tensor.
learning_rate = 1e-5
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD(params=[params], lr=learning_rate)

In [17]:
# One optimization step on the raw inputs.
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)

# Clear any gradient left on params before backpropagating. On a fresh kernel
# this is a no-op, but if the cell is executed again the new gradient would
# otherwise be added to the stale one and the step would use their sum.
optimizer.zero_grad()
loss.backward()
optimizer.step()

# Displayed value: params after the update.
params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [18]:
def training_loop(n_epochs, optimizer, params, t_u, t_c, log_every=500):
    """Fit ``params`` using a ``torch.optim`` optimizer.

    Args:
        n_epochs: last epoch index; the loop runs for epochs ``0..n_epochs``
            inclusive, i.e. ``n_epochs + 1`` optimizer steps.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the update; the
            caller is expected to have constructed it over ``params``.
        params: tensor unpacked into the model's ``w`` and ``b``.
        t_u: model inputs.
        t_c: targets.
        log_every: print the loss whenever the epoch index is a multiple of this.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(n_epochs + 1):
        loss = loss_fn(model(t_u, *params), t_c)

        # Standard step: clear old gradients, backpropagate, apply the update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % log_every == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
    return params


In [19]:
# SGD on the rescaled inputs, driven through the optimizer-based training_loop.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)
training_loop(n_epochs=5000, optimizer=optimizer, params=params, t_u=t_un, t_c=t_c)

Epoch 0, Loss 80.364342
Epoch 500, Loss 7.843377
Epoch 1000, Loss 3.825483
Epoch 1500, Loss 3.091630
Epoch 2000, Loss 2.957596
Epoch 2500, Loss 2.933116
Epoch 3000, Loss 2.928646
Epoch 3500, Loss 2.927829
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927648


tensor([  5.3671, -17.3012], requires_grad=True)

In [20]:
# Adam run. Note that this passes the unscaled t_u (not t_un) and uses lr=1e-1.
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)
training_loop(n_epochs=2000, optimizer=optimizer, params=params, t_u=t_u, t_c=t_c)


Epoch 0, Loss 1763.884766
Epoch 500, Loss 7.588878
Epoch 1000, Loss 3.085362
Epoch 1500, Loss 2.928569
Epoch 2000, Loss 2.927647


tensor([  0.5367, -17.3021], requires_grad=True)

## Validation

In [21]:
# Hold out 20% of the samples (rounded down) for validation.
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# Shuffle with a dedicated, seeded generator so the split is identical on every
# run and the global RNG state is left alone.
shuffled_indices = torch.randperm(
    n_samples, generator=torch.Generator().manual_seed(0)
)

# Slice by count rather than with negative indices: when n_val == 0,
# `[:-n_val]` would be empty and `[-n_val:]` would select every sample.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]
train_indices, val_indices

(tensor([ 4,  6,  7,  8, 10,  3,  0,  1,  2]), tensor([5, 9]))

In [22]:
# Index inputs and targets with the same index sets so the pairs stay aligned.
train_t_u, train_t_c = t_u[train_indices], t_c[train_indices]
val_t_u, val_t_c = t_u[val_indices], t_c[val_indices]

# Rescaled (factor 0.1) inputs for each split.
train_t_un, val_t_un = 0.1 * train_t_u, 0.1 * val_t_u

In [25]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c, log_every=500):
    """Fit ``params`` on the training split while reporting validation loss.

    Args:
        n_epochs: last epoch index; the loop runs for epochs ``0..n_epochs``
            inclusive, i.e. ``n_epochs + 1`` optimizer steps.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the update; the
            caller is expected to have constructed it over ``params``.
        params: tensor unpacked into the model's ``w`` and ``b``.
        train_t_u: training inputs.
        val_t_u: validation inputs.
        train_t_c: training targets.
        val_t_c: validation targets.
        log_every: print both losses whenever the epoch index is a multiple of this.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(n_epochs + 1):
        train_loss = loss_fn(model(train_t_u, *params), train_t_c)

        # Validation is evaluation only, so it runs with gradient tracking off;
        # the assert checks that the resulting loss is detached from autograd.
        with torch.no_grad():
            val_loss = loss_fn(model(val_t_u, *params), val_t_c)
            assert not val_loss.requires_grad

        # Only the training loss is backpropagated.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        if epoch % log_every == 0:
            print(
                f"Epoch {epoch}, Training loss {train_loss.item():.4f}",
                f" Validation loss {val_loss.item():.4f}"
            )
    return params

In [28]:
# SGD on the rescaled training split, with validation loss reported alongside.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)
params = training_loop(
    n_epochs=3000, optimizer=optimizer, params=params,
    train_t_u=train_t_un, val_t_u=val_t_un,
    train_t_c=train_t_c, val_t_c=val_t_c,
)
params

Epoch 0, Training loss 91.7660  Validation loss 29.0568
Epoch 500, Training loss 7.0749  Validation loss 4.6095
Epoch 1000, Training loss 3.4093  Validation loss 4.0897
Epoch 1500, Training loss 2.9270  Validation loss 3.9970
Epoch 2000, Training loss 2.8635  Validation loss 3.9759
Epoch 2500, Training loss 2.8552  Validation loss 3.9699
Epoch 3000, Training loss 2.8541  Validation loss 3.9680


tensor([  5.4241, -17.2491], requires_grad=True)