In [5]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim

In [6]:
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

In [3]:
t_c_arr = [0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0]
t_u_arr = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
t_c = torch.tensor(t_c_arr)
t_u = torch.tensor(t_u_arr)
t_un = 0.1 * t_u

In [4]:
def model(t_u, w, b):
    return w * t_u + b

def loss_fn(t_p, t_c):
    squared_diffs = (t_p - t_c)**2
    return squared_diffs.mean()

In [10]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-5
optimizer = optim.SGD([params], lr=learning_rate)
params

tensor([1., 0.], requires_grad=True)

In [11]:
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
loss.backward()
optimizer.step()

params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [12]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    for epoch in range(1, 1+n_epochs):
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 500 == 0:
            print('Epoch %d, loss %f' % (epoch, float(loss)))
    return params

In [14]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)
params

tensor([1., 0.], requires_grad=True)

In [15]:
training_loop(n_epochs=5000, optimizer=optimizer, params=params, t_u=t_un, t_c=t_c)

Epoch 500, loss 7.860115
Epoch 1000, loss 3.828538
Epoch 1500, loss 3.092191
Epoch 2000, loss 2.957698
Epoch 2500, loss 2.933134
Epoch 3000, loss 2.928648
Epoch 3500, loss 2.927830
Epoch 4000, loss 2.927679
Epoch 4500, loss 2.927652
Epoch 5000, loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [16]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.Adam([params], lr=learning_rate)
params

tensor([1., 0.], requires_grad=True)

In [17]:
training_loop(n_epochs=5000, optimizer=optimizer, params=params, t_u=t_un, t_c=t_c)

Epoch 500, loss 24.946461
Epoch 1000, loss 15.752771
Epoch 1500, loss 9.455151
Epoch 2000, loss 5.767542
Epoch 2500, loss 3.932838
Epoch 3000, loss 3.196811
Epoch 3500, loss 2.977151
Epoch 4000, loss 2.933181
Epoch 4500, loss 2.927968
Epoch 5000, loss 2.927654


tensor([  5.3660, -17.2952], requires_grad=True)

分割数据集

In [22]:
nsamples = t_u.shape[0]
n_val = int(0.2 * nsamples)
shuffled_indices = torch.randperm(nsamples)
train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]
n_val, nsamples, train_indices, val_indices, shuffled_indices

(2,
 11,
 tensor([7, 8, 6, 4, 3, 2, 1, 9, 5]),
 tensor([ 0, 10]),
 tensor([ 7,  8,  6,  4,  3,  2,  1,  9,  5,  0, 10]))

In [23]:
train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]

val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

train_t_un = 0.1 * train_t_u
val_t_un = 0.1 * val_t_u

In [40]:
def training_loop(n_epochs, optimizer, params, train_t_u, train_t_c, val_t_u, val_t_c):
    for epoch in range(1, 1+n_epochs):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(t_p = train_t_p, t_c = train_t_c)
        val_t_p = model(val_t_u, *params)
        val_loss = loss_fn(t_p = val_t_p, t_c=val_t_c)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        if epoch % 500 == 0:
            print(f"Epoch: {epoch}, training loss:{train_loss.item():.4f}, "
                  f"validation loss: {val_loss.item():.4f}")
    return params

In [41]:
n_epochs = 5000
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.Adam(params=[params], lr=learning_rate)
n_epochs, params, learning_rate, optimizer

(5000,
 tensor([1., 0.], requires_grad=True),
 0.01,
 Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 0.01
     maximize: False
     weight_decay: 0
 ))

In [42]:
result_params = training_loop(n_epochs=n_epochs, optimizer=optimizer, params=params,
              train_t_u=train_t_u, train_t_c=train_t_c,
              val_t_u=val_t_u, val_t_c=val_t_c)
result_params

Epoch: 500, training loss:22.7140, validation loss: 39.1506
Epoch: 1000, training loss:20.4301, validation loss: 35.8703
Epoch: 1500, training loss:17.5432, validation loss: 31.6328
Epoch: 2000, training loss:14.3864, validation loss: 26.8465
Epoch: 2500, training loss:11.2837, validation loss: 21.9186
Epoch: 3000, training loss:8.5235, validation loss: 17.2351
Epoch: 3500, training loss:6.3116, validation loss: 13.1111
Epoch: 4000, training loss:4.7370, validation loss: 9.7477
Epoch: 4500, training loss:3.7644, validation loss: 7.2146
Epoch: 5000, training loss:3.2619, validation loss: 5.4637


tensor([  0.4840, -14.4444], requires_grad=True)

In [43]:
def training_loop(n_epochs, optimizer, params, train_t_u, train_t_c, val_t_u, val_t_c):
    for epoch in range(1, 1+n_epochs):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(t_p = train_t_p, t_c = train_t_c)
        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(t_p = val_t_p, t_c=val_t_c)
            assert val_loss.requires_grad == False

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        if epoch % 500 == 0:
            print(f"Epoch: {epoch}, training loss:{train_loss.item():.4f}, "
                  f"validation loss: {val_loss.item():.4f}")
    return params

In [44]:
n_epochs = 5000
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.Adam(params=[params], lr=learning_rate)
n_epochs, params, learning_rate, optimizer

(5000,
 tensor([1., 0.], requires_grad=True),
 0.01,
 Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 0.01
     maximize: False
     weight_decay: 0
 ))

In [45]:
training_loop(n_epochs=n_epochs, optimizer=optimizer, params=params,
              train_t_u=train_t_u, train_t_c=train_t_c,
              val_t_u=val_t_u, val_t_c=val_t_c)

Epoch: 500, training loss:22.7140, validation loss: 39.1506
Epoch: 1000, training loss:20.4301, validation loss: 35.8703
Epoch: 1500, training loss:17.5432, validation loss: 31.6328
Epoch: 2000, training loss:14.3864, validation loss: 26.8465
Epoch: 2500, training loss:11.2837, validation loss: 21.9186
Epoch: 3000, training loss:8.5235, validation loss: 17.2351
Epoch: 3500, training loss:6.3116, validation loss: 13.1111
Epoch: 4000, training loss:4.7370, validation loss: 9.7477
Epoch: 4500, training loss:3.7644, validation loss: 7.2146
Epoch: 5000, training loss:3.2619, validation loss: 5.4637


tensor([  0.4840, -14.4444], requires_grad=True)

In [55]:
def calc_forward(t_u, t_c, is_train, params):
    with torch.set_grad_enabled(is_train):
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
    return loss

def training_loop(n_epochs, optimizer, params, train_t_u, train_t_c, val_t_u, val_t_c):
    for epoch in range(1, 1+n_epochs):
        train_loss = calc_forward(t_u=train_t_u, t_c=train_t_c, is_train=True, params=params)
        val_loss = calc_forward(t_u=val_t_u, t_c=val_t_c, is_train=False, params=params)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        if epoch % 500 == 0:
            print(f"Epoch: {epoch}, training loss:{train_loss.item():.4f}, "
                  f"validation loss: {val_loss.item():.4f}")
    return params

In [56]:
n_epochs = 5000
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.Adam(params=[params], lr=learning_rate)
training_loop(n_epochs=n_epochs, optimizer=optimizer, params=params,
              train_t_u=train_t_u, train_t_c=train_t_c,
              val_t_u=val_t_u, val_t_c=val_t_c)

Epoch: 500, training loss:22.7140, validation loss: 39.1506
Epoch: 1000, training loss:20.4301, validation loss: 35.8703
Epoch: 1500, training loss:17.5432, validation loss: 31.6328
Epoch: 2000, training loss:14.3864, validation loss: 26.8465
Epoch: 2500, training loss:11.2837, validation loss: 21.9186
Epoch: 3000, training loss:8.5235, validation loss: 17.2351
Epoch: 3500, training loss:6.3116, validation loss: 13.1111
Epoch: 4000, training loss:4.7370, validation loss: 9.7477
Epoch: 4500, training loss:3.7644, validation loss: 7.2146
Epoch: 5000, training loss:3.2619, validation loss: 5.4637


tensor([  0.4840, -14.4444], requires_grad=True)

练习题

In [57]:
def model(t_u, w1, w2, b):
    return w1 * t_u ** 2 + w2 * t_u + b

In [60]:
n_epochs = 5000
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.Adam(params=[params], lr=learning_rate)
training_loop(n_epochs=n_epochs, optimizer=optimizer, params=params,
              train_t_u=train_t_u, train_t_c=train_t_c,
              val_t_u=val_t_u, val_t_c=val_t_c)

Epoch: 500, training loss:5.1488, validation loss: 10.6766
Epoch: 1000, training loss:5.0652, validation loss: 10.4882
Epoch: 1500, training loss:4.9475, validation loss: 10.2180
Epoch: 2000, training loss:4.7982, validation loss: 9.8662
Epoch: 2500, training loss:4.6208, validation loss: 9.4325
Epoch: 3000, training loss:4.4211, validation loss: 8.9197
Epoch: 3500, training loss:4.2088, validation loss: 8.3371
Epoch: 4000, training loss:3.9971, validation loss: 7.7025
Epoch: 4500, training loss:3.8020, validation loss: 7.0437
Epoch: 5000, training loss:3.6384, validation loss: 6.3973


tensor([ 0.0051, -0.0557, -1.1935], requires_grad=True)