Refs:

https://github.com/deep-learning-with-pytorch/dlwpt-code

In [1]:
import numpy as np
import torch

### Optimizers

In [2]:
# Raw samples: x[i] is the input and y[i] the target for sample i.
# The values are kept unscaled here; rescaling is done in a separate step.
x = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
)
y = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
)

In [3]:
# Rescaled copy of the inputs (factor 0.1); the raw `x` is left untouched.
x_norm = x * 0.1

In [4]:
def model(x, w, b):
    """Linear model: scale the input ``x`` by weight ``w`` and add bias ``b``."""
    scaled = w * x
    return scaled + b

In [5]:
def loss_fn(y_p, y):
    """Mean squared error between predictions ``y_p`` and targets ``y``."""
    return ((y_p - y) ** 2).mean()

In [6]:
import torch.optim as optim

In [7]:
# Show every name exposed by torch.optim, which includes the available
# optimizer classes (SGD, Adam, ...).
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'Optimizer',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_multi_tensor',
 'functional',
 'lr_scheduler',
 'swa_utils']

In [8]:
def training_loop(n_epochs, optimizer, params, x, y):
    """Run ``n_epochs`` optimisation steps of ``model`` on ``(x, y)``.

    Each epoch evaluates ``model(x, *params)``, scores it with ``loss_fn``,
    backpropagates the loss and calls ``optimizer.step()``. The optimizer is
    expected to have been constructed over ``params``. The loss is printed
    every 500th epoch.

    Returns the same ``params`` object that was passed in.
    """
    report_every = 500

    for step in range(n_epochs):
        epoch = step + 1

        # forward pass
        prediction = model(x, *params)
        loss = loss_fn(prediction, y)

        # clear stale gradients, backpropagate, then apply the update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % report_every:
            continue
        print('Epoch %d, Loss %f' % (epoch, float(loss)))

    return params

In [9]:
# Start from w = 1, b = 0 and fit with SGD on the rescaled inputs.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    x=x_norm,
    y=y,
)

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

### Splitting a dataset

In [10]:
# Fixed seed so the train/validation split (and every loss printed
# downstream) is the same after Restart Kernel -> Run All.
SPLIT_SEED = 0
VAL_FRACTION = 0.2

n_samples = x.shape[0]
n_val = int(VAL_FRACTION * n_samples)
n_train = n_samples - n_val

# A dedicated generator keeps the seeding local instead of resetting
# torch's global RNG state.
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
i_shuffled = torch.randperm(n_samples, generator=split_rng)

# Slice with an explicit training count: negative-offset slices break when
# n_val == 0, because [:-0] is empty and [-0:] is the whole tensor.
i_train = i_shuffled[:n_train]
i_val = i_shuffled[n_train:]

i_train, i_val

(tensor([ 1,  3,  9, 10,  5,  4,  7,  0,  2]), tensor([8, 6]))

In [11]:
# Index the already-rescaled inputs (`x_norm`). Rebinding `x` to a rescaled
# copy of itself here would compound the scaling every time this cell is
# re-run, so the raw `x` is left untouched.
x_train = x_norm[i_train]
y_train = y[i_train]

x_val = x_norm[i_val]
y_val = y[i_val]

In [12]:
def training_loop(n_epochs, optimizer, params, x_train, y_train, x_val, y_val):
    """Train on the training split while monitoring loss on the validation split.

    Each epoch computes the training loss of ``model(x_train, *params)``,
    backpropagates it and calls ``optimizer.step()``. The optimizer is
    expected to have been constructed over ``params``. The validation loss
    is computed for reporting only and never contributes gradients.

    Losses are printed for the first three epochs and every 500th epoch.

    Args:
        n_epochs: number of optimisation steps to run.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called
            each epoch.
        params: parameters unpacked into ``model`` as ``(w, b)``.
        x_train, y_train: inputs and targets used for the gradient step.
        x_val, y_val: inputs and targets used only for monitoring.

    Returns:
        The same ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        y_train_p = model(x_train, *params)
        loss_train = loss_fn(y_train_p, y_train)

        # Validation is monitoring only: disable autograd so no graph is
        # built for a loss that is never backpropagated.
        with torch.no_grad():
            y_val_p = model(x_val, *params)
            loss_val = loss_fn(y_val_p, y_val)

        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")

    return params

In [13]:
# Fresh parameters and optimizer for the run on the train/validation split.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    params=params,
    x_train=x_train,
    y_train=y_train,
    x_val=x_val,
    y_val=y_val,
)

Epoch 1, Training loss 98.0567, Validation loss 0.7488
Epoch 2, Training loss 37.8207, Validation loss 13.0239
Epoch 3, Training loss 30.7609, Validation loss 24.6283
Epoch 500, Training loss 7.5900, Validation loss 9.0263
Epoch 1000, Training loss 3.2741, Validation loss 5.7568
Epoch 1500, Training loss 2.4474, Validation loss 5.6629
Epoch 2000, Training loss 2.2891, Validation loss 5.8779
Epoch 2500, Training loss 2.2588, Validation loss 6.0210
Epoch 3000, Training loss 2.2530, Validation loss 6.0931


tensor([  5.4576, -17.7181], requires_grad=True)