In [1]:
%matplotlib inline
import numpy as np
import torch
torch.set_printoptions(edgeitems=2)

In [2]:
# Paired measurements: t_c holds the target values, t_u the raw inputs
# (11 samples each, matched by position).
t_c = torch.tensor([
    0.5, 14.0, 15.0, 28.0, 11.0, 8.0,
    3.0, -4.0, 6.0, 13.0, 21.0,
])
t_u = torch.tensor([
    35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
    33.9, 21.8, 48.4, 60.4, 68.4,
])

# Rescaled copy of the inputs (one tenth of the raw values).
t_un = t_u * 0.1

In [3]:
def model(t_u, w, b):
    """Linear model: scale the input by the weight `w` and shift by the bias `b`."""
    scaled = w * t_u
    return scaled + b

In [4]:
def loss_fn(t_p, t_c):
    """Mean squared error between the predictions `t_p` and the targets `t_c`."""
    residuals = t_p - t_c
    return (residuals ** 2).mean()

In [5]:
import torch.optim as optim

# List the names exposed by torch.optim (optimizer classes such as SGD and Adam,
# plus the lr_scheduler submodule) to see which optimizers are available.
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'Optimizer',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'lr_scheduler']

In [7]:
# Step size for the SGD update below.
learning_rate = 1e-5

# Trainable parameters, unpacked by the model as (w, b); start at w=1, b=0
# with gradient tracking enabled.
params = torch.tensor([1.0, 0.0], requires_grad=True)

# Plain SGD over the single parameter tensor.
optimizer = optim.SGD([params], lr=learning_rate)

In [8]:
# Clear any gradient left on `params` by a previous execution of this cell:
# backward() accumulates into .grad, so without this a re-run would apply a
# compounded (and wrong) update.
optimizer.zero_grad()

# Forward pass on the raw inputs, then backpropagate the loss.
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
loss.backward()

# Apply one optimizer update and display the updated parameters.
optimizer.step()
params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [29]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Fit the linear `model` by repeating forward / backward / optimizer step.

    Args:
        n_epochs: number of update steps to run (epochs are counted from 1).
        optimizer: optimizer whose `zero_grad()` and `step()` are called each
            epoch; presumably constructed over `params` (verify at call site).
        params: parameters unpacked into `model` as its weight and bias.
        t_u: model inputs.
        t_c: targets the predictions are compared against.

    Returns:
        The `params` object that was passed in.
    """
    log_every = 500
    for step in range(n_epochs):
        epoch = step + 1

        # Forward pass and loss for the current parameters.
        predictions = model(t_u, *params)
        loss = loss_fn(predictions, t_c)

        # Reset stale gradients, backpropagate, then update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress once every `log_every` epochs.
        if epoch % log_every == 0:
            print("Epoch %d, Loss %f" % (epoch, float(loss)))

    return params

In [13]:
# Train the linear model with plain SGD on the rescaled inputs (t_un),
# starting again from w=1, b=0.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    t_u=t_un,
    t_c=t_c,
)

Epoch 500, Loss 7.860118
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957697
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927680
Epoch 4500, Loss 2.927651
Epoch 5000, Loss 2.927648


tensor([  5.3671, -17.3012], requires_grad=True)

In [16]:
# Same experiment with Adam instead of SGD: fresh parameters, a larger
# learning rate, and the raw (unscaled) inputs t_u.
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    t_u=t_u,
    t_c=t_c,
)

Epoch 500, Loss 7.612903
Epoch 1000, Loss 3.086700
Epoch 1500, Loss 2.928578
Epoch 2000, Loss 2.927646
Epoch 2500, Loss 2.927646
Epoch 3000, Loss 2.927645
Epoch 3500, Loss 2.927647
Epoch 4000, Loss 2.927645
Epoch 4500, Loss 2.927646
Epoch 5000, Loss 2.927646


tensor([  0.5368, -17.3048], requires_grad=True)

In [17]:
# Hold out roughly 20% of the samples for validation.
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# Shuffle with a locally seeded generator so the split (and every loss
# reported further down) is reproducible across kernel restarts, without
# touching the global RNG state.
split_generator = torch.Generator().manual_seed(0)
shuffled_indices = torch.randperm(n_samples, generator=split_generator)

# Slice by the positive boundary n_train. The negative forms `[:-n_val]` /
# `[-n_val:]` invert the split when n_val == 0 (fewer than 5 samples):
# `[:-0]` is empty and `[-0:]` is everything.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices  # <1>

(tensor([ 2,  9, 10,  6,  3,  5,  8,  1,  4]), tensor([7, 0]))

In [18]:
# Gather the samples selected by the shuffled index tensors, keeping inputs
# and targets aligned within each subset.
train_t_u, train_t_c = t_u[train_indices], t_c[train_indices]
val_t_u, val_t_c = t_u[val_indices], t_c[val_indices]

# Apply the same 0.1 rescaling to the inputs of both subsets.
train_t_un, val_t_un = 0.1 * train_t_u, 0.1 * val_t_u

In [19]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c):
    """Train the linear `model` on the training set while tracking validation loss.

    Only the training loss is backpropagated; the validation loss is computed
    each epoch purely for reporting.

    Args:
        n_epochs: number of update steps to run (epochs are counted from 1).
        optimizer: optimizer whose `zero_grad()` and `step()` are called each
            epoch; presumably constructed over `params` (verify at call site).
        params: parameters unpacked into `model` as its weight and bias.
        train_t_u, train_t_c: training inputs and targets.
        val_t_u, val_t_c: validation inputs and targets.

    Returns:
        The `params` object that was passed in.
    """
    report = 'Epoch {}, Training loss {}, Validation loss {}'
    epochs = range(1, n_epochs + 1)

    for epoch in epochs:
        # Forward pass and loss on the training subset.
        train_loss = loss_fn(model(train_t_u, *params), train_t_c)

        # Same evaluation on the held-out subset (never backpropagated).
        val_loss = loss_fn(model(val_t_u, *params), val_t_c)

        # Update the parameters from the training loss only.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Report the first three epochs and then every 500th one.
        is_warmup = epoch <= 3
        is_checkpoint = epoch % 500 == 0
        if is_warmup or is_checkpoint:
            print(report.format(epoch, float(train_loss), float(val_loss)))

    return params

In [20]:
# Fresh parameters and SGD optimizer for the run with a validation split.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

# Both subsets are passed in their rescaled (0.1 * t_u) form.
training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    params=params,
    train_t_u=train_t_un,
    val_t_u=val_t_un,
    train_t_c=train_t_c,
    val_t_c=val_t_c,
)

Epoch 1, Training loss 92.9322738647461, Validation loss 23.808650970458984
Epoch 2, Training loss 26.020658493041992, Validation loss 59.395904541015625
Epoch 3, Training loss 19.933879852294922, Validation loss 73.791748046875
Epoch 500, Training loss 9.596940040588379, Validation loss 29.60521697998047
Epoch 1000, Training loss 5.6605963706970215, Validation loss 10.715548515319824
Epoch 1500, Training loss 4.083279132843018, Validation loss 4.244187355041504
Epoch 2000, Training loss 3.4512393474578857, Validation loss 2.3460235595703125
Epoch 2500, Training loss 3.197977066040039, Validation loss 2.025334596633911
Epoch 3000, Training loss 3.096492290496826, Validation loss 2.1753077507019043


tensor([  5.3576, -17.2197], requires_grad=True)

In [21]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c):
    """Train the linear `model`, evaluating validation loss with autograd disabled.

    The validation forward pass runs under `torch.no_grad()` so no autograd
    graph is built for it; only the training loss is backpropagated.

    Args:
        n_epochs: number of update steps to run (epochs are counted from 1).
        optimizer: optimizer whose `zero_grad()` and `step()` are called each
            epoch; presumably constructed over `params` (verify at call site).
        params: parameters unpacked into `model` as its weight and bias.
        train_t_u, train_t_c: training inputs and targets.
        val_t_u, val_t_c: validation inputs and targets.

    Returns:
        The `params` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        with torch.no_grad():  # validation never needs gradients
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
            # Sanity check: nothing computed in this context tracks gradients.
            assert not val_loss.requires_grad

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Report the first three epochs and then every 500th one.
        if epoch <= 3 or epoch % 500 == 0:
            print('Epoch {}, Training loss {}, Validation loss {}'.format(
                epoch, float(train_loss), float(val_loss)))

    # Hand the parameters back so the call's result is usable/displayable
    # instead of None.
    return params

In [22]:
def calc_forward(t_u, t_c, is_train, model_params=None):
    """Run the forward pass and return the loss, tracking gradients only when training.

    Args:
        t_u: model inputs.
        t_c: targets the predictions are compared against.
        is_train: if true, autograd is enabled for the forward pass; otherwise
            it is disabled.
        model_params: parameters unpacked into `model` as its weight and bias.
            When omitted, the notebook-level global `params` is used, which is
            the original behavior.

    Returns:
        The loss computed by `loss_fn` on the model predictions.
    """
    # Prefer explicitly supplied parameters: the global `params` is rebound in
    # several cells, so relying on it makes the result depend on cell order.
    active_params = params if model_params is None else model_params

    with torch.set_grad_enabled(is_train):
        t_p = model(t_u, *active_params)
        loss = loss_fn(t_p, t_c)
    return loss

In [23]:
def new_model(t_u, w1, w2, b):
    """Quadratic model: `w2` weights the squared input, `w1` the input, `b` is the offset."""
    quadratic_term = w2 * t_u ** 2
    linear_term = w1 * t_u
    return quadratic_term + linear_term + b

In [24]:
# Step size for the quadratic-model optimizer.
learning_rate = 1e-5

# Trainable parameters, unpacked by new_model as (w1, w2, b); start at
# w1=1, w2=1, b=0 with gradient tracking enabled.
new_params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)

# Plain SGD over the single parameter tensor.
new_optimizer = optim.SGD([new_params], lr=learning_rate)

In [27]:
# Clear any gradient left on `new_params` by a previous execution of this
# cell: backward() accumulates into .grad, so without this a re-run would
# apply a compounded (and wrong) update.
new_optimizer.zero_grad()

# Forward pass of the quadratic model on the raw inputs, then backpropagate.
t_p = new_model(t_u, *new_params)
loss = loss_fn(t_p, t_c)
loss.backward()

# Apply one optimizer update and display the updated parameters.
new_optimizer.step()
new_params

tensor([-6.2362e+00, -4.6197e+02, -1.1928e-01], requires_grad=True)

In [35]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Fit the quadratic `new_model` by repeating forward / backward / optimizer step.

    Uses the `optimizer` and `params` passed in by the caller rather than
    notebook-level globals, so the function trains whatever it is given.

    Args:
        n_epochs: number of update steps to run (epochs are counted from 1).
        optimizer: optimizer whose `zero_grad()` and `step()` are called each
            epoch; presumably constructed over `params` (verify at call site).
        params: parameters unpacked into `new_model` as (w1, w2, b).
        t_u: model inputs.
        t_c: targets the predictions are compared against.

    Returns:
        The `params` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        # Forward pass with the caller-supplied parameters.
        t_p = new_model(t_u, *params)
        loss = loss_fn(t_p, t_c)

        # Reset stale gradients, backpropagate, then update via the
        # caller-supplied optimizer.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress once every 500 epochs.
        if epoch % 500 == 0:
            print("Epoch %d, Loss %f" % (epoch, float(loss)))
    return params

In [37]:
# Train the quadratic model on the rescaled inputs (t_un). This cell reuses
# new_params / new_optimizer as they currently exist in the kernel; it does
# not re-initialize them.
training_loop(
    n_epochs=10000,
    optimizer=new_optimizer,
    params=new_params,
    t_u=t_un,
    t_c=t_c,
)

Epoch 500, Loss 5190.970703
Epoch 1000, Loss 5017.227539
Epoch 1500, Loss 4849.295898
Epoch 2000, Loss 4686.992676
Epoch 2500, Loss 4530.120605
Epoch 3000, Loss 4378.500000
Epoch 3500, Loss 4231.958496
Epoch 4000, Loss 4090.325195
Epoch 4500, Loss 3953.435059
Epoch 5000, Loss 3821.125977
Epoch 5500, Loss 3693.247070
Epoch 6000, Loss 3569.652588
Epoch 6500, Loss 3450.197021
Epoch 7000, Loss 3334.737549
Epoch 7500, Loss 3223.144775
Epoch 8000, Loss 3115.289307
Epoch 8500, Loss 3011.048340
Epoch 9000, Loss 2910.298096
Epoch 9500, Loss 2812.919678
Epoch 10000, Loss 2718.798340


tensor([39.9848, -5.9032,  1.8114], requires_grad=True)