## Section 5.5.1 - 5.5.2

In [1]:
%matplotlib inline
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)

In [2]:
# Eleven paired samples: t_c holds the target values the model is fitted to and
# t_u the raw input readings (going by the names, presumably Celsius values and
# readings in an unknown unit -- confirm against the accompanying text).
t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0,
                    8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
                    33.9, 21.8, 48.4, 60.4, 68.4])
# Copy of the inputs rescaled by 0.1; the SGD runs further down train on this.
t_un = 0.1 * t_u

In [3]:
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``.

    Works element-wise on tensors (and on plain numbers) through the usual
    arithmetic operators.
    """
    scaled = w * t_u
    return scaled + b

def loss_fn(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``."""
    return ((t_p - t_c) ** 2).mean()

In [4]:
# Baseline: predictions of the untrained model (w=1, b=0) on the raw inputs,
# followed by their mean squared error against the targets (shown as the cell output).
t_p = model(t_u, 1, 0)
loss_fn(t_p, t_c)

tensor(1763.8846)

In [5]:
import torch.optim as optim
# dir(optim)

In [6]:
# Step size for the optimizer, and the trainable parameters packed into a
# single tensor [w, b] starting at w=1, b=0.
learning_rate = 1e-5
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

# Show the optimizer's class, a blank separator line, then its settings.
print(type(optimizer), '', optimizer, sep='\n')

<class 'torch.optim.sgd.SGD'>

SGD (
Parameter Group 0
    dampening: 0
    lr: 1e-05
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [7]:
# Forward pass and loss on the raw inputs with the current parameters.
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)

# Before backward() no gradient has been stored on params, so this prints None.
print(params.grad)
loss.backward()
print(params.grad)
print()
# step() updates params in place by -lr * params.grad.
print(params)
optimizer.step()
print(params)

# Expected output (lr = 1e-5):
# None
# tensor([4517.2969,   82.6000])

# tensor([1., 0.], requires_grad=True)
# tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

None
tensor([4517.2969,   82.6000])

tensor([1., 0.], requires_grad=True)
tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)


In [8]:
# Fresh parameters and a larger learning rate; this walkthrough uses the
# rescaled inputs t_un. State is printed after every optimizer call.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

t_p = model(t_un, *params)
loss = loss_fn(t_p, t_c)

# Nothing has been back-propagated yet, so params.grad is still None.
print(params.grad)
print(params)
print('')

# zero_grad() before any backward(): there is no gradient tensor to clear, so
# params.grad stays None (see the recorded output).
optimizer.zero_grad() # <1>
print(params.grad)
print(params)
print('')

# backward() populates params.grad; the parameter values are not touched.
loss.backward()
print(params.grad)
print(params)
print('')

# First step(): the parameters move by -lr * grad; the gradient is left as is.
optimizer.step()
print(params.grad)
print(params)
print('')

# Second step() without a new backward(): the same, now stale, gradient is
# applied a second time.
optimizer.step()
print(params.grad)
print(params)
print('')

# Now there is a gradient to reset. The recorded output shows it zeroed;
# newer PyTorch releases may reset it to None instead -- TODO confirm for the
# version in use.
optimizer.zero_grad()
print(params.grad)
print(params)
print('')

# A repeated zero_grad() changes nothing.
optimizer.zero_grad()
print(params.grad)
print(params)

None
tensor([1., 0.], requires_grad=True)

None
tensor([1., 0.], requires_grad=True)

tensor([-77.6140, -10.6400])
tensor([1., 0.], requires_grad=True)

tensor([-77.6140, -10.6400])
tensor([1.7761, 0.1064], requires_grad=True)

tensor([-77.6140, -10.6400])
tensor([2.5523, 0.2128], requires_grad=True)

tensor([0., 0.])
tensor([2.5523, 0.2128], requires_grad=True)

tensor([0., 0.])
tensor([2.5523, 0.2128], requires_grad=True)


In [9]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Run ``n_epochs`` full-batch optimisation steps and return ``params``.

    Every epoch evaluates ``model`` with the unpacked ``params`` on ``t_u``,
    scores the result against ``t_c`` with ``loss_fn``, and lets ``optimizer``
    apply one update. The loss is printed on every 500th epoch.
    """
    for completed in range(n_epochs):
        epoch = completed + 1

        predictions = model(t_u, *params)
        current_loss = loss_fn(predictions, t_c)

        # Clear the previous gradient before accumulating the new one.
        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()

        # Only every 500th epoch is reported.
        if epoch % 500:
            continue
        print('Epoch %d, Loss %f' % (epoch, float(current_loss)))

    return params

In [10]:
# Restart from w=1, b=0 and train with plain SGD on the rescaled inputs t_un.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
# The optimizer is given the same tensor object that is passed to
# training_loop below; it updates that tensor in place.
optimizer = optim.SGD([params], lr=learning_rate) # <1>

training_loop(
    n_epochs = 5000, 
    optimizer = optimizer,
    params = params, # <1> 
    t_u = t_un,
    t_c = t_c)

Epoch 500, Loss 7.860118
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957697
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927680
Epoch 4500, Loss 2.927651
Epoch 5000, Loss 2.927648


tensor([  5.3671, -17.3012], requires_grad=True)

In [11]:
# Same starting point, but optimised with Adam and a larger learning rate.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-1
optimizer = optim.Adam([params], lr=learning_rate) # <1>

# Note that this run is fed the raw inputs t_u, not the rescaled t_un.
training_loop(
    n_epochs = 2000, 
    optimizer = optimizer,
    params = params,
    t_u = t_u, # <2> 
    t_c = t_c)

Epoch 500, Loss 7.612903
Epoch 1000, Loss 3.086700
Epoch 1500, Loss 2.928578
Epoch 2000, Loss 2.927646


tensor([  0.5367, -17.3021], requires_grad=True)

## Section 5.5.3

In [12]:
n_samples = t_u.shape[0]
# Hold out 20% of the samples (rounded down) for validation.
n_val = int(0.2 * n_samples)

torch.manual_seed(42)
shuffled_indices = torch.randperm(n_samples)
# NOTE(review): the next line replaces the random permutation above with the
# identity ordering 0..n_samples-1, so the split is NOT shuffled and the last
# n_val samples always form the validation set. Presumably done to keep the
# recorded outputs stable -- confirm whether the shuffle should be restored.
shuffled_indices = torch.tensor(range(n_samples))

# Everything except the last n_val entries trains; the last n_val validate.
train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices  # <1>

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8]), tensor([ 9, 10]))

In [13]:
# Index inputs and targets with the same index tensors so that each input stays
# paired with its target.
train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]

val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

# Rescale the inputs of both splits by the same 0.1 factor.
train_t_un = 0.1 * train_t_u
val_t_un = 0.1 * val_t_u

In [14]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Fit ``params`` on the training split while reporting validation loss.

    Args:
        n_epochs: number of full-batch update steps to run.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the updates.
        params: parameters unpacked into ``model``.
        train_t_u, train_t_c: training inputs and targets.
        val_t_u, val_t_c: validation inputs and targets (evaluation only).

    Returns:
        ``params`` after the final update.

    Both losses are printed for the first three epochs and every 500th epoch.
    The validation forward pass is recorded by autograd here as well, but it
    never influences the update because ``backward`` is only called on the
    training loss.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params) # <1>
        train_loss = loss_fn(train_t_p, train_t_c)

        val_t_p = model(val_t_u, *params) # <1>
        val_loss = loss_fn(val_t_p, val_t_c)

        optimizer.zero_grad()
        # Back-propagate the training loss only; calling backward on val_loss
        # would let the validation data leak into the parameter update.
        train_loss.backward() # <2>
        optimizer.step()

        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

    return params

In [15]:
# Restart from w=1, b=0 for the train/validation run.
params = torch.tensor([1.0, 0.0], requires_grad=True)

learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

# Both splits are passed in their rescaled (x0.1) form.
training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un, # <1> 
    val_t_u = val_t_un, # <1> 
    train_t_c = train_t_c,
    val_t_c = val_t_c)

Epoch 1, Training loss 70.5623, Validatiobn loss 124.4736
Epoch 2, Training loss 40.3757, Validatiobn loss 51.5750
Epoch 3, Training loss 34.2207, Validatiobn loss 30.3821
Epoch 500, Training loss 7.1204, Validatiobn loss 6.4052
Epoch 1000, Training loss 3.4128, Validatiobn loss 4.2236
Epoch 1500, Training loss 2.8828, Validatiobn loss 3.7480
Epoch 2000, Training loss 2.8070, Validatiobn loss 3.6181
Epoch 2500, Training loss 2.7962, Validatiobn loss 3.5761
Epoch 3000, Training loss 2.7946, Validatiobn loss 3.5612


tensor([  5.3306, -17.0593], requires_grad=True)

In [16]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Fit ``params`` on the training split; evaluate validation without autograd.

    Args:
        n_epochs: number of full-batch update steps to run.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the updates.
        params: parameters unpacked into ``model``.
        train_t_u, train_t_c: training inputs and targets.
        val_t_u, val_t_c: validation inputs and targets (evaluation only).

    Returns:
        ``params`` after the final update.

    Both losses are printed for the first three epochs and every 500th epoch,
    before that epoch's update is applied.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # The validation pass is evaluation only, so no graph is recorded.
        with torch.no_grad(): # <1>
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
            # Sanity check: a loss computed under no_grad must not track gradients.
            assert not val_loss.requires_grad # <2>

        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return params

In [17]:
# Restart from w=1, b=0 so this run is comparable with the previous one.
params = torch.tensor([1.0, 0.0], requires_grad=True)

learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

# Both splits are passed in their rescaled (x0.1) form.
training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un, # <1> 
    val_t_u = val_t_un, # <1> 
    train_t_c = train_t_c,
    val_t_c = val_t_c)

Epoch 1, Training loss 70.5623, Validatiobn loss 124.4736
Epoch 2, Training loss 40.3757, Validatiobn loss 51.5750
Epoch 3, Training loss 34.2207, Validatiobn loss 30.3821
Epoch 500, Training loss 7.1204, Validatiobn loss 6.4052
Epoch 1000, Training loss 3.4128, Validatiobn loss 4.2236
Epoch 1500, Training loss 2.8828, Validatiobn loss 3.7480
Epoch 2000, Training loss 2.8070, Validatiobn loss 3.6181
Epoch 2500, Training loss 2.7962, Validatiobn loss 3.5761
Epoch 3000, Training loss 2.7946, Validatiobn loss 3.5612


tensor([  5.3306, -17.0593], requires_grad=True)

In [18]:
def calc_forward(t_u, t_c, is_train, params=None):
    """Compute the loss of ``model`` on ``(t_u, t_c)``.

    Args:
        t_u: inputs passed to ``model``.
        t_c: targets passed to ``loss_fn``.
        is_train: when true, autograd records the computation so the returned
            loss can be back-propagated; when false, recording is switched off.
        params: parameters unpacked into ``model``. When omitted, the
            notebook-level ``params`` variable is used, which was the only
            behaviour before this argument existed.

    Returns:
        The value returned by ``loss_fn``.
    """
    if params is None:
        # Legacy fallback: read the module-level tensor named ``params``.
        params = globals()["params"]
    with torch.set_grad_enabled(is_train):
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
    return loss

def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Fit ``params`` on the training split while reporting validation loss.

    Args:
        n_epochs: number of full-batch update steps to run.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the updates.
        params: parameters used for both forward passes.
        train_t_u, train_t_c: training inputs and targets.
        val_t_u, val_t_c: validation inputs and targets (evaluation only).

    Returns:
        ``params`` after the final update.

    Both losses are printed for the first three epochs and every 500th epoch,
    before that epoch's update is applied.
    """
    for epoch in range(1, n_epochs + 1):
        # Hand params over explicitly so the losses are computed with the
        # tensor given to this function rather than a same-named global.
        train_loss = calc_forward(train_t_u, train_t_c, True, params)
        val_loss = calc_forward(val_t_u, val_t_c, False, params)

        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return params

In [19]:
# Restart from w=1, b=0 so this run is comparable with the previous ones.
params = torch.tensor([1.0, 0.0], requires_grad=True)

learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

# Both splits are passed in their rescaled (x0.1) form.
training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un, # <1> 
    val_t_u = val_t_un, # <1> 
    train_t_c = train_t_c,
    val_t_c = val_t_c)

Epoch 1, Training loss 70.5623, Validatiobn loss 124.4736
Epoch 2, Training loss 40.3757, Validatiobn loss 51.5750
Epoch 3, Training loss 34.2207, Validatiobn loss 30.3821
Epoch 500, Training loss 7.1204, Validatiobn loss 6.4052
Epoch 1000, Training loss 3.4128, Validatiobn loss 4.2236
Epoch 1500, Training loss 2.8828, Validatiobn loss 3.7480
Epoch 2000, Training loss 2.8070, Validatiobn loss 3.6181
Epoch 2500, Training loss 2.7962, Validatiobn loss 3.5761
Epoch 3000, Training loss 2.7946, Validatiobn loss 3.5612


tensor([  5.3306, -17.0593], requires_grad=True)