In [1]:
%matplotlib inline
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)

In [2]:
# Eleven paired readings: t_c holds the regression targets, t_u the raw inputs.
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
)
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
)
# Input rescaled by a factor of 0.1; used by the SGD runs further down.
t_un = t_u * 0.1

In [3]:
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``.

    Args:
        t_u: Input value(s); anything supporting ``*`` and ``+`` with ``w``/``b``.
        w: Weight (slope).
        b: Bias (intercept).

    Returns:
        ``w * t_u + b``, following the usual broadcasting of the operands.
    """
    scaled = w * t_u
    return scaled + b

In [4]:
def loss_fn(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``.

    Args:
        t_p: Predicted values (must support subtraction with ``t_c``).
        t_c: Target values.

    Returns:
        The mean of the element-wise squared differences, via ``.mean()``.
    """
    residuals = t_p - t_c
    return (residuals ** 2).mean()

In [5]:
import torch.optim as optim

# List the names exposed by torch.optim (the optimizer classes are among them).
dir(optim)

['ASGD',
 'Adadelta',
 'Adafactor',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_adafactor',
 '_functional',
 'lr_scheduler',
 'swa_utils']

In [7]:
# Initial [w, b] parameters and a plain SGD optimizer that will update them.
params = torch.tensor([1.0, 0.0], requires_grad=True)  # requires_grad=True makes autograd track the operations applied to params
learning_rate = 1e-5
optimizer = optim.SGD([params], lr=learning_rate)

In [8]:
# One manual optimization step on the raw (un-normalized) input t_u.
# There is no zero_grad() call here, so re-running this cell without
# recreating params accumulates gradients on top of the previous ones.
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
loss.backward()  # populates params.grad

optimizer.step()  # applies the SGD update to params in place

params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [10]:
# Same single step, now on the rescaled input t_un and with a larger learning rate.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

t_p = model(t_un, *params)
loss = loss_fn(t_p, t_c)

optimizer.zero_grad() # Reset the gradients so they do not accumulate across training iterations
loss.backward()
optimizer.step()

params

tensor([1.7761, 0.1064], requires_grad=True)

In [11]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Fit ``params`` to ``(t_u, t_c)`` by running ``n_epochs`` optimizer steps.

    Args:
        n_epochs: Number of optimization steps to run.
        optimizer: Optimizer whose ``zero_grad()``/``step()`` are called each epoch.
        params: Parameter tensor unpacked into ``model`` as ``w, b``.
        t_u: Model inputs.
        t_c: Targets the predictions are compared against.

    Returns:
        The same ``params`` object that was passed in.
    """
    report_every = 500
    for epoch in range(1, n_epochs + 1):
        # Forward pass and loss in one expression
        loss = loss_fn(model(t_u, *params), t_c)

        optimizer.zero_grad()  # clear gradients left over from the previous epoch
        loss.backward()        # compute fresh gradients
        optimizer.step()       # update the parameters

        if epoch % report_every == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))

    return params

In [12]:
# Full SGD training run on the rescaled input t_un.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate) # SGD optimizer instance from the optim submodule

training_loop(
    n_epochs = 5000,
    optimizer = optimizer,
    params = params, # Same tensor that was handed to the optimizer
    t_u = t_un,
    t_c = t_c)

Epoch 500, Loss 7.860120
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [13]:
# Same training run with Adam instead of SGD, fed the raw input t_u.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-1  # Larger learning rate than in the SGD run
optimizer = optim.Adam([params], lr=learning_rate) # A new optimizer instance is created

training_loop(
    n_epochs = 2000,
    optimizer = optimizer,
    params = params,
    t_u = t_u, # Raw input on purpose (t_un is not used): Adam is less sensitive to the scaling of the parameters
    t_c = t_c)

Epoch 500, Loss 7.612900
Epoch 1000, Loss 3.086698
Epoch 1500, Loss 2.928578
Epoch 2000, Loss 2.927646


tensor([  0.5367, -17.3021], requires_grad=True)

In [14]:
# Split the samples into two disjoint groups: training and validation.

n_samples = t_u.shape[0]  # Total number of samples
n_val = int(0.2 * n_samples)  # 20% of the samples are held out for validation
n_train = n_samples - n_val

shuffled_indices = torch.randperm(n_samples)  # Random permutation of the sample indices

# Slice from the front using n_train instead of negative n_val offsets: when
# n_val == 0, [:-0] is empty and [-0:] is the whole tensor, which would hand
# every sample to validation and none to training.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices  # Unseeded permutation, so the split can change from run to run

(tensor([10,  4,  3,  9,  7,  0,  1,  5,  6]), tensor([2, 8]))

In [15]:
# Gather the training and validation samples selected by the index split.
train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]

val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

# Rescale the inputs by 0.1, the same factor used to derive t_un from t_u.
train_t_un = 0.1 * train_t_u
val_t_un = 0.1 * val_t_u

In [16]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train on the training split while reporting the validation loss.

    Args:
        n_epochs: Number of optimization steps to run.
        optimizer: Optimizer whose ``zero_grad()``/``step()`` are called each epoch.
        params: Parameter tensor unpacked into ``model`` as ``w, b``.
        train_t_u, train_t_c: Training inputs and targets.
        val_t_u, val_t_c: Validation inputs and targets (never backpropagated).

    Returns:
        The same ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        # Forward pass and loss on each split
        train_loss = loss_fn(model(train_t_u, *params), train_t_c)
        val_loss = loss_fn(model(val_t_u, *params), val_t_c)

        # Only the training loss is backpropagated: the model must not be
        # fitted to the validation data.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        should_report = epoch <= 3 or epoch % 500 == 0
        if should_report:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

    return params

In [17]:
# SGD training run with separate training and validation splits.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs = 3000,
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un, # rescaled training inputs (0.1 * train_t_u)
    val_t_u = val_t_un, # rescaled validation inputs (0.1 * val_t_u)
    train_t_c = train_t_c,
    val_t_c = val_t_c)

Epoch 1, Training loss 88.7100, Validation loss 42.8090
Epoch 2, Training loss 41.4218, Validation loss 13.5269
Epoch 3, Training loss 34.1070, Validation loss 12.9835
Epoch 500, Training loss 6.6346, Validation loss 7.8649
Epoch 1000, Training loss 3.1442, Validation loss 5.6421
Epoch 1500, Training loss 2.6852, Validation loss 4.9298
Epoch 2000, Training loss 2.6249, Validation loss 4.6839
Epoch 2500, Training loss 2.6170, Validation loss 4.5963
Epoch 3000, Training loss 2.6159, Validation loss 4.5647


tensor([  5.3056, -16.8024], requires_grad=True)

In [18]:
"""
Para evitar que se cree un grafo computacional al obtener val_t_p y calcular val_loss (no hace falta,
ya que no se pretende llamar a val_loss.backward()), lo que implicaría un desperdicio de recursos, sobre todo
en modelos grandes, se encapsulan estos cálculos en un bloque `with torch.no_grad():`.
"""

def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train on the training split, evaluating validation under ``torch.no_grad()``.

    The validation forward pass runs with autograd disabled, so the resulting
    loss does not require gradients.

    Args:
        n_epochs: Number of optimization steps to run.
        optimizer: Optimizer whose ``zero_grad()``/``step()`` are called each epoch.
        params: Parameter tensor unpacked into ``model`` as ``w, b``.
        train_t_u, train_t_c: Training inputs and targets.
        val_t_u, val_t_c: Validation inputs and targets (never backpropagated).

    Returns:
        The same ``params`` object that was passed in, matching the other
        ``training_loop`` variants in this notebook.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
        # Sanity check: autograd must not be tracking the validation loss
        assert not val_loss.requires_grad

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return params

In [19]:
"""
Usando el administrador de contexto torch.set_grad_enabled se puede condicionar el código para que se ejecute
con autograd habilitado/deshabilitado según se esté entrenando el modelo o haciendo inferencia.
"""
def calc_forward(t_u, t_c, is_train, model_params=None):
    """Run the forward pass and loss with autograd switched on or off.

    Args:
        t_u: Model inputs.
        t_c: Targets the predictions are compared against.
        is_train: ``True`` to track gradients (training); ``False`` to disable
            autograd (validation / inference).
        model_params: Parameter tensor unpacked into ``model`` as ``w, b``.
            When omitted, the notebook-level ``params`` is used, which is what
            this function always did before the argument existed. Passing it
            explicitly avoids depending on that hidden global.

    Returns:
        The loss; it requires grad only when ``is_train`` is true and the
        parameters themselves require grad.
    """
    if model_params is None:
        # Backward-compatible fallback to the module-level tensor
        model_params = params
    with torch.set_grad_enabled(is_train):  # enables/disables autograd per is_train
        t_p = model(t_u, *model_params)
        loss = loss_fn(t_p, t_c)
    return loss

In [20]:
# Training loop built on top of calc_forward

def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train on the training split, delegating forward passes to ``calc_forward``.

    ``calc_forward`` is called here without a parameter tensor, so the losses
    are computed from the notebook-level ``params``; the ``params`` argument is
    only returned. Pass the same tensor that the optimizer updates and that is
    bound to that notebook-level name.

    Args:
        n_epochs: Number of optimization steps to run.
        optimizer: Optimizer whose ``zero_grad()``/``step()`` are called each epoch.
        params: Parameter tensor being trained (see note above).
        train_t_u, train_t_c: Training inputs and targets.
        val_t_u, val_t_c: Validation inputs and targets (never backpropagated).

    Returns:
        The same ``params`` object that was passed in, matching the other
        ``training_loop`` variants in this notebook.
    """
    for epoch in range(1, n_epochs + 1):
        # Training loss with autograd enabled
        train_loss = calc_forward(train_t_u, train_t_c, is_train=True)

        # Validation loss with autograd disabled
        val_loss = calc_forward(val_t_u, val_t_c, is_train=False)
        assert not val_loss.requires_grad

        # Backpropagation and parameter update
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return params


In [22]:
# SGD training run using the calc_forward-based training loop.
# The tensor is bound to the notebook-level name `params`, which is what
# calc_forward reads when it is not given a parameter tensor.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs = 3000,
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un,
    val_t_u = val_t_un,
    train_t_c = train_t_c,
    val_t_c = val_t_c)

# Display the trained parameters (updated in place by the optimizer)
params

tensor([  5.3056, -16.8024], requires_grad=True)