# Gradient Descent

## Example 1

In [None]:
# 3pps
import matplotlib.pyplot as plt
import numpy as np

# Definición de la función


def function(input: np.ndarray) -> np.ndarray:
    """Evaluate f(x1, x2) = sin(x1)cos(x2) + sin(x1/2)cos(x2/2).

    Args:
        input: Array whose last axis holds the two coordinates (x1, x2).
            Any number of leading axes is accepted, e.g. ``(2,)`` for a
            single point or ``(N, 2)`` for a batch of N points.

    Returns:
        The function value at every point, with shape ``input.shape[:-1]``.

    Raises:
        AssertionError: If the last axis does not contain exactly 2 elements.
    """
    assert input.shape[-1] == 2, "La entrada debe contener 2 elementos"
    # Ellipsis indexing always addresses the last (coordinate) axis, which is
    # the axis the check above validates; `[:, 0]` only worked for 2-D input.
    x1, x2 = input[..., 0], input[..., 1]
    return np.sin(x1) * np.cos(x2) + np.sin(0.5 * x1) * np.cos(0.5 * x2)


# Cálculo del gradiente (derivadas parciales)


def gradiente(input: np.ndarray) -> np.ndarray:
    """Return the analytic gradient of sin(x1)cos(x2) + sin(x1/2)cos(x2/2).

    Args:
        input: Array whose last axis holds the two coordinates (x1, x2).
            Any number of leading axes is accepted, e.g. ``(2,)`` for a
            single point or ``(N, 2)`` for a batch of N points.

    Returns:
        Array with the same shape as ``input``; the last axis holds
        (df/dx1, df/dx2) for the corresponding point.

    Raises:
        AssertionError: If the last axis does not contain exactly 2 elements.
    """
    assert input.shape[-1] == 2, "La entrada debe contener 2 elementos"

    # Ellipsis indexing always addresses the last (coordinate) axis, which is
    # the axis the check above validates; `[:, 0]` only worked for 2-D input.
    x1, x2 = input[..., 0], input[..., 1]

    # Partial derivative with respect to x1
    df_x1 = np.cos(x1) * np.cos(x2) + 0.5 * np.cos(0.5 * x1) * np.cos(0.5 * x2)
    # Partial derivative with respect to x2
    df_x2 = -np.sin(x1) * np.sin(x2) - 0.5 * np.sin(0.5 * x1) * np.sin(0.5 * x2)

    # Stack on the last axis so the output shape matches the input shape
    return np.stack([df_x1, df_x2], axis=-1)


# Algoritmo de descenso del gradiente


def descenso_gradiente(
    num_puntos: int = 10, num_iteraciones: int = 30, learning_rate: float = 1e-3
):
    """Run plain gradient descent from several random 2-D starting points.

    Args:
        num_puntos: Number of independent starting points.
        num_iteraciones: Number of descent steps applied to every point.
        learning_rate: Step size multiplying the gradient at each update.

    Returns:
        Array of shape ``(num_iteraciones + 1, num_puntos, 2)`` holding the
        position of every point before the first step and after each step.
    """
    # Starting positions drawn uniformly from [0, 10) in both coordinates
    posiciones = np.random.rand(num_puntos, 2) * 10
    historial = [posiciones]

    for _ in range(num_iteraciones):
        # Each update builds a new array, so earlier snapshots stay untouched
        posiciones = posiciones - learning_rate * gradiente(input=posiciones)
        historial.append(posiciones)

    return np.stack(historial)


# Run gradient descent; the result is indexed as [iteration, point, coordinate]
trayectoria = descenso_gradiente(num_puntos=5, num_iteraciones=30)

# Plot the path followed by each point in the (x1, x2) plane,
# one line per starting point with a marker at every iteration
for i in range(trayectoria.shape[1]):
    plt.plot(trayectoria[:, i, 0], trayectoria[:, i, 1], marker="o")

plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Trayectorias del descenso del gradiente")
plt.show()

## Example 2

In [None]:
# 3pps
import matplotlib.pyplot as plt
import torch

In [None]:
# Time instants 0, 1, ..., 19 stored as 32-bit floats
tiempo = torch.arange(20, dtype=torch.float32)
tiempo

In [None]:
velocidad = torch.randn(20) * 3 + 0.75 * (tiempo - 9.5) ** 2 + 1
plt.scatter(tiempo, velocidad)

In [None]:
velocidad.shape, tiempo.shape

In [None]:
def funcion(instante_tiempo: torch.Tensor, parametros: torch.Tensor) -> torch.Tensor:
    """Evaluate the quadratic ``a * t**2 + b * t + c`` at every time instant.

    Args:
        instante_tiempo: Tensor with the time instants ``t``.
        parametros: Tensor that unpacks into exactly three coefficients
            ``(a, b, c)``.

    Returns:
        Tensor with the value of the quadratic for each entry of
        ``instante_tiempo`` (a tensor, not a Python float).
    """
    a, b, c = parametros
    return a * (instante_tiempo**2) + (b * instante_tiempo) + c

In [None]:
def loss_function(predicted: torch.Tensor, real: torch.Tensor) -> torch.Tensor:
    """Mean squared error between the targets and the predictions.

    Args:
        predicted: Model outputs.
        real: Target values, broadcastable against ``predicted``.

    Returns:
        Scalar tensor with the mean of the squared differences.
    """
    residuo = real - predicted
    return torch.mean(torch.square(residuo))

In [None]:
parametros = torch.randn(3).requires_grad_()
parametros

In [None]:
predicciones = funcion(instante_tiempo=tiempo, parametros=parametros)
predicciones

In [None]:
def show_preds(tiempo, real, preds: torch.Tensor):
    """Scatter the real values (blue) and the predictions (red) over time.

    Args:
        tiempo: Values for the horizontal axis.
        real: Observed values plotted against ``tiempo``.
        preds: Predicted values; detached from the autograd graph and moved
            to the CPU before plotting.
    """
    plt.scatter(tiempo, real, label="Real", color="blue")

    # matplotlib needs a plain array, not a tensor attached to a graph
    preds_numpy = preds.detach().cpu().numpy()
    plt.scatter(tiempo, preds_numpy, label="Predicho", color="red")

    plt.legend()
    plt.show()


show_preds(tiempo, velocidad, predicciones)

In [None]:
perdida = loss_function(predicciones, velocidad)
perdida

Aplicamos `backward()` sobre la pérdida y comprobamos los gradientes calculados para los parámetros.

In [None]:
perdida.backward()
parametros.grad

Podemos utilizar una tasa de aprendizaje (learning rate), actualizar los parámetros restándoles el gradiente escalado por esa tasa y reiniciar los gradientes (aquí asignándoles `None`) para poder realizar una nueva evaluación.

In [None]:
# One manual gradient-descent step: parameters move against their gradient
lr = 1e-5
# NOTE(review): the update goes through `.data`, presumably so that the
# assignment itself is not tracked by autograd — confirm; a later cell performs
# the same update inside `torch.no_grad()` instead.
parametros.data = parametros.data - lr * parametros.grad.data
# Discard the stored gradient before the next backward pass
parametros.grad = None

In [None]:
predicciones = funcion(instante_tiempo=tiempo, parametros=parametros)
predicciones

In [None]:
show_preds(tiempo, velocidad, predicciones)

In [None]:
def apply_step_training(tiempo, parametros_aprendibles, datos_a_predecir, lr=1e-5):
    """Run one gradient-descent step on the quadratic model and plot the fit.

    Args:
        tiempo: Time instants fed to ``funcion``.
        parametros_aprendibles: Tensor of learnable coefficients; it is
            updated in place and its gradient is zeroed afterwards.
        datos_a_predecir: Target values compared against the predictions.
        lr: Learning rate that scales the gradient in the update.

    Returns:
        Tuple ``(predictions, parametros_aprendibles, loss)`` where the
        predictions and the loss were computed before the parameter update.
    """
    salida = funcion(instante_tiempo=tiempo, parametros=parametros_aprendibles)
    error = loss_function(predicted=salida, real=datos_a_predecir)
    error.backward()

    # Update in place with autograd disabled so the step is not recorded
    with torch.no_grad():
        parametros_aprendibles.sub_(lr * parametros_aprendibles.grad)

    # Reset the gradient in place so the next step does not accumulate on it
    parametros_aprendibles.grad.zero_()

    show_preds(tiempo, datos_a_predecir, salida)
    return salida, parametros_aprendibles, error

In [None]:
# 3pps
from tqdm import tqdm

In [None]:
# Number of training steps to run
num_epochs = 20
# Fresh random coefficients that autograd will track
parametros_aprendibles = torch.randn(3, requires_grad=True)

# Each iteration performs one update step (and shows one plot via
# apply_step_training), then reports the loss measured before that update
for epoch in tqdm(range(num_epochs)):
    predicciones, parametros_aprendibles, perdida = apply_step_training(
        tiempo=tiempo,
        parametros_aprendibles=parametros_aprendibles,
        datos_a_predecir=velocidad,
    )
    print(f"Epoch {epoch+1}, perdida: {perdida}")

In [None]:
def linear_layer(
    tensor_entrada: torch.Tensor,
    peso: "torch.Tensor | None" = None,
    sesgo: "torch.Tensor | None" = None,
) -> torch.Tensor:
    """Apply the affine map ``tensor_entrada @ peso + sesgo``.

    Args:
        tensor_entrada: Input tensor, e.g. of shape ``(B, N)``.
        peso: Weight tensor whose leading dimension matches the last
            dimension of ``tensor_entrada``, e.g. ``(N, 1)``. When omitted,
            the module-level name ``w`` is used.
        sesgo: Bias tensor broadcastable to the matmul result. When omitted,
            the module-level name ``b`` is used.

    Returns:
        The result of the matrix product plus the bias.

    Raises:
        NameError: If an argument is omitted and the corresponding
            module-level name (``w`` / ``b``) has not been defined.
    """
    # Only fall back to the globals when no explicit tensor is supplied, so
    # single-argument calls behave as before while explicit calls do not
    # depend on hidden module state.
    peso = w if peso is None else peso
    sesgo = b if sesgo is None else sesgo
    return tensor_entrada @ peso + sesgo


class CapaLineal:
    """Container for the randomly initialised weight of a linear layer."""

    def __init__(self, shape_entrada: int) -> None:
        """Create the weight tensor.

        Args:
            shape_entrada: Number of input features.
        """
        # torch.randn requires an explicit size. One column per output gives a
        # weight of shape (shape_entrada, 1), so `(B, N) @ w` yields `(B, 1)`.
        # NOTE(review): single-output shape is assumed — confirm intent.
        self.w = torch.randn(shape_entrada, 1)

## Another example

In [None]:
# 3pps
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch import nn

In [None]:
class Linear(nn.Module):
    """Scalar affine model ``weight * x + bias`` with two learnable parameters."""

    def __init__(self) -> None:
        """Initialise weight and bias with one uniform random value each."""
        super().__init__()

        # Weight is drawn before bias; nn.Parameter tracks gradients by default
        initial_weight = torch.rand(1)
        self.weight = nn.Parameter(initial_weight)
        initial_bias = torch.rand(1)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
        """Scale the input by the weight and shift it by the bias."""
        scaled = self.weight * input_tensor
        return torch.add(scaled, self.bias)

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

In [None]:
start = 0
end = 1
steps = 0.02
X = np.arange(start, end, steps)
X

In [None]:
bias = 0.3
weight = 0.7
y = weight * X + bias

In [None]:
# Hold out 30% of the samples for testing (fixed seed) and convert every
# partition to a float32 tensor in a single pass
X_train, X_test, y_train, y_test = (
    torch.from_numpy(particion.astype(np.float32))
    for particion in train_test_split(X, y, test_size=0.3, random_state=42)
)
X_train.dtype

In [None]:
plt.scatter(X_train, y_train, c="b", s=4, label="Training")
plt.show()
plt.scatter(X_test, y_test, c="g", s=4, label="Testing")
plt.show()

In [None]:
linear_model = Linear()
list(linear_model.parameters())
linear_model.state_dict()

In [None]:
linear_model.eval()
with torch.no_grad():
    predictions = linear_model(X_test)
predictions

De la documentación: "InferenceMode is analogous to no_grad and should be used when you are certain your operations will not interact with autograd (e.g., during data loading or model evaluation). Compared to no_grad, it removes additional overhead by disabling view tracking and version counter bumps. It is also more restrictive, in that tensors created in this mode cannot be used in computations recorded by autograd." Es decir, no realiza el seguimiento de los gradientes, lo que hace más seguro el código al evitar la actualización accidental de los parámetros del modelo. Además, hace más rápida la ejecución del código en inferencia.

In [None]:
with torch.inference_mode():
    predictions_2 = linear_model(X_test)
predictions_2

In [None]:
X_test.shape, predictions.shape

In [None]:
plt.scatter(X_test, predictions, c="r", s=4, label="Predictions")
plt.scatter(X_test, y_test, c="b", s=4, label="Real")
plt.legend()
plt.show()

In [None]:
# Mean absolute error as the training criterion
loss_fn = nn.L1Loss()
# State the learning rate explicitly: older PyTorch releases require `lr` for
# SGD, and 1e-3 is the value newer releases use when it is omitted.
optimizer = torch.optim.SGD(linear_model.parameters(), lr=1e-3)

In [None]:
num_epochs: int = 50

for epoch in range(num_epochs):
    epoch_losses_train = []
    epoch_losses_test = []

    # Training pass: one SGD update per sample
    linear_model.train()
    # Distinct loop names keep the dataset array `y` from being overwritten
    for x_sample, y_sample in zip(X_train, y_train):
        optimizer.zero_grad()

        output_model = linear_model(x_sample)
        # The model output has shape (1,) while each target is 0-d; giving the
        # target the same shape avoids the loss's size-mismatch broadcasting.
        loss = loss_fn(output_model, y_sample.reshape_as(output_model))

        loss.backward()
        optimizer.step()

        epoch_losses_train.append(loss.item())

    # Evaluation pass: no gradient tracking, no parameter updates
    linear_model.eval()
    with torch.inference_mode():
        for x_sample, y_sample in zip(X_test, y_test):
            output_model = linear_model(x_sample)
            loss = loss_fn(output_model, y_sample.reshape_as(output_model))
            epoch_losses_test.append(loss.item())

    print(
        f"Epoch: {epoch+1}, "
        f"Train Loss: {np.mean(epoch_losses_train):.4f}, "
        f"Test Loss: {np.mean(epoch_losses_test):.4f}"
    )

In [None]:
with torch.inference_mode():
    predictions_trained = linear_model(X_test)
plt.scatter(X_test, predictions_trained, c="r", s=4, label="Predictions")
plt.scatter(X_test, y_test, c="b", s=4, label="Real")
plt.legend()
plt.show()

In [None]:
# Persist only the learned parameters: a state_dict contains plain tensors, so
# it can be reloaded without unpickling the model class itself.
torch.save(linear_model.state_dict(), "linear_model.pth")

# Rebuild the architecture and copy the saved parameters into it
linear_model_loaded = Linear()
linear_model_loaded.load_state_dict(torch.load("linear_model.pth"))
linear_model_loaded.state_dict()

In [None]:
with torch.inference_mode():
    predictions_loaded = linear_model_loaded(X_test)
plt.scatter(X_test, predictions_loaded, c="r", s=4, label="Predictions")
plt.scatter(X_test, y_test, c="b", s=4, label="Real")
plt.legend()
plt.show()