# LSTM & Redes Neuronales
En esta sección se realizara buscara darle una solucion al problema por medio de un combinacion de una Long Short Term Memory (LSTM) y una Red Neuronal

In [24]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

# Cargo los datasets

In [25]:
X_dev_full = pd.read_csv('../data/split/X_dev_full.csv', index_col=0)
X_test_full = pd.read_csv('../data/split/X_test_full.csv', index_col=0)
y_dev = pd.read_csv('../data/split/y_dev.csv', index_col=0)
y_test = pd.read_csv('../data/split/y_test.csv', index_col=0)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Usando device:", device)

# Targets que querés predecir
target_cols = [
    "altura_max_m",
    "ganancia_altura_m",
    "duracion_min",
    "distancia_km",
    "velocidad_promedio_kmh",
    "num_termicas",
    "intensidad_termicas_mean_ms",
    "tiempo_en_termicas_min",
    "tasa_ascenso_mean_ms",
]

Usando device: cpu


In [26]:
# ==============================
# 3) DEFINIR COLUMNAS SECUENCIALES Y ESTÁTICAS
# ==============================

cols = X_dev_full.columns.tolist()

# Horas del 09h al 18h
hours = [f"{h:02d}h" for h in range(9, 19)]  # ['09h', ..., '18h']

# Columnas que terminan en _09h, ..., _18h → secuenciales
seq_cols = [c for c in cols if any(c.endswith(f"_{h}") for h in hours)]

# Prefijos de clima, ej: 'solar_rad', 'temp_2m', etc.
prefixes = sorted({c.rsplit("_", 1)[0] for c in seq_cols})

# Grupos por timestep: para cada hora, todas las variables de clima
seq_col_groups = [
    [f"{p}_{h}" for p in prefixes]
    for h in hours
]

# Columnas estáticas: todo lo que no es secuencial
static_cols = [c for c in cols if c not in seq_cols]

print("Variables secuenciales por timestep:", prefixes)
print("Horas:", hours)
print("N seq features por timestep:", len(prefixes))
print("N static features:", len(static_cols))

Variables secuenciales por timestep: ['boundary_layer_height', 'cape', 'cloud_cover', 'precipitation', 'pressure', 'skin_temp', 'solar_rad', 'temp_2m', 'wind_speed', 'wind_u', 'wind_v']
Horas: ['09h', '10h', '11h', '12h', '13h', '14h', '15h', '16h', '17h', '18h']
N seq features por timestep: 11
N static features: 15


In [27]:
# ==============================
# 4) DATASET DE PYTORCH
# ==============================

class FlightLSTMDataset(Dataset):
    def __init__(self, X_df, y_df, seq_col_groups, static_cols, target_cols):
        self.X = X_df.reset_index(drop=True)
        self.y_df = y_df.reset_index(drop=True)
        self.seq_col_groups = seq_col_groups
        self.static_cols = static_cols
        self.target_cols = target_cols

    def __len__(self):
        return len(self.X)

    def _row_to_seq(self, row):
        """
        Devuelve un tensor (T, F) sin usar numpy → sólo listas de Python.
        T = nº timesteps (10: 09h..18h)
        F = nº variables por timestep (len(prefixes))
        """
        seq_list = []
        for group in self.seq_col_groups:
            # row[group] es una Series de pandas
            vals = row[group].astype("float32").tolist()  # list[float]
            seq_list.append(vals)  # list[list[float]]

        # Ahora seq_list es una lista de listas → torch.tensor puede manejarlo sin numpy
        seq = torch.tensor(seq_list, dtype=torch.float32)  # (T, F)
        return seq

    def __getitem__(self, idx):
        row = self.X.iloc[idx]

        # Secuencia (T, F)
        seq = self._row_to_seq(row)

        # Features estáticas (S,)
        static_vals = row[self.static_cols].astype("float32").tolist()
        static = torch.tensor(static_vals, dtype=torch.float32)

        # Targets (n_targets,)
        y_row = self.y_df.iloc[idx][self.target_cols].astype("float32").tolist()
        y = torch.tensor(y_row, dtype=torch.float32)

        return seq, static, y


In [28]:
# ==============================
# 5) MODELO LSTM + MLP
# ==============================

class ClimateLSTMRegressor(nn.Module):
    def __init__(
        self,
        seq_input_dim: int,
        static_dim: int,
        n_targets: int,
        hidden_dim: int = 64,
        num_layers: int = 1,
        bidirectional: bool = True,
        dropout: float = 0.1,
    ):
        super().__init__()

        self.bidirectional = bidirectional

        self.lstm = nn.LSTM(
            input_size=seq_input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional,
            dropout=dropout if num_layers > 1 else 0.0,
        )

        lstm_out_dim = hidden_dim * (2 if bidirectional else 1)

        self.mlp = nn.Sequential(
            nn.Linear(lstm_out_dim + static_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, n_targets),
        )

    def forward(self, seq, static):
        # seq: (batch, T, F), static: (batch, S)
        _, (h_n, _) = self.lstm(seq)  # h_n: (num_layers*num_dirs, batch, hidden_dim)

        if self.bidirectional:
            h_forward = h_n[-2]   # (batch, hidden_dim)
            h_backward = h_n[-1]  # (batch, hidden_dim)
            h_last = torch.cat([h_forward, h_backward], dim=1)  # (batch, 2*hidden_dim)
        else:
            h_last = h_n[-1]  # (batch, hidden_dim)

        x = torch.cat([h_last, static], dim=1)  # (batch, lstm_out_dim + static_dim)
        out = self.mlp(x)  # (batch, n_targets)
        return out

In [29]:
# ==============================
# 6) FUNCIONES DE TRAIN / EVAL
# ==============================

def run_epoch(model, loader, optimizer=None, device=device):
    train = optimizer is not None
    if train:
        model.train()
    else:
        model.eval()

    criterion = nn.MSELoss()

    total_loss = 0.0
    all_y = []
    all_pred = []

    for seq, static, y in loader:
        seq = seq.to(device)
        static = static.to(device)
        y = y.to(device)

        if train:
            optimizer.zero_grad()

        with torch.set_grad_enabled(train):
            pred = model(seq, static)
            loss = criterion(pred, y)

            if train:
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
                optimizer.step()

        total_loss += loss.item() * y.size(0)
        all_y.append(y.detach().cpu())
        all_pred.append(pred.detach().cpu())

    total_loss /= len(loader.dataset)
    all_y = torch.cat(all_y, dim=0).numpy()       # (N, n_targets)
    all_pred = torch.cat(all_pred, dim=0).numpy() # (N, n_targets)

    # R² por target y promedio
    r2_raw = r2_score(all_y, all_pred, multioutput="raw_values")
    r2_mean = float(np.mean(r2_raw))
    return total_loss, r2_mean, r2_raw

In [30]:
# ==============================
# 7) CROSS-VALIDATION (K-FOLD) + CURVAS DE APRENDIZAJE
# ==============================

from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import numpy as np

n_splits = 5
batch_size = 32
n_epochs = 40
lr = 1e-3
weight_decay = 1e-5

kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

fold_results = []

seq_input_dim = len(prefixes)
static_dim = len(static_cols)
n_targets = len(target_cols)

for fold, (train_idx, val_idx) in enumerate(kf.split(X_dev_full)):
    print("\n" + "="*70)
    print(f"FOLD {fold+1}/{n_splits}")
    print("="*70)

    # Split por índices
    X_tr = X_dev_full.iloc[train_idx].reset_index(drop=True)
    y_tr = y_dev.iloc[train_idx].reset_index(drop=True)
    X_va = X_dev_full.iloc[val_idx].reset_index(drop=True)
    y_va = y_dev.iloc[val_idx].reset_index(drop=True)

    # Datasets y loaders
    train_ds = FlightLSTMDataset(X_tr, y_tr, seq_col_groups, static_cols, target_cols)
    val_ds   = FlightLSTMDataset(X_va, y_va, seq_col_groups, static_cols, target_cols)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(val_ds,   batch_size=batch_size*2, shuffle=False)

    # Re-inicializar modelo y optimizador en cada fold
    model = ClimateLSTMRegressor(
        seq_input_dim=seq_input_dim,
        static_dim=static_dim,
        n_targets=n_targets,
        hidden_dim=64,
        num_layers=1,
        bidirectional=True,
        dropout=0.1,
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    best_val_r2 = -1e9
    best_state = None

    # Historial para curvas de aprendizaje
    history_train_r2 = []
    history_val_r2 = []
    history_train_loss = []
    history_val_loss = []

    for epoch in range(1, n_epochs + 1):
        train_loss, train_r2, _ = run_epoch(model, train_loader, optimizer=optimizer)
        val_loss, val_r2, _ = run_epoch(model, val_loader, optimizer=None)

        # Guardar historial
        history_train_r2.append(train_r2)
        history_val_r2.append(val_r2)
        history_train_loss.append(train_loss)
        history_val_loss.append(val_loss)

        # Guardar mejor modelo (por R² de validación promedio)
        if val_r2 > best_val_r2:
            best_val_r2 = val_r2
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}

        if epoch % 5 == 0 or epoch == 1:
            print(
                f"Epoch {epoch:03d} | "
                f"Train Loss={train_loss:.3f}, R2={train_r2:.3f} | "
                f"Val Loss={val_loss:.3f}, R2={val_r2:.3f}"
            )

    # ---- CURVAS DE APRENDIZAJE PARA ESTE FOLD ----
    epochs_axis = range(1, n_epochs + 1)

    # R²
    plt.figure(figsize=(8, 4))
    plt.plot(epochs_axis, history_train_r2, label="Train R²")
    plt.plot(epochs_axis, history_val_r2, label="Val R²")
    plt.xlabel("Época")
    plt.ylabel("R²")
    plt.title(f"Curva de aprendizaje R² - Fold {fold+1}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # Loss (MSE)
    plt.figure(figsize=(8, 4))
    plt.plot(epochs_axis, history_train_loss, label="Train Loss")
    plt.plot(epochs_axis, history_val_loss, label="Val Loss")
    plt.xlabel("Época")
    plt.ylabel("MSE")
    plt.title(f"Curva de aprendizaje Loss - Fold {fold+1}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # ---- Evaluar el mejor estado del fold en el set de validación ----
    model.load_state_dict({k: v.to(device) for k, v in best_state.items()})
    _, val_r2_final, val_r2_targets = run_epoch(model, val_loader, optimizer=None)

    print("\nMejor R² de validación (promedio targets):", f"{val_r2_final:.4f}")
    print("R² por target en este fold:")
    for name, r2v in zip(target_cols, val_r2_targets):
        print(f"  {name:30s}: {r2v:.4f}")

    fold_results.append(val_r2_targets)



FOLD 1/5


RuntimeError: Numpy is not available

In [None]:
# ==============================
# 8) RESUMEN FINAL DE CV
# ==============================

fold_results = np.vstack(fold_results)  # shape (n_splits, n_targets)

print("\n" + "="*70)
print("RESUMEN CROSS-VALIDATION (LSTM+MLP)")
print("="*70)

mean_r2_targets = fold_results.mean(axis=0)
std_r2_targets = fold_results.std(axis=0)

for name, mu, sd in zip(target_cols, mean_r2_targets, std_r2_targets):
    print(f"{name:30s}: R² CV medio = {mu:.4f} ± {sd:.4f}")

print("-"*70)
print("R² CV PROMEDIO (sobre todos los targets):", f"{mean_r2_targets.mean():.4f}")
print("="*70)