### 3.6. Optimización de hiperparámetros

Se realizó una búsqueda exhaustiva (**Grid Search**) sobre los modelos **LSTM** y **GRU**, evaluando todas las combinaciones (3 × 3 × 2 × 2 = 36 por arquitectura) de los siguientes hiperparámetros:

- **Tamaño de ventana** (`window_size`): 10, 20, 30  
- **Unidades recurrentes** (`units`): 32, 64, 128  
- **Tamaño de lote** (`batch_size`): 32, 64  
- **Tasa de aprendizaje** (`learning_rate`): 0.001, 0.0005  

**Protocolo de evaluación.**  
Los datos se dividieron respetando el orden temporal: 80 % para entrenamiento y 20 % para prueba.  
Dentro del bloque de entrenamiento, el **10 % final** se empleó como **validación**.  
La métrica de selección fue el **MSE (Error Cuadrático Medio)** en validación.

**Procedimiento.**  
Para cada combinación de hiperparámetros y cada arquitectura (LSTM/GRU), el modelo se entrenó con `EarlyStopping` y se registró el MSE en validación.  
Para cada ticker, la configuración con menor MSE de validación (considerando ambas arquitecturas) se **reentrenó en train+val** y se evaluó en el conjunto de **prueba**, reportando **MAE, RMSE y MSE**.

Este esquema garantiza una comparación justa entre configuraciones, evita la fuga temporal y ofrece una línea base sólida para los modelos recurrentes.


In [2]:
import tensorflow as tf
# List every physical device TensorFlow can see, then report whether any of
# them is a GPU (an empty GPU list means training will run on CPU).
print("Dispositivos detectados:")
print(tf.config.list_physical_devices())

print(
    "✅ TensorFlow está usando GPU correctamente."
    if tf.config.list_physical_devices('GPU')
    else "⚠️ Solo CPU detectada, será mucho más lento."
)


Dispositivos detectados:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
⚠️ Solo CPU detectada, será mucho más lento.


In [1]:
# ============================================
# 0) Imports y paths
# ============================================
import os, math, itertools, json
import numpy as np
import pandas as pd
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.metrics import mean_absolute_error, mean_squared_error

# Input CSVs are read from DATA_DIR; every artifact of the search is written
# under OUT_DIR, which is created up front (including missing parents).
DATA_DIR = Path("../data/processed/ready_for_modeling")
OUT_DIR = Path("../results/hparam_search")
os.makedirs(OUT_DIR, exist_ok=True)

# Tickers to process and the column used as the prediction target.
TICKERS = ["BBVA", "SAN"]
TARGET_COL = "Close"

# Hyperparameter grids (as listed in the accompanying document).
WINDOW_GRID = [10, 20, 30]
UNITS_GRID = [32, 64, 128]
BATCH_GRID = [32, 64]
LR_GRID = [0.001, 0.0005]

# Training budget and split proportions.
EPOCHS = 120
PATIENCE = 10
TEST_RATIO = 0.20               # chronological 80/20 train/test split
VAL_RATIO_WITHIN_TRAIN = 0.10   # last 10% of the train block is validation

# Seed NumPy and TensorFlow so repeated runs start from the same RNG state.
np.random.seed(42)
tf.random.set_seed(42)

# ============================================
# 1) Utilidades de secuenciado y splits
# ============================================
def make_sequences(df, window, horizon=1, target_col="Close"):
    """Slice a row-ordered frame into supervised (input window, target) pairs.

    Each sample takes ``window`` consecutive rows (all columns) as input and
    the value of ``target_col`` found ``horizon`` rows after the last row of
    that window as its label.

    Args:
        df: DataFrame whose rows are already in the desired (chronological)
            order. All columns are cast to float32 and used as features.
        window: Number of consecutive rows in each input sequence.
        horizon: How many rows past the end of the window the target is read
            from (1 means the row immediately after the window).
        target_col: Name of the column that provides the label.

    Returns:
        A tuple ``(X, y)`` of float32 arrays, where ``X`` has shape
        ``(n_samples, window, n_columns)`` and ``y`` has shape
        ``(n_samples,)``. If ``df`` has fewer than ``window + horizon`` rows,
        both arrays are returned empty (``n_samples == 0``) so callers can
        test ``X.size == 0`` instead of handling an exception.
    """
    vals = df.values.astype(np.float32)
    y_idx = df.columns.get_loc(target_col)
    n_samples = len(df) - window - horizon + 1

    if n_samples <= 0:
        # np.stack cannot stack an empty list, so build the empty result
        # explicitly while keeping the rank that a non-empty result has.
        empty_X = np.empty((0, window, vals.shape[1]), dtype=np.float32)
        empty_y = np.empty((0,), dtype=np.float32)
        return empty_X, empty_y

    Xs = [vals[s:s + window, :] for s in range(n_samples)]
    ys = [vals[s + window + horizon - 1, y_idx] for s in range(n_samples)]
    return np.stack(Xs), np.array(ys, dtype=np.float32)

def temporal_split(df, test_ratio=0.2):
    """Split a row-ordered frame into a leading and a trailing block.

    The first ``int(len(df) * (1 - test_ratio))`` rows form the first block
    and the remaining rows form the second. Rows are never shuffled.

    Args:
        df: DataFrame to split, already in the desired row order.
        test_ratio: Fraction of rows assigned to the trailing block.

    Returns:
        ``(head, tail)`` as independent copies of the two row ranges.
    """
    n_head = int(len(df) * (1 - test_ratio))
    head = df.iloc[:n_head].copy()
    tail = df.iloc[n_head:].copy()
    return head, tail

def split_train_val_by_tail(df_train_block, val_ratio=0.1):
    """Carve a validation set off the end of a training block.

    The last ``val_ratio`` fraction of rows (by position) becomes the
    validation part; everything before it stays as the training core.

    Args:
        df_train_block: DataFrame holding the full training block in row order.
        val_ratio: Fraction of the block's rows reserved for validation.

    Returns:
        ``(core, validation)`` as independent copies.
    """
    boundary = int(len(df_train_block) * (1 - val_ratio))
    core_rows = slice(None, boundary)
    val_rows = slice(boundary, None)
    return df_train_block.iloc[core_rows].copy(), df_train_block.iloc[val_rows].copy()

# ============================================
# 2) Model builders
# ============================================
def build_lstm(n_features, units, lr):
    """Build and compile a one-layer LSTM regressor.

    Args:
        n_features: Number of features per time step in the input sequences.
        units: Number of units in the LSTM layer.
        lr: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model (Input -> LSTM -> Dense(1))
        trained with MSE loss.
    """
    # The time dimension is left as None, so the model is not tied to a
    # particular sequence length.
    stack = [
        layers.Input(shape=(None, n_features)),
        layers.LSTM(units),
        layers.Dense(1),
    ]
    net = keras.Sequential(stack)
    adam = keras.optimizers.Adam(learning_rate=lr)
    net.compile(optimizer=adam, loss="mse")
    return net

def build_gru(n_features, units, lr):
    """Build and compile a one-layer GRU regressor.

    Args:
        n_features: Number of features per time step in the input sequences.
        units: Number of units in the GRU layer.
        lr: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model (Input -> GRU -> Dense(1))
        trained with MSE loss.
    """
    # The time dimension is left as None, so the model is not tied to a
    # particular sequence length.
    stack = [
        layers.Input(shape=(None, n_features)),
        layers.GRU(units),
        layers.Dense(1),
    ]
    net = keras.Sequential(stack)
    adam = keras.optimizers.Adam(learning_rate=lr)
    net.compile(optimizer=adam, loss="mse")
    return net

# ============================================
# 3) Grid Search por ticker y modelo
# ============================================
def evaluate_on_val(y_true, y_pred):
    """Compute regression error metrics for a set of predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        A tuple ``(mse, rmse, mae)`` where ``rmse`` is the square root of
        ``mse``.
    """
    abs_err = mean_absolute_error(y_true, y_pred)
    sq_err = mean_squared_error(y_true, y_pred)
    return sq_err, math.sqrt(sq_err), abs_err

# One row per (ticker, architecture, hyperparameter combination) across the
# whole search.
all_results = []

for ticker in TICKERS:
    print(f"\n===== {ticker} =====")
    df = pd.read_csv(DATA_DIR / f"{ticker}_final_ready.csv", parse_dates=["Date"], index_col="Date").sort_index()
    n_features = df.shape[1]

    # Chronological 80/20 split.
    df_tr_block, df_te_block = temporal_split(df, test_ratio=TEST_RATIO)

    # The train/validation cut does not depend on the window size, so it is
    # computed once per ticker instead of once per window.
    df_tr_core, df_val_core = split_train_val_by_tail(df_tr_block, val_ratio=VAL_RATIO_WITHIN_TRAIN)

    grid_path = OUT_DIR / f"{ticker}_grid_results.csv"
    ticker_rows = []  # rows of all_results that belong to this ticker

    for window in WINDOW_GRID:
        # With the default horizon of 1, a block must have more than `window`
        # rows to yield at least one (sequence, target) pair. Checking the
        # row counts first makes the skip independent of how make_sequences
        # behaves when it has nothing to return.
        if min(len(df_tr_core), len(df_val_core), len(df_te_block)) <= window:
            print("Insuficientes muestras, se omite esta ventana.")
            continue

        # Sequences are built inside each block separately, so no input
        # window straddles a split boundary.
        X_tr, y_tr   = make_sequences(df_tr_core, window=window, target_col=TARGET_COL)
        X_val, y_val = make_sequences(df_val_core, window=window, target_col=TARGET_COL)
        X_te,  y_te  = make_sequences(df_te_block,  window=window, target_col=TARGET_COL)

        print(f"window={window} | X_train={X_tr.shape}, X_val={X_val.shape}, X_test={X_te.shape}")

        # Skip if any split ended up without usable samples.
        if X_tr.size == 0 or X_val.size == 0 or X_te.size == 0:
            print("Insuficientes muestras, se omite esta ventana.")
            continue

        # Exhaustive grid over the remaining hyperparameters.
        for units, batch, lr in itertools.product(UNITS_GRID, BATCH_GRID, LR_GRID):
            config = dict(window=window, units=units, batch=batch, lr=lr)

            for arch_name, builder in [("LSTM", build_lstm), ("GRU", build_gru)]:
                # Drop the previous model's Keras state before building a new one.
                tf.keras.backend.clear_session()

                model = builder(n_features=n_features, units=units, lr=lr)
                cb = [
                    keras.callbacks.EarlyStopping(monitor="val_loss", patience=PATIENCE,
                                                  restore_best_weights=True, verbose=0)
                ]
                hist = model.fit(
                    X_tr, y_tr,
                    validation_data=(X_val, y_val),
                    epochs=EPOCHS,
                    batch_size=batch,
                    verbose=0,
                    callbacks=cb,
                )

                # Validation metrics are the selection criterion.
                y_val_hat = model.predict(X_val, verbose=0).ravel()
                mse_val, rmse_val, mae_val = evaluate_on_val(y_val, y_val_hat)

                row = {
                    "Ticker": ticker, "Model": arch_name,
                    **config,
                    "val_MSE": mse_val, "val_RMSE": rmse_val, "val_MAE": mae_val,
                    # 1-based epoch with the lowest validation loss, stored as a plain int.
                    "best_epoch": int(np.argmin(hist.history["val_loss"])) + 1
                }
                ticker_rows.append(row)
                all_results.append(row)

            # Checkpoint after every configuration: the search is long, and
            # an interruption would otherwise discard every finished run.
            pd.DataFrame(ticker_rows).to_csv(grid_path, index=False)

    # Final per-ticker write (also covers the case where every window was skipped).
    df_res_t = pd.DataFrame(ticker_rows)
    df_res_t.to_csv(grid_path, index=False)
    print(f"→ Guardado grid de {ticker}: {grid_path}")

# ============================================
# 4) Selección de mejor config, retrain en train+val y evaluación en test
# ============================================
# One summary row per ticker: winning configuration plus its test metrics.
summary_rows = []

for ticker in TICKERS:
    df = pd.read_csv(DATA_DIR / f"{ticker}_final_ready.csv", parse_dates=["Date"], index_col="Date").sort_index()
    n_features = df.shape[1]

    # Pick the row with the lowest validation MSE from this ticker's grid.
    # A stable sort makes ties resolve to the configuration evaluated first,
    # so the selection is reproducible.
    grid_file = OUT_DIR / f"{ticker}_grid_results.csv"
    best = pd.read_csv(grid_file).sort_values(["val_MSE"], kind="mergesort").iloc[0].to_dict()

    # Values come back from CSV as floats/strings; restore the intended types.
    window = int(best["window"]); units = int(best["units"])
    batch  = int(best["batch"]);  lr    = float(best["lr"])
    model_name = best["Model"]

    # Same chronological split as in the grid search.
    df_tr_block, df_te_block = temporal_split(df, test_ratio=TEST_RATIO)
    # Train and validation are used together for the final fit.
    X_trval, y_trval = make_sequences(df_tr_block, window=window, target_col=TARGET_COL)
    X_test,  y_test  = make_sequences(df_te_block,  window=window, target_col=TARGET_COL)

    # Reset Keras state before building, as the grid search does for every model.
    tf.keras.backend.clear_session()

    builder = build_lstm if model_name == "LSTM" else build_gru
    model = builder(n_features=n_features, units=units, lr=lr)
    # No validation data is held out here, so early stopping watches the training loss.
    cb = [keras.callbacks.EarlyStopping(monitor="loss", patience=PATIENCE, restore_best_weights=True, verbose=0)]

    hist = model.fit(X_trval, y_trval, epochs=EPOCHS, batch_size=batch, verbose=0, callbacks=cb)

    # Test-set evaluation, using the same metric helper as the grid search.
    y_hat_test = model.predict(X_test, verbose=0).ravel()
    mse_te, rmse_te, mae_te = evaluate_on_val(y_test, y_hat_test)

    # Persist the test predictions next to the ground truth.
    preds = pd.DataFrame({"y_true": y_test, "y_pred": y_hat_test})
    preds.to_csv(OUT_DIR / f"{ticker}_{model_name}_best_preds_test.csv", index=False)

    summary_rows.append({
        "Ticker": ticker,
        "BestModel": model_name, "window": window, "units": units, "batch": batch, "lr": lr,
        "Test_MSE": mse_te, "Test_RMSE": rmse_te, "Test_MAE": mae_te
    })

# Final summary across tickers.
df_summary = pd.DataFrame(summary_rows)
df_summary.to_csv(OUT_DIR / "best_summary.csv", index=False)
print("\n✅ Búsqueda completada. Resumen de mejores configuraciones:")
print(df_summary)



===== BBVA =====
window=10 | X_train=(4752, 10, 16), X_val=(520, 10, 16), X_test=(1313, 10, 16)



KeyboardInterrupt: 