# 📊 EVALUACIÓN FINAL DEL ENSEMBLE HETEROGÉNEO PARA PREDICCIÓN USD/PEN

**Tesis de Maestría en Ciencias de la Computación - UTEC**

Este notebook genera todas las figuras y tablas para el capítulo de resultados del paper.

---

## Configuración Experimental
- **Target:** Log returns del tipo de cambio USD/PEN
- **Horizonte:** h=1 (one-step-ahead)
- **Holdout:** 60 días (abril-julio 2025)
- **Metodología:** Rolling forecast sin reentrenamiento
- **Modelos evaluados:** 5 (TimesFM, CatBoost, ARX, NBEATSx, MOIRAI)
- **Modelos excluidos:** TiDE (sesgo severo), ChronosX (datos inválidos), LSTM (sobreajuste)

In [None]:
# =============================================================================
# CONFIGURACIÓN E IMPORTS
# =============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.linear_model import Ridge, LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Global matplotlib style for publication-quality figures.
# NOTE(review): 'seaborn-v0_8-whitegrid' is a version-specific style name;
# confirm it exists in the matplotlib version pinned for this project.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (12, 7),
    'font.size': 11,
    'font.family': 'sans-serif',
    'axes.titlesize': 14,
    'axes.labelsize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'figure.dpi': 150,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
    'savefig.pad_inches': 0.1
})

# Colour palette shared by every figure: one entry per model, plus the
# ensemble and the realised series. Plot cells look models up with
# COLORS.get(model, fallback), so a model missing here still renders.
COLORS = {
    'TimesFM': '#2E86AB',    # Blue
    'CatBoost': '#A23B72',   # Magenta
    'ARX': '#F18F01',        # Orange
    'NBEATSx': '#C73E1D',    # Red
    'MOIRAI': '#3B1F2B',     # Dark purple
    'Ensemble': '#2ECC71',   # Green
    'Real': '#1A1A2E'        # Black
}

# Log that the configuration ran, with a timestamp for reproducibility notes
print("‚úì Configuraci√≥n cargada")
print(f"  Fecha de ejecuci√≥n: {datetime.now().strftime('%Y-%m-%d %H:%M')}")

In [None]:
# =============================================================================
# PATH CONFIGURATION - LOCAL WINDOWS SETUP (OVERRIDABLE VIA ENVIRONMENT)
# =============================================================================
import os

# Project root. The default is the author's local Windows checkout; set the
# TESIS_BASE_DIR environment variable to run the notebook on another machine
# without editing this cell.
BASE_DIR = Path(os.environ.get(
    'TESIS_BASE_DIR',
    r'C:\Users\Carlos Palma\OneDrive\Documents\Cursos\UTEC Computer Science\TESIS\NUEVO PAPER\tesis_maestria'
))

# Main data file
DATA_FILE = BASE_DIR / 'Data.csv'

# Holdout predictions (one pred_<model>.csv per model)
PRED_DIR = BASE_DIR / 'predictions_dump'

# Out-of-fold (OOF) predictions (one train_oof_<model>.csv per model)
OOF_DIR = BASE_DIR / 'oof_predictions'

# Destination folder for the paper figures
OUTPUT_DIR = BASE_DIR / 'figures_paper'

# SELECTED models (TiDE, ChronosX and LSTM are excluded)
SELECTED_MODELS = ['TimesFM', 'CatBoost', 'ARX', 'NBEATSx', 'MOIRAI']

# Experiment parameters
N_HOLDOUT = 60
RANDOM_STATE = 42

# Path verification: printed BEFORE anything touches the filesystem so that a
# wrong BASE_DIR is visible in the output instead of a bare mkdir traceback.
print("="*60)
print("VERIFICACI√ìN DE RUTAS")
print("="*60)
print(f"‚úì Directorio base: {BASE_DIR}")
print(f"  ‚Üí Existe: {BASE_DIR.exists()}")
print(f"\n‚úì Archivo de datos: {DATA_FILE.name}")
print(f"  ‚Üí Existe: {DATA_FILE.exists()}")
print(f"\n‚úì Predicciones holdout: {PRED_DIR}")
print(f"  ‚Üí Existe: {PRED_DIR.exists()}")
print(f"\n‚úì Predicciones OOF: {OOF_DIR}")
print(f"  ‚Üí Existe: {OOF_DIR.exists()}")
print(f"\n‚úì Salida figuras: {OUTPUT_DIR}")
print(f"\n‚úì Modelos seleccionados: {SELECTED_MODELS}")
print("="*60)

# Create the figures folder. parents=True is deliberately NOT used: if
# BASE_DIR is wrong this should fail (FileNotFoundError, as before) rather
# than silently create an empty project tree.
OUTPUT_DIR.mkdir(exist_ok=True)

In [None]:
# =============================================================================
# FUNCIONES DE MÉTRICAS
# =============================================================================

def calculate_mase(y_true, y_pred, y_train):
    """Return the Mean Absolute Scaled Error (the primary metric).

    The forecast MAE is divided by the MAE of the one-step naive forecast on
    the training series, i.e. the mean absolute first difference of
    ``y_train``.

    Args:
        y_true: Observed values (any 1-D array-like: list, ndarray, Series).
        y_pred: Forecasts, aligned with ``y_true`` by position.
        y_train: Training series used to compute the naive scale.

    Returns:
        The MASE, or ``np.inf`` when the naive scale is not strictly
        positive (constant, too short, or NaN-containing ``y_train``).
    """
    # Coerce to plain float arrays: lists would fail on subtraction and
    # pandas Series would be aligned by index label instead of position.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    mae = np.mean(np.abs(y_true - y_pred))
    # A series with fewer than two points has no first difference; treat the
    # scale as zero instead of taking the mean of an empty array.
    naive_mae = np.mean(np.abs(np.diff(y_train))) if y_train.size > 1 else 0.0
    return mae / naive_mae if naive_mae > 0 else np.inf

def calculate_da(y_true, y_pred):
    """Return the Directional Accuracy as a percentage (0-100).

    A prediction is a hit when ``np.sign`` of the forecast equals ``np.sign``
    of the observed value. An exact zero only matches another exact zero.

    Args:
        y_true: Observed values (any 1-D array-like).
        y_pred: Forecasts, aligned with ``y_true`` by position.

    Returns:
        Percentage of positions whose signs agree.
    """
    # Plain arrays: comparing two pandas Series with different labels raises,
    # and lists are not accepted by the element-wise comparison below.
    dir_real = np.sign(np.asarray(y_true, dtype=float))
    dir_pred = np.sign(np.asarray(y_pred, dtype=float))
    return np.mean(dir_real == dir_pred) * 100

def calculate_mae(y_true, y_pred):
    """Return the Mean Absolute Error between observations and forecasts.

    Args:
        y_true: Observed values (any 1-D array-like).
        y_pred: Forecasts, aligned with ``y_true`` by position.

    Returns:
        Mean of the absolute element-wise differences.
    """
    # Coerce so lists work and pandas Series are compared by position.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(y_true - y_pred))

def calculate_rmse(y_true, y_pred):
    """Return the Root Mean Squared Error between observations and forecasts.

    Args:
        y_true: Observed values (any 1-D array-like).
        y_pred: Forecasts, aligned with ``y_true`` by position.

    Returns:
        Square root of the mean squared element-wise difference.
    """
    # Coerce so lists work and pandas Series are compared by position.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((y_true - y_pred)**2))

# Confirmation in the notebook output that the metric helpers above are defined
print("‚úì Funciones de m√©tricas definidas")

In [None]:
# =============================================================================
# LOAD MAIN DATA
# =============================================================================

# Load the main dataset; the code below relies on a 'Dates' and a 'PEN' column.
# 'utf-8-sig' makes the reader tolerate a UTF-8 byte-order mark at file start.
df_data = pd.read_csv(DATA_FILE, encoding='utf-8-sig')
# Dates are parsed day-first, then rows are put in chronological order
df_data['Dates'] = pd.to_datetime(df_data['Dates'], dayfirst=True)
df_data = df_data.sort_values('Dates').reset_index(drop=True)

# Log returns: ln(P_t / P_{t-1}); the first row has no predecessor and is NaN
df_data['PEN_log_ret'] = np.log(df_data['PEN'] / df_data['PEN'].shift(1))

# Split: the last N_HOLDOUT rows are the holdout, everything before is train
holdout_start_idx = len(df_data) - N_HOLDOUT
train_data = df_data.iloc[:holdout_start_idx].copy()
holdout_data = df_data.iloc[holdout_start_idx:].copy()

# Arrays used by the metric functions. NaNs are dropped from the training
# returns only; holdout NaNs (if any) are kept so positions stay aligned.
y_train = train_data['PEN_log_ret'].dropna().values
holdout_dates = holdout_data['Dates'].values
y_holdout = holdout_data['PEN_log_ret'].values

# Summary of the loaded data and of the train/holdout split
print(f"‚úì Dataset cargado: {len(df_data):,} observaciones")
print(f"  Rango: {df_data['Dates'].min().date()} a {df_data['Dates'].max().date()}")
print(f"\n‚úì Split definido:")
print(f"  Train: {len(train_data):,} d√≠as ({train_data['Dates'].min().date()} - {train_data['Dates'].max().date()})")
print(f"  Holdout: {len(holdout_data)} d√≠as ({holdout_data['Dates'].min().date()} - {holdout_data['Dates'].max().date()})")

In [None]:
# =============================================================================
# LOAD MODEL PREDICTIONS
# =============================================================================

# model name -> DataFrame; a model is simply absent when its file is missing
predictions = {}
oof_data = {}

print("Cargando predicciones de modelos seleccionados...\n")

for model in SELECTED_MODELS:
    # Holdout predictions live in predictions_dump as pred_<model>.csv
    pred_file = PRED_DIR / f'pred_{model}.csv'
    
    if pred_file.exists():
        df_pred = pd.read_csv(pred_file)
        # NOTE(review): parsed without dayfirst, unlike the main data file;
        # presumably these files use ISO dates -- confirm.
        df_pred['ds'] = pd.to_datetime(df_pred['ds'])
        predictions[model] = df_pred
        print(f"  ‚úì {model} (holdout): {len(df_pred)} predicciones")
    else:
        print(f"  ‚úó {model} (holdout): archivo no encontrado ({pred_file.name})")
    
    # Out-of-fold predictions live in oof_predictions as train_oof_<model>.csv
    oof_file = OOF_DIR / f'train_oof_{model}.csv'
    
    if oof_file.exists():
        df_oof = pd.read_csv(oof_file)
        df_oof['ds'] = pd.to_datetime(df_oof['ds'])
        oof_data[model] = df_oof
        print(f"  ‚úì {model} (OOF): {len(df_oof)} predicciones")
    else:
        print(f"  ‚úó {model} (OOF): archivo no encontrado ({oof_file.name})")

# Totals: how many models ended up with each kind of prediction
print(f"\n{'='*60}")
print(f"‚úì {len(predictions)} modelos con predicciones holdout")
print(f"‚úì {len(oof_data)} modelos con predicciones OOF")
print(f"{'='*60}")

In [None]:
# =============================================================================
# BUILD THE UNIFIED EVALUATION DATASET
# =============================================================================

# Base frame: one row per holdout date with the realised log return
df_eval = pd.DataFrame({
    'ds': holdout_data['Dates'].values,
    'y_real': y_holdout
})

# Calendar-date key used for alignment (ignores any time-of-day component)
eval_date_key = pd.to_datetime(df_eval['ds']).dt.date
holdout_dates_set = set(eval_date_key)

# Add each model's predictions as a column, always matched BY DATE.
# A lookup (instead of positional assignment or DataFrame.merge) guarantees
# df_eval keeps exactly one row per holdout date, so it stays aligned with
# y_holdout even when a prediction file has duplicated or missing dates.
print("Alineando predicciones con holdout...\n")

for model in SELECTED_MODELS:
    if model not in predictions:
        print(f"  ‚úó {model}: no encontrado en predicciones")
        continue

    df_pred = predictions[model].sort_values('ds').reset_index(drop=True)
    df_temp = df_pred[['ds', 'y_pred']].copy()
    df_temp['ds_date'] = pd.to_datetime(df_temp['ds']).dt.date

    # Duplicated dates would make the lookup ambiguous; keep the last row of
    # each date (rows are sorted by 'ds') and report how many were dropped.
    n_duplicated = int(df_temp['ds_date'].duplicated().sum())
    if n_duplicated:
        df_temp = df_temp.drop_duplicates('ds_date', keep='last')
        print(f"  ‚ö† {model}: {n_duplicated} fechas duplicadas en predicciones (se conserva la ultima)")

    pred_by_date = df_temp.set_index('ds_date')['y_pred']
    # Holdout dates without a prediction become NaN
    df_eval[model] = eval_date_key.map(pred_by_date).values

    # Check whether the prediction dates match the holdout dates exactly
    pred_dates = set(pred_by_date.index)
    if pred_dates == holdout_dates_set and not n_duplicated:
        print(f"  ‚úì {model}: {len(df_pred)} predicciones alineadas")
    else:
        n_valid = df_eval[model].notna().sum()
        print(f"  ‚ö† {model}: {n_valid}/{len(df_eval)} predicciones (merge por fecha)")

print(f"\n{'='*60}")
print(f"‚úì Dataset de evaluaci√≥n creado: {df_eval.shape}")
print(f"  Columnas: {list(df_eval.columns)}")
print(f"  Per√≠odo: {pd.to_datetime(df_eval['ds']).min().date()} a {pd.to_datetime(df_eval['ds']).max().date()}")
print(f"{'='*60}")

df_eval.head()

In [None]:
# =============================================================================
# PER-MODEL METRICS
# =============================================================================

metrics_results = []

# Realised values taken from df_eval so they always have the same length and
# row order as the prediction columns.
y_real_eval = df_eval['y_real'].to_numpy(dtype=float)

for model in SELECTED_MODELS:
    if model in df_eval.columns:
        y_pred = df_eval[model].to_numpy(dtype=float)

        # Drop positions where either the prediction or the realised value is
        # missing (same rule the OOF metrics cell applies).
        mask = ~(np.isnan(y_real_eval) | np.isnan(y_pred))
        if not mask.any():
            print(f"  ‚ö† {model}: sin predicciones validas en holdout, se omite")
            continue
        y_true_clean = y_real_eval[mask]
        y_pred_clean = y_pred[mask]

        metrics_results.append({
            'Model': model,
            'MASE': calculate_mase(y_true_clean, y_pred_clean, y_train),
            'DA': calculate_da(y_true_clean, y_pred_clean),
            'MAE': calculate_mae(y_true_clean, y_pred_clean),
            'RMSE': calculate_rmse(y_true_clean, y_pred_clean)
        })

# Explicit columns keep the frame well-formed even when no model was evaluated
df_metrics = pd.DataFrame(
    metrics_results, columns=['Model', 'MASE', 'DA', 'MAE', 'RMSE']
).sort_values('MASE').reset_index(drop=True)

print("="*70)
print("M√âTRICAS DE MODELOS EN HOLDOUT (60 d√≠as)")
print("="*70)
print(df_metrics.to_string(index=False))
print("="*70)
# df_metrics is sorted by MASE, so row 0 is the best model on that metric
if not df_metrics.empty:
    print(f"\nüèÜ Mejor MASE: {df_metrics.iloc[0]['Model']} ({df_metrics.iloc[0]['MASE']:.4f})")
    print(f"üèÜ Mejor DA: {df_metrics.loc[df_metrics['DA'].idxmax(), 'Model']} ({df_metrics['DA'].max():.2f}%)")

In [None]:
# =============================================================================
# OOF (TRAIN) METRICS FOR COMPARISON
# =============================================================================

oof_metrics = []

for model in SELECTED_MODELS:
    if model in oof_data:
        df_oof = oof_data[model]
        y_true_oof = df_oof['y_real'].values
        y_pred_oof = df_oof['y_pred'].values

        # Drop rows where either side is missing
        mask = ~(np.isnan(y_true_oof) | np.isnan(y_pred_oof))
        y_true_oof = y_true_oof[mask]
        y_pred_oof = y_pred_oof[mask]

        # NOTE(review): the MASE scale here is the naive MAE of the OOF target
        # itself (in file order, after NaN filtering), while the holdout MASE
        # is scaled by y_train. The two denominators differ, so the MASE delta
        # below mixes scales -- confirm this is intended.
        oof_metrics.append({
            'Model': model,
            'MASE_OOF': calculate_mase(y_true_oof, y_pred_oof, y_true_oof),
            'DA_OOF': calculate_da(y_true_oof, y_pred_oof),
            'N_samples': len(y_true_oof)
        })

# Explicit columns: when no OOF file was found the frame must still carry a
# 'Model' column, otherwise the merge below raises KeyError.
df_oof_metrics = pd.DataFrame(
    oof_metrics, columns=['Model', 'MASE_OOF', 'DA_OOF', 'N_samples']
)

# Left merge keeps every holdout-evaluated model; models without OOF get NaN
df_comparison = df_metrics.merge(df_oof_metrics, on='Model', how='left')
# Deltas are holdout minus OOF
df_comparison['ŒîMASE'] = df_comparison['MASE'] - df_comparison['MASE_OOF']
df_comparison['ŒîDA'] = df_comparison['DA'] - df_comparison['DA_OOF']

print("\n" + "="*80)
print("COMPARACI√ìN TRAIN (OOF) vs HOLDOUT")
print("="*80)
print(df_comparison[['Model', 'MASE_OOF', 'MASE', 'ŒîMASE', 'DA_OOF', 'DA', 'ŒîDA']].to_string(index=False))
print("="*80)

In [None]:
# =============================================================================
# ENSEMBLE STRATEGIES
# =============================================================================

# Models that actually have a prediction column in the evaluation frame
available_models = [m for m in SELECTED_MODELS if m in df_eval.columns]
X_holdout = df_eval[available_models].to_numpy(dtype=float)
# Realised values from df_eval so lengths always match X_holdout
y_eval = df_eval['y_real'].to_numpy(dtype=float)

# ---- OOF design matrix for the meta-learner ---------------------------------
# The meta-learner needs one OOF column per holdout column, in the same order.
# It is therefore only built when EVERY available model has OOF predictions.
models_without_oof = [m for m in available_models if m not in oof_data]
df_oof_aligned = None
if available_models and not models_without_oof:
    oof_columns = []
    for model in available_models:
        df_model = oof_data[model][['ds', 'y_real', 'y_pred']].dropna(subset=['ds']).copy()
        df_model['date_only'] = df_model['ds'].dt.date
        # One row per date, so every model contributes the same number of rows
        df_model = df_model.drop_duplicates('date_only', keep='last').set_index('date_only')
        if not oof_columns:
            # As before, the target is taken from the first model's OOF file
            oof_columns.append(df_model['y_real'])
        oof_columns.append(df_model['y_pred'].rename(model))
    # Inner join = dates common to all models; rows with any NaN cannot be
    # used to fit the regression and are dropped.
    df_oof_aligned = pd.concat(oof_columns, axis=1, join='inner').dropna().sort_index()
elif models_without_oof:
    print(f"‚ö† Modelos sin OOF: {models_without_oof}")

has_oof = df_oof_aligned is not None and len(df_oof_aligned) > 0
if has_oof:
    common_dates = list(df_oof_aligned.index)
    X_oof = df_oof_aligned[available_models].to_numpy(dtype=float)
    y_oof = df_oof_aligned['y_real'].to_numpy(dtype=float)
    print(f"‚úì Datos OOF preparados: {X_oof.shape}")
else:
    # Names kept for later cells; the meta-learner is NOT trained on them
    # (fitting on the holdout and scoring on it would leak the target).
    common_dates = None
    print("‚ö† No hay fechas comunes en OOF, usando holdout para ensemble simple")
    X_oof = X_holdout
    y_oof = y_eval

# Strategy name -> array of holdout predictions
strategies = {}

# 1. Simple Average (NaN-aware: averages whichever models predicted that day)
strategies['Simple Average'] = np.nanmean(X_holdout, axis=1)

# 2. Median (NaN-aware)
strategies['Median'] = np.nanmedian(X_holdout, axis=1)

# 3. Weighted Average (1/MASE)
# NOTE(review): the weights here and the top-3 selection below come from
# df_metrics, i.e. from HOLDOUT performance, and are then scored on the same
# holdout. This is optimistic; consider deriving them from OOF metrics.
mase_by_model = dict(zip(df_metrics['Model'], df_metrics['MASE']))
mase_weights = {}
for model in available_models:
    mase = mase_by_model.get(model)
    # Skip models with no usable MASE (missing, NaN, infinite or non-positive)
    if mase is not None and np.isfinite(mase) and mase > 0:
        mase_weights[model] = 1 / mase
total_weight = sum(mase_weights.values())

weight_vector = np.array([mase_weights.get(m, 0.0) for m in available_models])
has_prediction = ~np.isnan(X_holdout)
# NaN-aware weighted mean: on each day the weights are renormalised over the
# models that produced a prediction, matching the nanmean-based strategies.
weighted_sum = np.where(has_prediction, X_holdout, 0.0) @ weight_vector
weight_per_row = has_prediction.astype(float) @ weight_vector
with np.errstate(divide='ignore', invalid='ignore'):
    y_weighted = weighted_sum / weight_per_row
y_weighted[weight_per_row <= 0] = np.nan
strategies['Weighted (1/MASE)'] = y_weighted

# 4. Top 3 Average (three lowest-MASE models)
top3_models = df_metrics.nsmallest(3, 'MASE')['Model'].tolist()
top3_models = [m for m in top3_models if m in available_models]
top3_idx = [available_models.index(m) for m in top3_models]
strategies['Top 3 Average'] = np.nanmean(X_holdout[:, top3_idx], axis=1)

# 5. Ridge Meta-Learner (fitted on OOF predictions only)
if has_oof:
    try:
        scaler = StandardScaler()
        X_oof_scaled = scaler.fit_transform(X_oof)

        ridge = Ridge(alpha=100.0, random_state=RANDOM_STATE)
        ridge.fit(X_oof_scaled, y_oof)

        # Ridge cannot predict rows with a missing input; those days stay NaN
        # and are excluded by the metric mask below.
        complete_rows = ~np.isnan(X_holdout).any(axis=1)
        y_ridge = np.full(len(X_holdout), np.nan)
        if complete_rows.any():
            X_holdout_scaled = scaler.transform(X_holdout[complete_rows])
            y_ridge[complete_rows] = ridge.predict(X_holdout_scaled)
        strategies['Ridge Meta-Learner'] = y_ridge
        print("‚úì Ridge Meta-Learner entrenado")
    except Exception as e:
        # Best effort: the remaining strategies are still reported
        print(f"‚ö† Error en Ridge Meta-Learner: {e}")

# Metrics for every strategy, on the days where both sides are available
ensemble_results = []
for name, y_pred in strategies.items():
    mask = ~(np.isnan(y_eval) | np.isnan(y_pred))
    ensemble_results.append({
        'Strategy': name,
        'MASE': calculate_mase(y_eval[mask], y_pred[mask], y_train),
        'DA': calculate_da(y_eval[mask], y_pred[mask]),
        'MAE': calculate_mae(y_eval[mask], y_pred[mask])
    })

df_ensemble = pd.DataFrame(ensemble_results).sort_values('MASE').reset_index(drop=True)

print("\n" + "="*70)
print("ESTRATEGIAS DE ENSEMBLE")
print("="*70)
print(df_ensemble.to_string(index=False))
print("="*70)

---

# 📈 FIGURAS PARA EL PAPER

Las siguientes celdas generan todas las figuras en alta resolución para el paper.

In [None]:
# =============================================================================
# FIGURE 1: METRICS PER MODEL (HORIZONTAL BARS)
# =============================================================================

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Sort by MASE
df_sorted = df_metrics.sort_values('MASE')

# Panel (a): MASE
ax1 = axes[0]
colors_mase = [COLORS.get(m, '#666666') for m in df_sorted['Model']]
bars1 = ax1.barh(df_sorted['Model'], df_sorted['MASE'], color=colors_mase, edgecolor='white', linewidth=0.5)
ax1.axvline(x=1.0, color='#E74C3C', linestyle='--', linewidth=2, label='Naive baseline (MASE=1)')
ax1.set_xlabel('MASE', fontweight='bold')
ax1.set_title('(a) Mean Absolute Scaled Error', fontweight='bold', pad=10)
ax1.legend(loc='lower right')
# The axis always reaches past 1.0 so the naive-baseline line shown in the
# legend stays visible even when every model has MASE < 1.
mase_top = df_sorted['MASE'].max()
ax1.set_xlim(0, max(1.0, mase_top) * 1.15 if np.isfinite(mase_top) else 1.15)
for i, (_, row) in enumerate(df_sorted.iterrows()):
    ax1.text(row['MASE'] + 0.01, i, f'{row["MASE"]:.3f}', va='center', fontsize=10, fontweight='bold')

# Panel (b): Directional Accuracy
ax2 = axes[1]
df_sorted_da = df_metrics.sort_values('DA', ascending=True)
colors_da = [COLORS.get(m, '#666666') for m in df_sorted_da['Model']]
bars2 = ax2.barh(df_sorted_da['Model'], df_sorted_da['DA'], color=colors_da, edgecolor='white', linewidth=0.5)
ax2.axvline(x=50, color='#E74C3C', linestyle='--', linewidth=2, label='Random (50%)')
ax2.set_xlabel('Directional Accuracy (%)', fontweight='bold')
ax2.set_title('(b) Directional Accuracy', fontweight='bold', pad=10)
ax2.legend(loc='lower right')
# At least 0-70 as before, widened when a bar plus its label would not fit
da_top = df_sorted_da['DA'].max()
ax2.set_xlim(0, max(70, da_top + 8) if np.isfinite(da_top) else 70)
for i, (_, row) in enumerate(df_sorted_da.iterrows()):
    ax2.text(row['DA'] + 0.5, i, f'{row["DA"]:.1f}%', va='center', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig1_metricas_modelos.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig1_metricas_modelos.pdf')
print(f"‚úì Guardada: fig1_metricas_modelos.png/pdf")
plt.show()

In [None]:
# =============================================================================
# FIGURE 2: SCATTER PLOT MASE vs DA (KEY FIGURE OF THE PAPER)
# =============================================================================

fig, ax = plt.subplots(figsize=(12, 9))

# Collect individual models and ensemble strategies in one table
all_results = []

# Individual models
for _, row in df_metrics.iterrows():
    all_results.append({
        'Name': row['Model'],
        'MASE': row['MASE'],
        'DA': row['DA'],
        'Type': 'Individual',
        'Color': COLORS.get(row['Model'], '#666666')
    })

# Ensemble strategies
for _, row in df_ensemble.iterrows():
    all_results.append({
        'Name': row['Strategy'],
        'MASE': row['MASE'],
        'DA': row['DA'],
        'Type': 'Ensemble',
        'Color': COLORS['Ensemble']
    })

df_all = pd.DataFrame(all_results)

# Target region (shaded).
# NOTE(review): the bounds (MASE 0.88-0.98, DA 55-65) are hard-coded, not
# computed from a Pareto front -- confirm the label is what the paper means.
ax.fill_between([0.88, 0.98], 55, 65, alpha=0.15, color='green', label='Regi√≥n Pareto-√≥ptima')

# Reference lines
ax.axhline(y=50, color='#E74C3C', linestyle='--', linewidth=1.5, alpha=0.7, label='Random DA (50%)')
ax.axvline(x=1.0, color='#3498DB', linestyle='--', linewidth=1.5, alpha=0.7, label='Naive MASE (1.0)')

# Individual models: circles, labelled in the model's own colour
for _, row in df_all[df_all['Type'] == 'Individual'].iterrows():
    ax.scatter(row['MASE'], row['DA'], s=250, c=row['Color'], 
               marker='o', edgecolors='white', linewidths=2, zorder=5)
    ax.annotate(row['Name'], (row['MASE'], row['DA']), 
                textcoords="offset points", xytext=(8, 8), 
                fontsize=11, fontweight='bold', color=row['Color'])

# Ensemble strategies: squares, labelled in grey italics
for _, row in df_all[df_all['Type'] == 'Ensemble'].iterrows():
    ax.scatter(row['MASE'], row['DA'], s=200, c=row['Color'], 
               marker='s', edgecolors='white', linewidths=2, zorder=5, alpha=0.8)
    ax.annotate(row['Name'], (row['MASE'], row['DA']), 
                textcoords="offset points", xytext=(8, -12), 
                fontsize=9, fontstyle='italic', color='#555555')

# Legend built from explicit handles. Because these handles replace the
# automatic ones, the shaded region needs its own entry here or it would be
# drawn without any explanation.
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
legend_elements = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor='gray', markersize=12, label='Modelo Individual'),
    Line2D([0], [0], marker='s', color='w', markerfacecolor=COLORS['Ensemble'], markersize=12, label='Estrategia Ensemble'),
    Line2D([0], [0], color='#E74C3C', linestyle='--', label='Random DA (50%)'),
    Line2D([0], [0], color='#3498DB', linestyle='--', label='Naive MASE (1.0)'),
    Patch(facecolor='green', alpha=0.15, label='Regi√≥n Pareto-√≥ptima'),
]
ax.legend(handles=legend_elements, loc='lower left', fontsize=10)

ax.set_xlabel('MASE (menor es mejor)', fontsize=13, fontweight='bold')
ax.set_ylabel('Directional Accuracy % (mayor es mejor)', fontsize=13, fontweight='bold')
ax.set_title('Trade-off MASE vs Directional Accuracy\nModelos Individuales y Estrategias de Ensemble', 
             fontsize=14, fontweight='bold', pad=15)

# Dynamic limits: 5% margin on MASE, 5 points on DA
mase_min = df_all['MASE'].min() * 0.95
mase_max = df_all['MASE'].max() * 1.05
da_min = df_all['DA'].min() - 5
da_max = df_all['DA'].max() + 5
ax.set_xlim(mase_min, mase_max)
ax.set_ylim(da_min, da_max)

# Grid
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig2_scatter_mase_da.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig2_scatter_mase_da.pdf')
print(f"‚úì Guardada: fig2_scatter_mase_da.png/pdf")
plt.show()

In [None]:
# =============================================================================
# FIGURE 3: PREDICTIONS vs ACTUAL VALUES ON THE HOLDOUT
# =============================================================================

fig, ax = plt.subplots(figsize=(16, 7))

# Realised series, drawn on top of everything else (zorder=10)
ax.plot(df_eval['ds'], df_eval['y_real'], color=COLORS['Real'], linewidth=2.5, 
        label='Real', marker='o', markersize=4, zorder=10)

# One dashed line per model that has a column in df_eval
for model in SELECTED_MODELS:
    if model in df_eval.columns:
        ax.plot(df_eval['ds'], df_eval[model], '--', linewidth=1.3, 
                label=model, alpha=0.8, color=COLORS.get(model, 'gray'))

# Zero line separates positive from negative log returns
ax.axhline(y=0, color='gray', linestyle=':', alpha=0.5, linewidth=1)
ax.set_xlabel('Fecha', fontsize=12, fontweight='bold')
ax.set_ylabel('Log Returns', fontsize=12, fontweight='bold')
ax.set_title('Predicciones vs Valores Reales - Per√≠odo Holdout (60 d√≠as)', 
             fontsize=14, fontweight='bold', pad=15)
# Legend is placed outside the axes, to the right
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left', fontsize=10)
plt.xticks(rotation=45)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig3_predicciones_holdout.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig3_predicciones_holdout.pdf')
print(f"‚úì Guardada: fig3_predicciones_holdout.png/pdf")
plt.show()

In [None]:
# =============================================================================
# FIGURE 4: TRAIN (OOF) vs HOLDOUT COMPARISON
# =============================================================================

fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Keep only models that have OOF metrics
df_comp_valid = df_comparison.dropna(subset=['MASE_OOF', 'DA_OOF'])
models = df_comp_valid['Model'].tolist()
x = np.arange(len(models))
width = 0.35

# Tallest bar of each panel, used to size the y-axis so that neither the bars
# nor their value labels are clipped. NaN when there is nothing to plot.
mase_top = df_comp_valid[['MASE_OOF', 'MASE']].astype(float).max().max()
da_top = df_comp_valid[['DA_OOF', 'DA']].astype(float).max().max()

# Panel (a): MASE, grouped bars (OOF left, holdout right)
ax1 = axes[0]
bars1 = ax1.bar(x - width/2, df_comp_valid['MASE_OOF'], width, label='Train (OOF)', color='#3498DB', edgecolor='white')
bars2 = ax1.bar(x + width/2, df_comp_valid['MASE'], width, label='Holdout', color='#E74C3C', edgecolor='white')
ax1.axhline(y=1.0, color='gray', linestyle='--', alpha=0.7, linewidth=1.5)
ax1.set_xticks(x)
ax1.set_xticklabels(models, rotation=45, ha='right', fontsize=10)
ax1.set_ylabel('MASE', fontweight='bold')
ax1.set_title('(a) MASE: Train vs Holdout', fontweight='bold', pad=10)
ax1.legend(loc='upper right')
# At least 0-1.2 as before; grows when a model's MASE exceeds that range
ax1.set_ylim(0, max(1.2, mase_top * 1.1) if np.isfinite(mase_top) else 1.2)

# Value labels above each bar
for i, (oof, hold) in enumerate(zip(df_comp_valid['MASE_OOF'], df_comp_valid['MASE'])):
    ax1.text(i - width/2, oof + 0.02, f'{oof:.2f}', ha='center', va='bottom', fontsize=8)
    ax1.text(i + width/2, hold + 0.02, f'{hold:.2f}', ha='center', va='bottom', fontsize=8)

# Panel (b): Directional Accuracy
ax2 = axes[1]
bars3 = ax2.bar(x - width/2, df_comp_valid['DA_OOF'], width, label='Train (OOF)', color='#3498DB', edgecolor='white')
bars4 = ax2.bar(x + width/2, df_comp_valid['DA'], width, label='Holdout', color='#E74C3C', edgecolor='white')
ax2.axhline(y=50, color='gray', linestyle='--', alpha=0.7, linewidth=1.5)
ax2.set_xticks(x)
ax2.set_xticklabels(models, rotation=45, ha='right', fontsize=10)
ax2.set_ylabel('DA (%)', fontweight='bold')
ax2.set_title('(b) Directional Accuracy: Train vs Holdout', fontweight='bold', pad=10)
ax2.legend(loc='upper right')
# At least 0-70 as before; grows when a DA value plus its label would not fit
ax2.set_ylim(0, max(70, da_top + 8) if np.isfinite(da_top) else 70)

# Value labels above each bar
for i, (oof, hold) in enumerate(zip(df_comp_valid['DA_OOF'], df_comp_valid['DA'])):
    ax2.text(i - width/2, oof + 1, f'{oof:.1f}', ha='center', va='bottom', fontsize=8)
    ax2.text(i + width/2, hold + 1, f'{hold:.1f}', ha='center', va='bottom', fontsize=8)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig4_train_vs_holdout.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig4_train_vs_holdout.pdf')
print(f"‚úì Guardada: fig4_train_vs_holdout.png/pdf")
plt.show()

In [None]:
# =============================================================================
# FIGURE 5: CORRELATION MATRIX BETWEEN MODELS
# =============================================================================

# Use only the models that have a column in df_eval
available_cols = [m for m in SELECTED_MODELS if m in df_eval.columns]
# Pairwise correlation of the holdout predictions (DataFrame.corr default method)
corr_matrix = df_eval[available_cols].corr()

fig, ax = plt.subplots(figsize=(10, 8))

# Diverging palette centred at zero correlation
cmap = sns.diverging_palette(220, 20, as_cmap=True)

sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap=cmap, center=0,
            square=True, linewidths=1, ax=ax, vmin=-1, vmax=1,
            annot_kws={'size': 12, 'weight': 'bold'},
            cbar_kws={'shrink': 0.8, 'label': 'Correlaci√≥n'})

ax.set_title('Correlaci√≥n entre Predicciones de Modelos', fontsize=14, fontweight='bold', pad=15)

# Average pairwise correlation: mean of the strict upper triangle (k=1 skips
# the diagonal of ones). Any NaN correlation makes this average NaN.
upper_tri = corr_matrix.values[np.triu_indices(len(corr_matrix), k=1)]
avg_corr = upper_tri.mean()
ax.text(0.5, -0.1, f'Correlaci√≥n promedio: {avg_corr:.3f}', 
        transform=ax.transAxes, ha='center', fontsize=11, fontstyle='italic')

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig5_correlaciones.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig5_correlaciones.pdf')
print(f"‚úì Guardada: fig5_correlaciones.png/pdf")
plt.show()

In [None]:
# =============================================================================
# FIGURE 6: CUMULATIVE ERRORS
# =============================================================================

fig, ax = plt.subplots(figsize=(14, 7))

# Running sum of absolute errors for every model present in df_eval
for model in SELECTED_MODELS:
    if model in df_eval.columns:
        errors = np.abs(df_eval['y_real'] - df_eval[model])
        cum_errors = errors.cumsum()
        ax.plot(df_eval['ds'], cum_errors, linewidth=2, 
                label=model, color=COLORS.get(model, 'gray'))

# Add the ensemble curve.
# NOTE(review): 'Top 3 Average' is hard-coded as the "best" ensemble rather
# than read from the df_ensemble ranking -- confirm it is still the best one.
# Missing values are treated as zero error here (nancumsum), whereas the model
# curves above use Series.cumsum.
if 'Top 3 Average' in strategies:
    best_ensemble = strategies['Top 3 Average']
    errors_ens = np.abs(y_holdout - best_ensemble)
    cum_errors_ens = np.nancumsum(errors_ens)
    ax.plot(df_eval['ds'], cum_errors_ens, linewidth=2.5, linestyle='--',
            label='Top 3 Average (Ensemble)', color=COLORS['Ensemble'])

ax.set_xlabel('Fecha', fontsize=12, fontweight='bold')
ax.set_ylabel('Error Absoluto Acumulado', fontsize=12, fontweight='bold')
ax.set_title('Errores Absolutos Acumulados en Holdout', fontsize=14, fontweight='bold', pad=15)
# Legend is placed outside the axes, to the right
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left', fontsize=10)
plt.xticks(rotation=45)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig6_errores_acumulados.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig6_errores_acumulados.pdf')
print(f"‚úì Guardada: fig6_errores_acumulados.png/pdf")
plt.show()

In [None]:
# =============================================================================
# FIGURE 7: ERROR DISTRIBUTION PER MODEL
# =============================================================================

n_models = len([m for m in SELECTED_MODELS if m in df_eval.columns])
n_cols = 3
# Ceiling division; at least one row so plt.subplots never receives 0 rows
n_rows = max(1, (n_models + n_cols - 1) // n_cols)

# squeeze=False always returns a 2-D array of Axes, so flatten() yields a flat
# sequence of individual Axes for any number of models (including just one).
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4*n_rows), squeeze=False)
axes = axes.flatten()

plot_idx = 0
for model in SELECTED_MODELS:
    if model in df_eval.columns and plot_idx < len(axes):
        # Signed error: positive means the model under-predicted
        errors = df_eval['y_real'] - df_eval[model]
        ax = axes[plot_idx]

        ax.hist(errors.dropna(), bins=15, edgecolor='white', alpha=0.8, 
                color=COLORS.get(model, 'steelblue'))
        # Dashed red line at zero error, solid green line at the mean error
        ax.axvline(x=0, color='#E74C3C', linestyle='--', linewidth=2)
        ax.axvline(x=errors.mean(), color='#2ECC71', linestyle='-', linewidth=2, 
                   label=f'Media: {errors.mean():.5f}')
        ax.set_title(f'{model}', fontsize=12, fontweight='bold')
        ax.set_xlabel('Error')
        ax.legend(fontsize=9, loc='upper right')
        ax.grid(True, alpha=0.3)
        plot_idx += 1

# Hide unused subplots
for j in range(plot_idx, len(axes)):
    axes[j].set_visible(False)

plt.suptitle('Distribuci√≥n de Errores de Predicci√≥n por Modelo', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig7_distribucion_errores.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig7_distribucion_errores.pdf')
print(f"‚úì Guardada: fig7_distribucion_errores.png/pdf")
plt.show()

In [None]:
# =============================================================================
# FIGURE 8: COMPARISON OF ENSEMBLE STRATEGIES
# =============================================================================

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Sort by MASE (ascending, so the best strategy is first)
df_ens_sorted = df_ensemble.sort_values('MASE')

# Panel (a): MASE
ax1 = axes[0]
# Highlight the first (lowest-MASE) bar in green, the rest in blue
colors_ens = ['#27ae60' if i == 0 else '#3498db' for i in range(len(df_ens_sorted))]
ax1.barh(df_ens_sorted['Strategy'], df_ens_sorted['MASE'], color=colors_ens, edgecolor='white')
# Reference line: MASE of the best individual model
ax1.axvline(x=df_metrics['MASE'].min(), color='#E74C3C', linestyle='--', linewidth=2,
            label=f"Mejor individual ({df_metrics.loc[df_metrics['MASE'].idxmin(), 'Model']})")
ax1.set_xlabel('MASE', fontweight='bold')
ax1.set_title('(a) MASE por Estrategia de Ensemble', fontweight='bold', pad=10)
ax1.legend(loc='lower right')
for i, v in enumerate(df_ens_sorted['MASE']):
    ax1.text(v + 0.002, i, f'{v:.4f}', va='center', fontsize=10)

# Panel (b): Directional Accuracy
ax2 = axes[1]
df_ens_sorted_da = df_ensemble.sort_values('DA', ascending=True)
# Ascending sort puts the highest DA last, so the last bar is highlighted
colors_ens_da = ['#27ae60' if i == len(df_ens_sorted_da)-1 else '#3498db' for i in range(len(df_ens_sorted_da))]
ax2.barh(df_ens_sorted_da['Strategy'], df_ens_sorted_da['DA'], color=colors_ens_da, edgecolor='white')
# Reference line: DA of the best individual model
ax2.axvline(x=df_metrics['DA'].max(), color='#E74C3C', linestyle='--', linewidth=2,
            label=f"Mejor individual ({df_metrics.loc[df_metrics['DA'].idxmax(), 'Model']})")
ax2.set_xlabel('DA (%)', fontweight='bold')
ax2.set_title('(b) Directional Accuracy por Estrategia', fontweight='bold', pad=10)
ax2.legend(loc='lower right')
for i, v in enumerate(df_ens_sorted_da['DA']):
    ax2.text(v + 0.5, i, f'{v:.1f}%', va='center', fontsize=10)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig8_estrategias_ensemble.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig8_estrategias_ensemble.pdf')
print(f"‚úì Guardada: fig8_estrategias_ensemble.png/pdf")
plt.show()

In [None]:
# =============================================================================
# FIGURE 9: ROLLING DA (EVOLUTION OVER TIME)
# =============================================================================

fig, ax = plt.subplots(figsize=(14, 7))

window = 10  # 10-day window

for model in SELECTED_MODELS:
    if model in df_eval.columns:
        y_real_series = df_eval['y_real']
        y_pred_series = df_eval[model]
        # 1.0 = direction hit, 0.0 = miss. Days where either value is missing
        # are set to NaN instead of being counted as misses, consistent with
        # the metric cells, which filter NaNs before computing DA. Windows that
        # contain such a day are left blank on the plot.
        both_present = y_real_series.notna() & y_pred_series.notna()
        dir_correct = (np.sign(y_real_series) == np.sign(y_pred_series)).astype(float).where(both_present)
        rolling_da = dir_correct.rolling(window=window).mean() * 100
        ax.plot(df_eval['ds'], rolling_da, linewidth=2, 
                label=model, color=COLORS.get(model, 'gray'))

# 50% reference line plus a grey 45-55% band around it
ax.axhline(y=50, color='#E74C3C', linestyle='--', linewidth=2, alpha=0.7, label='Random (50%)')
ax.fill_between(df_eval['ds'], 45, 55, alpha=0.1, color='gray')

ax.set_xlabel('Fecha', fontsize=12, fontweight='bold')
ax.set_ylabel(f'DA Rolling ({window} d√≠as) %', fontsize=12, fontweight='bold')
ax.set_title('Evoluci√≥n Temporal de Directional Accuracy', fontsize=14, fontweight='bold', pad=15)
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left', fontsize=10)
ax.set_ylim(0, 100)
plt.xticks(rotation=45)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig9_rolling_da.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig9_rolling_da.pdf')
print(f"‚úì Guardada: fig9_rolling_da.png/pdf")
plt.show()

In [None]:
# =============================================================================
# FIGURE 10: BEST ENSEMBLE vs ACTUAL
# =============================================================================

fig, ax = plt.subplots(figsize=(16, 8))

# Realised series, drawn on top (zorder=10)
ax.plot(df_eval['ds'], y_holdout, color=COLORS['Real'], linewidth=2.5, 
        label='Real', marker='o', markersize=5, zorder=10)

# Ensemble curve.
# NOTE(review): 'Top 3 Average' is hard-coded as the "best" ensemble rather
# than taken from the df_ensemble ranking -- confirm it is still the best one.
if 'Top 3 Average' in strategies:
    best_ensemble = strategies['Top 3 Average']
    ax.plot(df_eval['ds'], best_ensemble, color=COLORS['Ensemble'], linewidth=2, 
            linestyle='--', label='Top 3 Average (Ensemble)', marker='s', markersize=4, alpha=0.9)
    # Shade the gap between the realised series and the ensemble (the error)
    ax.fill_between(df_eval['ds'], y_holdout, best_ensemble, alpha=0.2, color=COLORS['Ensemble'])

# Best individual model: df_metrics is sorted by MASE, so row 0 is the best
best_model = df_metrics.iloc[0]['Model']
if best_model in df_eval.columns:
    ax.plot(df_eval['ds'], df_eval[best_model], color=COLORS.get(best_model, 'blue'), linewidth=1.5,
            linestyle=':', label=f'{best_model} (Mejor Individual)', alpha=0.8)

ax.axhline(y=0, color='gray', linestyle=':', alpha=0.5)
ax.set_xlabel('Fecha', fontsize=12, fontweight='bold')
ax.set_ylabel('Log Returns', fontsize=12, fontweight='bold')
ax.set_title('Mejor Ensemble vs Valores Reales - Per√≠odo Holdout', fontsize=14, fontweight='bold', pad=15)
ax.legend(loc='upper left', fontsize=11)
plt.xticks(rotation=45)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig10_mejor_ensemble.png', dpi=300)
plt.savefig(OUTPUT_DIR / 'fig10_mejor_ensemble.pdf')
print(f"‚úì Guardada: fig10_mejor_ensemble.png/pdf")
plt.show()

---

# 📋 TABLAS PARA EL PAPER (LaTeX)

In [None]:
# =============================================================================
# TABLE 1: MODEL METRICS (LATEX)
# =============================================================================
# Builds the per-model metrics table, prints it, and writes it to
# OUTPUT_DIR as both a LaTeX fragment (.tex) and a CSV file.

# Add a ranking column
# presumably df_metrics is already sorted best-first, so row order is the rank — TODO confirm
df_table1 = df_metrics.copy()
df_table1['Rank'] = range(1, len(df_table1) + 1)
df_table1 = df_table1[['Rank', 'Model', 'MASE', 'DA', 'MAE', 'RMSE']]

# LaTeX formatting: floats are rendered with 4 decimals
latex_table1 = df_table1.to_latex(
    index=False,
    float_format=lambda x: f'{x:.4f}' if isinstance(x, float) else str(x),
    caption='M√©tricas de evaluaci√≥n por modelo en per√≠odo holdout (60 d√≠as)',
    label='tab:metrics',
)

print("TABLA 1: M√©tricas de Modelos")
print("="*70)
print(df_table1.to_string(index=False))
print("="*70)

# Save
# The caption contains non-ASCII characters; write the .tex explicitly as
# UTF-8 so the output does not depend on the platform's locale encoding and
# matches the CSV written below (pandas' to_csv defaults to UTF-8).
with open(OUTPUT_DIR / 'tabla1_metricas.tex', 'w', encoding='utf-8') as f:
    f.write(latex_table1)
df_table1.to_csv(OUTPUT_DIR / 'tabla1_metricas.csv', index=False)
print("\n‚úì Guardada: tabla1_metricas.tex/csv")

In [None]:
# =============================================================================
# TABLE 2: TRAIN vs HOLDOUT COMPARISON (LATEX)
# =============================================================================
# Selects the out-of-fold (train) and holdout metric columns from
# df_comparison, renames them for display, prints the table, and writes it to
# OUTPUT_DIR as both a LaTeX fragment (.tex) and a CSV file.

df_table2 = df_comparison[['Model', 'MASE_OOF', 'MASE', 'ŒîMASE', 'DA_OOF', 'DA', 'ŒîDA']].copy()
# Display headers, positionally aligned with the selection above
df_table2.columns = ['Model', 'MASE Train', 'MASE Holdout', 'Œî MASE', 'DA Train', 'DA Holdout', 'Œî DA']

# LaTeX formatting: floats are rendered with 4 decimals
latex_table2 = df_table2.to_latex(
    index=False,
    float_format=lambda x: f'{x:.4f}' if isinstance(x, float) else str(x),
    caption='Comparaci√≥n de m√©tricas entre Train (OOF) y Holdout',
    label='tab:comparison',
)

print("\nTABLA 2: Comparaci√≥n Train vs Holdout")
print("="*80)
print(df_table2.to_string(index=False))
print("="*80)

# Save
# Column headers and caption contain non-ASCII characters. Without an
# explicit encoding, open() uses the locale encoding, which can raise
# UnicodeEncodeError (or produce a non-UTF-8 file) on some platforms.
# Writing UTF-8 also matches the CSV below (pandas' to_csv defaults to UTF-8).
with open(OUTPUT_DIR / 'tabla2_comparacion.tex', 'w', encoding='utf-8') as f:
    f.write(latex_table2)
df_table2.to_csv(OUTPUT_DIR / 'tabla2_comparacion.csv', index=False)
print("\n‚úì Guardada: tabla2_comparacion.tex/csv")

In [None]:
# =============================================================================
# TABLE 3: ENSEMBLE STRATEGIES (LATEX)
# =============================================================================
# Exports the ensemble-strategy metrics in df_ensemble unchanged: prints the
# table and writes it to OUTPUT_DIR as a LaTeX fragment (.tex) and a CSV file.

df_table3 = df_ensemble.copy()

# LaTeX formatting: floats are rendered with 4 decimals
latex_table3 = df_table3.to_latex(
    index=False,
    float_format=lambda x: f'{x:.4f}' if isinstance(x, float) else str(x),
    caption='M√©tricas de estrategias de ensemble',
    label='tab:ensemble',
)

print("\nTABLA 3: Estrategias de Ensemble")
print("="*60)
print(df_table3.to_string(index=False))
print("="*60)

# Save
# The caption contains non-ASCII characters; write the .tex explicitly as
# UTF-8 so the output does not depend on the platform's locale encoding and
# matches the CSV written below (pandas' to_csv defaults to UTF-8).
with open(OUTPUT_DIR / 'tabla3_ensemble.tex', 'w', encoding='utf-8') as f:
    f.write(latex_table3)
df_table3.to_csv(OUTPUT_DIR / 'tabla3_ensemble.csv', index=False)
print("\n‚úì Guardada: tabla3_ensemble.tex/csv")

In [None]:
# =============================================================================
# TABLE 4: META-LEARNER WEIGHTS (LATEX)
# =============================================================================
# If a fitted `ridge` object exists in the notebook namespace, tabulates its
# coefficients per model (sorted by absolute value, largest first), prints
# them with the intercept, and writes the table to OUTPUT_DIR as a LaTeX
# fragment (.tex) and a CSV file. Otherwise only prints a warning.

# dir() with no arguments lists the names in the current scope, so this skips
# the cell gracefully when the meta-learner cell was not executed.
if 'ridge' in dir():
    # assumes ridge.coef_ is 1-D and aligned with available_models — TODO confirm
    weights_df = pd.DataFrame({
        'Model': available_models,
        'Weight': ridge.coef_,
        'Abs Weight': np.abs(ridge.coef_)
    }).sort_values('Abs Weight', ascending=False)

    # LaTeX formatting: floats are rendered with 6 decimals
    latex_table4 = weights_df.to_latex(
        index=False,
        float_format=lambda x: f'{x:.6f}' if isinstance(x, float) else str(x),
        caption='Pesos del Ridge Meta-Learner',
        label='tab:weights',
    )

    print("\nTABLA 4: Pesos del Meta-Learner")
    print("="*50)
    print(weights_df.to_string(index=False))
    print(f"\nIntercept: {ridge.intercept_:.6f}")
    print("="*50)

    # Save
    # Explicit UTF-8 keeps the .tex output independent of the platform's
    # locale encoding, consistent with the other exported tables and with
    # the CSV written below (pandas' to_csv defaults to UTF-8).
    with open(OUTPUT_DIR / 'tabla4_pesos.tex', 'w', encoding='utf-8') as f:
        f.write(latex_table4)
    weights_df.to_csv(OUTPUT_DIR / 'tabla4_pesos.csv', index=False)
    print("\n‚úì Guardada: tabla4_pesos.tex/csv")
else:
    print("‚ö† Ridge Meta-Learner no disponible")

---

# 📊 RESUMEN FINAL Y CONCLUSIONES

In [None]:
# =============================================================================
# FINAL SUMMARY
# =============================================================================
# Prints a console report: experimental setup, best individual model, best
# ensemble strategy, mean pairwise correlation between models, and the list
# of files present in OUTPUT_DIR.

summary_rule = "=" * 80
print(summary_rule)
print("RESUMEN FINAL - EVALUACI√ìN DEL ENSEMBLE USD/PEN")
print(summary_rule)

# --- Experimental setup ------------------------------------------------------
print("\nüìä CONFIGURACI√ìN EXPERIMENTAL:")
data_start = df_data['Dates'].min().date()
data_end = df_data['Dates'].max().date()
print(f"   ‚Ä¢ Dataset: {len(df_data):,} observaciones ({data_start} - {data_end})")
holdout_start = holdout_data['Dates'].min().date()
holdout_end = holdout_data['Dates'].max().date()
print(f"   ‚Ä¢ Holdout: {N_HOLDOUT} d√≠as ({holdout_start} - {holdout_end})")
# Only models that actually have a prediction column count as evaluated
evaluated_models = [m for m in SELECTED_MODELS if m in df_eval.columns]
print(f"   ‚Ä¢ Modelos evaluados: {len(evaluated_models)} ({', '.join(evaluated_models)})")
print("   ‚Ä¢ Modelos excluidos: TiDE (MASE=2.21), ChronosX (datos inv√°lidos), LSTM (sobreajuste)")

# --- Best individual models --------------------------------------------------
print("\nüèÜ MEJORES RESULTADOS:")
# presumably df_metrics is sorted by MASE ascending, so row 0 is the best MASE — TODO confirm
best_mase = df_metrics.iloc[0]
best_da = df_metrics.loc[df_metrics['DA'].idxmax()]
print(f"   ‚Ä¢ Mejor MASE: {best_mase['Model']} ({best_mase['MASE']:.4f})")
print(f"   ‚Ä¢ Mejor DA: {best_da['Model']} ({best_da['DA']:.2f}%)")

# --- Ensemble strategies -----------------------------------------------------
print("\nüìà ESTRATEGIAS DE ENSEMBLE:")
# presumably df_ensemble is sorted best-first as well — TODO confirm
best_ens = df_ensemble.iloc[0]
print(f"   ‚Ä¢ Mejor ensemble: {best_ens['Strategy']} (MASE={best_ens['MASE']:.4f}, DA={best_ens['DA']:.2f}%)")

# Comparison: relative MASE change of the first ensemble row versus the first
# individual row; positive when the ensemble's MASE is lower
improvement_mase = (best_mase['MASE'] - best_ens['MASE']) / best_mase['MASE'] * 100
print(f"   ‚Ä¢ Mejora vs mejor individual: {improvement_mase:+.2f}% en MASE")

# --- Ensemble diversity ------------------------------------------------------
print("\nüîó DIVERSIDAD DEL ENSEMBLE:")
if len(corr_matrix) > 1:
    # Strict upper triangle (k=1): each model pair once, diagonal excluded
    pair_rows, pair_cols = np.triu_indices(len(corr_matrix), k=1)
    upper_tri = corr_matrix.values[pair_rows, pair_cols]
    print(f"   ‚Ä¢ Correlaci√≥n promedio entre modelos: {upper_tri.mean():.3f}")

# --- Generated files ---------------------------------------------------------
print("\nüìÅ ARCHIVOS GENERADOS:")
for out_file in sorted(OUTPUT_DIR.glob('*')):
    print(f"   ‚Ä¢ {out_file.name}")

print("\n" + summary_rule)
print("‚úì AN√ÅLISIS COMPLETADO")
print(summary_rule)

In [None]:
# =============================================================================
# SAVE DATA FOR REPRODUCIBILITY
# =============================================================================
# Writes the main result tables to OUTPUT_DIR as CSV files (no index column),
# in this order: evaluation dataset, all metrics, ensemble results, and the
# train-vs-holdout comparison.

for csv_name, frame in (
    ('evaluation_data.csv', df_eval),
    ('all_metrics.csv', df_metrics),
    ('ensemble_results.csv', df_ensemble),
    ('train_holdout_comparison.csv', df_comparison),
):
    frame.to_csv(OUTPUT_DIR / csv_name, index=False)

print("‚úì Todos los datos guardados para reproducibilidad")
print(f"\nüìÇ Directorio de salida: {OUTPUT_DIR}")