In [None]:
# ===========================
# Imports
# ===========================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Configuraci√≥n de visualizaci√≥n
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

# Directorios
EXPORTS_DIR = Path('../exports')
MODELS = ['randomforest', 'xgboost', 'stacking', 'mlp', 'lstm']
COLORS = {'randomforest': '#2ecc71', 'xgboost': '#3498db', 'stacking': '#9b59b6', 
          'mlp': '#e74c3c', 'lstm': '#f39c12'}

print("‚úÖ Imports completados")

## 1️⃣ Comparación de Métricas

Cargar todas las predicciones y calcular métricas comparativas.

In [None]:
# ===========================
# Load VAL and TEST predictions and compute comparison metrics
# ===========================
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


def _best_by_rmse(metrics, split):
    """Return the row of `metrics` with the lowest RMSE for `split`.

    Returns None when `metrics` has no rows for that split, so the caller can
    skip the report instead of failing on an empty selection.
    """
    split_rows = metrics[metrics['split'] == split]
    if split_rows.empty:
        return None
    return split_rows.sort_values('rmse').iloc[0]


# One record per (model, split) pair whose predictions file exists.
metrics_comparison = []

for model_name in MODELS:
    for split in ['val', 'test']:
        pred_file = EXPORTS_DIR / f'predictions_{model_name}_{split}.csv'

        # Missing exports are reported and skipped rather than aborting the run.
        if not pred_file.exists():
            print(f"‚ö†Ô∏è {pred_file.name} no encontrado")
            continue

        df = pd.read_csv(pred_file)

        rmse = np.sqrt(mean_squared_error(df['y_true'], df['y_pred']))
        mae = mean_absolute_error(df['y_true'], df['y_pred'])
        r2 = r2_score(df['y_true'], df['y_pred'])

        metrics_comparison.append({
            'model': model_name,
            'split': split,
            'rmse': rmse,
            'mae': mae,
            'r2': r2,
            'n_samples': len(df)
        })

# With no records the frame would have no columns at all and everything below
# (and in later cells) would fail with an unrelated-looking KeyError.
if not metrics_comparison:
    raise FileNotFoundError(
        f"No hay archivos de predicciones (predictions_<modelo>_<split>.csv) en {EXPORTS_DIR}"
    )

metrics_df = pd.DataFrame(metrics_comparison)

# Comparison table: one row per model, one column per (metric, split).
print("\nüìä TABLA COMPARATIVA DE M√âTRICAS\n")
pivot_table = metrics_df.pivot_table(
    index='model',
    columns='split',
    values=['rmse', 'mae', 'r2']
).round(4)
print(pivot_table)

# Best model by RMSE on validation (skipped if no validation file was found).
best_val = _best_by_rmse(metrics_df, 'val')
if best_val is not None:
    print(f"\nüèÜ Mejor modelo (VAL): {best_val['model'].upper()} - RMSE: {best_val['rmse']:.4f}")

# Best model by RMSE on test (skipped if no test file was found).
best_test = _best_by_rmse(metrics_df, 'test')
if best_test is not None:
    print(f"üèÜ Mejor modelo (TEST): {best_test['model'].upper()} - RMSE: {best_test['rmse']:.4f}")

In [None]:
# ===========================
# Visualization: RMSE, MAE and R2 per model and split
# ===========================
# Bar order and legend text both come from this one mapping. DataFrame.pivot
# sorts the column labels, so 'test' lands before 'val'; with a hard-coded
# ['Val', 'Test'] legend the bars ended up labelled with the wrong split.
split_labels = {'val': 'Val', 'test': 'Test'}

# (metrics_df column, label used in title and y axis, [val colour, test colour])
metric_panels = [
    ('rmse', 'RMSE', ['#3498db', '#e74c3c']),
    ('mae', 'MAE', ['#2ecc71', '#f39c12']),
    ('r2', 'R¬≤', ['#9b59b6', '#34495e']),
]

fig, axes = plt.subplots(1, 3, figsize=(18, 5))
metric_pivots = {}

for ax, (metric, metric_label, bar_colors) in zip(axes, metric_panels):
    # Rows: models. Columns: splits, forced into val/test order and renamed so
    # the legend entries are taken from the data instead of from position.
    metric_pivots[metric] = (
        metrics_df
        .pivot(index='model', columns='split', values=metric)
        .reindex(columns=list(split_labels))
        .rename(columns=split_labels)
    )
    metric_pivots[metric].plot(kind='bar', ax=ax, color=bar_colors, alpha=0.8)

    ax.set_title(f'{metric_label} por Modelo y Split', fontsize=14, fontweight='bold')
    ax.set_xlabel('Modelo')
    ax.set_ylabel(metric_label)
    ax.legend(title='Split')
    ax.grid(axis='y', alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

# Keep the per-metric tables available under their original names.
rmse_pivot = metric_pivots['rmse']
mae_pivot = metric_pivots['mae']
r2_pivot = metric_pivots['r2']

plt.tight_layout()
plt.savefig(EXPORTS_DIR / 'comparison_metrics.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Gr√°fico guardado: comparison_metrics.png")

## 2️⃣ Distribución de Errores

Analizar distribuciones de residuales para cada modelo.

In [None]:
# ===========================
# Residual distributions (VAL)
# ===========================
# 2x3 grid flattened to a 1-D array: one panel per model, last panel unused.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
axes = axes.flatten()

for idx, model_name in enumerate(MODELS):
    pred_file = EXPORTS_DIR / f'predictions_{model_name}_val.csv'
    if not pred_file.exists():
        # No export for this model: its panel is left untouched.
        continue

    ax = axes[idx]
    residuals = pd.read_csv(pred_file)['residual']

    # Histogram with a reference line at zero error.
    ax.hist(residuals, bins=50, color=COLORS[model_name], alpha=0.7, edgecolor='black')
    ax.axvline(0, color='red', linestyle='--', linewidth=2, label='Error=0')
    ax.set_title(
        f'{model_name.upper()} - Distribuci√≥n de Residuales (VAL)',
        fontsize=12,
        fontweight='bold',
    )
    ax.set_xlabel('Residual (y_true - y_pred)')
    ax.set_ylabel('Frecuencia')
    ax.legend()
    ax.grid(alpha=0.3)

    # Mean and standard deviation of the residuals, boxed in the top-right corner.
    mean_res, std_res = residuals.mean(), residuals.std()
    ax.text(
        0.95, 0.95,
        f'Œº={mean_res:.2f}\nœÉ={std_res:.2f}',
        transform=ax.transAxes,
        verticalalignment='top',
        horizontalalignment='right',
        bbox={'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.5},
    )

# Hide the sixth (unused) panel.
axes[5].axis('off')

plt.tight_layout()
plt.savefig(EXPORTS_DIR / 'comparison_residuals_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Gr√°fico guardado: comparison_residuals_distribution.png")

## 3️⃣ Residuales por Segmento

Analizar cómo se comportan los errores en diferentes clusters de tiendas.

In [None]:
# ===========================
# Residual boxplots per shop cluster (VAL)
# ===========================
# 2x3 grid flattened to a 1-D array: one panel per model, last panel unused.
fig, axes = plt.subplots(2, 3, figsize=(20, 12))
axes = axes.flatten()

for idx, model_name in enumerate(MODELS):
    pred_file = EXPORTS_DIR / f'predictions_{model_name}_val.csv'

    if not pred_file.exists():
        # No export for this model: its panel is left untouched.
        continue

    ax = axes[idx]
    df = pd.read_csv(pred_file)

    # One box per value of 'shop_cluster'.
    df.boxplot(column='residual', by='shop_cluster', ax=ax,
               patch_artist=True, grid=False)

    # Replace the title/labels pandas sets by default and add a zero-error line.
    ax.set_title(f'{model_name.upper()} - Residuales por Cluster (VAL)',
                 fontsize=12, fontweight='bold')
    ax.set_xlabel('Shop Cluster')
    ax.set_ylabel('Residual')
    ax.axhline(0, color='red', linestyle='--', linewidth=1.5, alpha=0.7)
    # Set on the axes directly instead of going through pyplot's "current axes".
    ax.tick_params(axis='x', rotation=0)

# A grouped pandas boxplot also writes a figure-level "Boxplot grouped by ..."
# title on every call; clear it so it does not sit on top of the panel titles
# in the saved figure.
fig.suptitle('')

# Hide the sixth (unused) panel.
axes[5].axis('off')

plt.tight_layout()
plt.savefig(EXPORTS_DIR / 'comparison_residuals_by_cluster.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Gr√°fico guardado: comparison_residuals_by_cluster.png")

## 4️⃣ Comparación SHAP (RandomForest vs XGBoost)

Comparar la importancia de features entre los dos modelos tree-based.

In [None]:
# ===========================
# SHAP comparison (top 10 features of each tree-based model)
# ===========================
def _shap_values_for(summary, features):
    """Return `summary`'s mean |SHAP| for each name in `features`, in that order.

    Features that do not appear in `summary` get 0 so that they are drawn as an
    empty bar rather than breaking the plot.
    """
    lookup = summary.drop_duplicates('feature').set_index('feature')['mean_abs_shap_value']
    return lookup.reindex(features).fillna(0).to_numpy()


shap_rf = pd.read_csv(EXPORTS_DIR / 'shap_summary_randomforest_val.csv')
shap_xgb = pd.read_csv(EXPORTS_DIR / 'shap_summary_xgboost_val.csv')

# Top 10 features of each model (lowest 'rank' values first).
top_rf = shap_rf.nsmallest(10, 'rank')[['feature', 'mean_abs_shap_value']].copy()
top_rf['model'] = 'RandomForest'

top_xgb = shap_xgb.nsmallest(10, 'rank')[['feature', 'mean_abs_shap_value']].copy()
top_xgb['model'] = 'XGBoost'

# Long-format table with both top-10 lists.
top_features = pd.concat([top_rf, top_xgb])

# Shared feature axis: RandomForest's top features in rank order, followed by
# the features that only XGBoost ranks in its top 10. Pairing the two top-10
# lists by position would draw XGBoost's bars next to RandomForest's labels
# even when the features differ.
plot_features = top_features['feature'].drop_duplicates().tolist()
rf_values = _shap_values_for(shap_rf, plot_features)
xgb_values = _shap_values_for(shap_xgb, plot_features)

# Grouped horizontal bars: one pair of bars per feature.
fig, ax = plt.subplots(figsize=(14, 8))

# Sized from the data so a summary with fewer than 10 rows still plots.
x = np.arange(len(plot_features))
width = 0.35

ax.barh(x - width/2, rf_values, width, label='RandomForest', color=COLORS['randomforest'], alpha=0.8)
ax.barh(x + width/2, xgb_values, width, label='XGBoost', color=COLORS['xgboost'], alpha=0.8)

ax.set_yticks(x)
ax.set_yticklabels(plot_features)
ax.invert_yaxis()  # first feature at the top
ax.set_xlabel('Mean |SHAP Value|', fontsize=12)
ax.set_title('Top 10 Features - Comparaci√≥n SHAP (RF vs XGB)', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.savefig(EXPORTS_DIR / 'comparison_shap_rf_vs_xgb.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Gr√°fico guardado: comparison_shap_rf_vs_xgb.png")

# Feature consistency: names present in both top-10 lists.
common_features = set(top_rf['feature']) & set(top_xgb['feature'])
print(f"\nüîç Features comunes en Top 10: {len(common_features)}/10")
print(f"   {', '.join(sorted(common_features))}")

## 5️⃣ Curvas de Aprendizaje (DL Models)

Comparar la convergencia de MLP vs LSTM.

In [None]:
# ===========================
# Learning curves: MLP vs LSTM
# ===========================
fig, axes = plt.subplots(1, 2, figsize=(16, 5))

for idx, model_name in enumerate(['mlp', 'lstm']):
    ax = axes[idx]
    curve_file = EXPORTS_DIR / f'learning_curves_{model_name}.csv'

    # Without an export, show a placeholder message in the panel instead.
    if not curve_file.exists():
        ax.text(0.5, 0.5, f'{model_name.upper()} learning curves no disponibles',
                ha='center', va='center', transform=ax.transAxes)
        continue

    df = pd.read_csv(curve_file)

    # Validation loss is optional: the column may be absent or entirely NaN.
    has_val_loss = 'val_loss' in df.columns and not df['val_loss'].isna().all()

    # Train loss, plus validation loss when available.
    ax.plot(df['epoch'], df['train_loss'], label='Train Loss',
            color=COLORS[model_name], linewidth=2)

    if has_val_loss:
        ax.plot(df['epoch'], df['val_loss'], label='Val Loss',
                color='red', linewidth=2, linestyle='--')

    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel('Loss (MSE)', fontsize=12)
    ax.set_title(f'{model_name.upper()} - Curva de Aprendizaje',
                 fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(alpha=0.3)

    # Mark the epoch with the lowest validation loss.
    if has_val_loss:
        # Take the epoch from the 'epoch' column of the best row. The curve is
        # plotted against that column, so deriving the x position from the row
        # index (+1) would misplace the marker whenever epochs are not exactly
        # index + 1 (for example 0-based or resumed runs).
        best_row = df['val_loss'].idxmin()
        best_epoch = df.loc[best_row, 'epoch']
        best_loss = df.loc[best_row, 'val_loss']
        ax.scatter(best_epoch, best_loss, color='red', s=100, zorder=5, marker='*')
        ax.annotate(f'Best: Epoch {best_epoch}',
                    xy=(best_epoch, best_loss),
                    xytext=(10, 10), textcoords='offset points',
                    bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.5),
                    arrowprops=dict(arrowstyle='->', color='black'))

plt.tight_layout()
plt.savefig(EXPORTS_DIR / 'comparison_learning_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Gr√°fico guardado: comparison_learning_curves.png")

## 6️⃣ Generalización: Val vs Test

Analizar la diferencia de RMSE entre validación y test para detectar overfitting.

In [None]:
# ===========================
# Generalization analysis: validation RMSE vs test RMSE
# ===========================
# One record per model that has metrics for BOTH splits.
generalization = []

for model_name in MODELS:
    val_metrics = metrics_df[(metrics_df['model'] == model_name) & (metrics_df['split'] == 'val')]
    test_metrics = metrics_df[(metrics_df['model'] == model_name) & (metrics_df['split'] == 'test')]

    # A model missing either split cannot be compared; skip it.
    if len(val_metrics) == 0 or len(test_metrics) == 0:
        continue

    val_rmse = val_metrics['rmse'].values[0]
    test_rmse = test_metrics['rmse'].values[0]
    diff = test_rmse - val_rmse
    # Change from validation to test, as a percentage of the validation RMSE.
    pct_diff = (diff / val_rmse) * 100

    generalization.append({
        'model': model_name,
        'val_rmse': val_rmse,
        'test_rmse': test_rmse,
        'diff': diff,
        'pct_diff': pct_diff
    })

gen_df = pd.DataFrame(generalization)

# Table
print("\nüìä AN√ÅLISIS DE GENERALIZACI√ìN (Val vs Test)\n")
print(gen_df.to_string(index=False))

# Grouped bars: validation and test RMSE side by side for each model.
fig, ax = plt.subplots(figsize=(12, 6))

x = np.arange(len(gen_df))
width = 0.35

ax.bar(x - width/2, gen_df['val_rmse'], width, label='Val RMSE',
       color='#3498db', alpha=0.8)
ax.bar(x + width/2, gen_df['test_rmse'], width, label='Test RMSE',
       color='#e74c3c', alpha=0.8)

# Percentage label above each pair: red when test is worse than validation,
# green otherwise.
for pos, row in zip(x, gen_df.to_dict('records')):
    if row['diff'] > 0:
        color = 'red'
        label = f'+{row["pct_diff"]:.1f}%'
    else:
        color = 'green'
        label = f'{row["pct_diff"]:.1f}%'

    # The gap above the taller bar is given in points, not in RMSE units, so
    # the label sits just above the bars whatever the scale of the metric is.
    ax.annotate(label,
                xy=(pos, max(row['val_rmse'], row['test_rmse'])),
                xytext=(0, 4), textcoords='offset points',
                ha='center', va='bottom', color=color, fontweight='bold')

ax.set_xlabel('Modelo', fontsize=12)
ax.set_ylabel('RMSE', fontsize=12)
ax.set_title('Generalizaci√≥n: Val vs Test RMSE', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels([m.upper() for m in gen_df['model']], rotation=45)
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig(EXPORTS_DIR / 'comparison_generalization.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ Gr√°fico guardado: comparison_generalization.png")

# Most stable model: smallest absolute percentage change from val to test.
most_stable = gen_df.loc[gen_df['pct_diff'].abs().idxmin()]
print(f"\nüéØ Modelo m√°s estable: {most_stable['model'].upper()} ({most_stable['pct_diff']:.2f}% diff)")

## 7️⃣ Scatter Plots: Predicciones vs Real

Visualizar la correlación entre predicciones y valores reales.

In [None]:
# ===========================
# Scatter plots: y_pred against y_true (VAL)
# ===========================
# 2x3 grid flattened to a 1-D array: one panel per model, last panel unused.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

for idx, model_name in enumerate(MODELS):
    pred_file = EXPORTS_DIR / f'predictions_{model_name}_val.csv'
    if not pred_file.exists():
        # No export for this model: its panel is left untouched.
        continue

    ax = axes[idx]
    df = pd.read_csv(pred_file)

    # Draw at most 5000 points (fixed seed) to keep rendering fast.
    df_sample = df.sample(5000, random_state=42) if len(df) > 5000 else df

    ax.scatter(
        df_sample['y_true'],
        df_sample['y_pred'],
        alpha=0.3,
        s=10,
        color=COLORS[model_name],
    )

    # Diagonal from the origin up to the largest plotted value.
    max_val = max(df_sample['y_true'].max(), df_sample['y_pred'].max())
    ax.plot([0, max_val], [0, max_val], 'r--', linewidth=2, label='Perfect Prediction')

    ax.set_xlabel('y_true', fontsize=11)
    ax.set_ylabel('y_pred', fontsize=11)
    ax.set_title(
        f'{model_name.upper()} - Predicciones vs Real (VAL)',
        fontsize=12,
        fontweight='bold',
    )
    ax.legend()
    ax.grid(alpha=0.3)

    # R2 is computed on the full frame, not on the plotted sample.
    r2 = r2_score(df['y_true'], df['y_pred'])
    ax.text(
        0.05, 0.95,
        f'R¬≤ = {r2:.4f}',
        transform=ax.transAxes,
        verticalalignment='top',
        bbox={'boxstyle': 'round', 'facecolor': 'yellow', 'alpha': 0.5},
    )

# Hide the sixth (unused) panel.
axes[5].axis('off')

plt.tight_layout()
plt.savefig(EXPORTS_DIR / 'comparison_scatter_pred_vs_true.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Gr√°fico guardado: comparison_scatter_pred_vs_true.png")

## 8️⃣ Resumen Final

Generar un reporte consolidado de todos los análisis.

In [None]:
# ===========================
# Consolidated summary
# ===========================
def _print_best_per_metric(split_metrics):
    """Print the winning model and its score for RMSE, MAE and R2.

    RMSE and MAE are won by the lowest value, R2 by the highest.
    """
    for prefix, column, pick_index, pick_value in (
        ("   ‚Ä¢ RMSE: ", 'rmse', 'idxmin', 'min'),
        ("   ‚Ä¢ MAE:  ", 'mae', 'idxmin', 'min'),
        ("   ‚Ä¢ R¬≤:   ", 'r2', 'idxmax', 'max'),
    ):
        scores = split_metrics[column]
        winner = split_metrics.loc[getattr(scores, pick_index)(), 'model']
        print(f"{prefix}{winner.upper()} ({getattr(scores, pick_value)():.4f})")


banner = "=" * 60

print("\n" + banner)
print("üéØ RESUMEN CONSOLIDADO - AN√ÅLISIS COMPARATIVO")
print(banner + "\n")

# Sections 1 and 2: best model per metric on each split.
print("üìä 1. MEJOR MODELO POR M√âTRICA (VALIDACI√ìN)")
val_metrics = metrics_df[metrics_df['split'] == 'val']
_print_best_per_metric(val_metrics)

print("\nüìä 2. MEJOR MODELO POR M√âTRICA (TEST)")
test_metrics = metrics_df[metrics_df['split'] == 'test']
_print_best_per_metric(test_metrics)

# Section 3: uses `most_stable`, set by the generalization cell.
print("\nüéØ 3. MODELO M√ÅS ESTABLE (Val‚ÜíTest)")
print(f"   {most_stable['model'].upper()} con {most_stable['pct_diff']:.2f}% de diferencia")

# Section 4: uses `common_features`, set by the SHAP comparison cell.
print("\nüîç 4. CONSISTENCIA DE FEATURES (SHAP)")
print(f"   {len(common_features)}/10 features comunes en Top 10 (RF vs XGB)")

# Section 5: report which of the expected figures exist in EXPORTS_DIR.
print("\nüìà 5. GR√ÅFICOS GENERADOS")
generated_plots = [
    'comparison_metrics.png',
    'comparison_residuals_distribution.png',
    'comparison_residuals_by_cluster.png',
    'comparison_shap_rf_vs_xgb.png',
    'comparison_learning_curves.png',
    'comparison_generalization.png',
    'comparison_scatter_pred_vs_true.png'
]
for plot in generated_plots:
    found = (EXPORTS_DIR / plot).exists()
    print(f"   ‚úÖ {plot}" if found else f"   ‚ö†Ô∏è {plot} (no generado)")

print("\n" + banner)
print("‚úÖ AN√ÅLISIS COMPARATIVO COMPLETADO")
print(banner)