# Notebook 04: Modelo XGBoost para Predicción de Urgencias

**Objetivo**: Entrenar clasificador XGBoost para predecir urgencias (Definición A: P75) usando features de series temporales.

## Estrategia:
1. Cargar datasets con features (train/test)
2. Entrenar XGBoost con `scale_pos_weight` para manejar desbalance
3. Evaluar con métricas: Accuracy, Precision, Recall, F1, ROC-AUC
4. Analizar feature importance
5. Generar predicciones y análisis por producto
6. Guardar modelo y resultados

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix, classification_report, roc_curve, auc
)
import xgboost as xgb
from sklearn.model_selection import cross_val_score
import joblib
import warnings
# Suppress every warning for the rest of the session (applies to all libraries).
warnings.filterwarnings('ignore')

# Plot configuration: apply the matplotlib style sheet first, then set the
# seaborn colour palette on top of it.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

print("‚úì Librer√≠as cargadas")

## 1. Carga de Datos

In [None]:
# Read the pre-built train and test feature tables from disk.
df_train = pd.read_csv('../data/simulated/train_features.csv')
df_test = pd.read_csv('../data/simulated/test_features.csv')

# The model's input columns are listed in the 'feature' column of a separate CSV.
feature_table = pd.read_csv('../data/simulated/feature_list.csv')
features_list = feature_table['feature'].tolist()

# Print the same three summary lines (rows, distinct products, urgent count
# and share) for each split.
print(f"üìä Datos cargados:")
for split_name, split_df in (("TRAIN", df_train), ("TEST", df_test)):
    urgent_flags = split_df['is_urgent_a']
    print(f"\n{split_name}:")
    print(f"  ‚Ä¢ Registros: {len(split_df):,}")
    print(f"  ‚Ä¢ Productos: {split_df['item_id'].nunique():,}")
    print(f"  ‚Ä¢ Urgencias: {urgent_flags.sum():,} ({urgent_flags.mean():.1%})")

print(f"\n‚úì Features disponibles: {len(features_list)}")

## 2. Preparación de Features y Target

In [None]:
# Split each table into the feature matrix (columns named in features_list)
# and the binary target column.
target_column = 'is_urgent_a'
X_train, y_train = df_train[features_list], df_train[target_column]
X_test, y_test = df_test[features_list], df_test[target_column]

print(f"‚úì Shapes preparados:")
for array_name, array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"  ‚Ä¢ {array_name}: {array.shape}")

# Data-quality check: count missing and infinite entries in each feature matrix.
print(f"\nüîç Verificaci√≥n de calidad:")
for matrix_name, matrix in (("X_train", X_train), ("X_test", X_test)):
    nan_count = matrix.isnull().sum().sum()
    inf_count = np.isinf(matrix.values).sum()
    print(f"  ‚Ä¢ NaN en {matrix_name}: {nan_count}")
    print(f"  ‚Ä¢ Inf en {matrix_name}: {inf_count}")

## 3. Entrenamiento XGBoost

In [None]:
# Compute scale_pos_weight (negatives / positives) to compensate for class
# imbalance in the training target.
n_neg = int((y_train == 0).sum())
n_pos = int((y_train == 1).sum())

# Warnings are silenced in this notebook, so a division by zero would quietly
# produce inf and be passed on to the model. Fail loudly instead.
if n_pos == 0:
    raise ValueError(
        "y_train contains no positive samples (is_urgent_a == 1); "
        "scale_pos_weight cannot be computed"
    )

scale_pos_weight = n_neg / n_pos

print(f"‚öñÔ∏è Class balance:")
print(f"  ‚Ä¢ Clase 0 (no urgente): {n_neg:,} ({(y_train == 0).mean():.1%})")
print(f"  ‚Ä¢ Clase 1 (urgente): {n_pos:,} ({(y_train == 1).mean():.1%})")
print(f"  ‚Ä¢ scale_pos_weight: {scale_pos_weight:.2f}")

In [None]:
# Configure and fit the XGBoost binary classifier.
print("üöÄ Entrenando XGBoost...\n")

model = xgb.XGBClassifier(
    n_estimators=200,           # Number of boosted trees
    max_depth=6,                # Maximum tree depth
    learning_rate=0.1,          # Learning rate
    subsample=0.8,              # Fraction of rows sampled per tree
    colsample_bytree=0.8,       # Fraction of features sampled per tree
    scale_pos_weight=scale_pos_weight,  # Class-imbalance weight computed in the previous cell
    objective='binary:logistic', # Binary classification
    eval_metric='logloss',      # Metric reported on eval_set
    random_state=42,
    n_jobs=-1                   # Use all cores
)

# Fit on the training split. No early_stopping_rounds is configured anywhere in
# this cell, so eval_set is only used to report logloss during training.
# NOTE(review): the second eval_set entry is the test split; if early stopping
# is ever enabled, use a separate validation split to avoid leaking test data.
model.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    verbose=50  # Print progress every 50 iterations
)

print("\n‚úì Modelo entrenado")

## 4. Predicciones

In [None]:
# Hard class labels for both splits.
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)

# Positive-class score: column 1 of predict_proba for each split.
y_train_proba = model.predict_proba(X_train)[:, 1]
y_test_proba = model.predict_proba(X_test)[:, 1]

print("‚úì Predicciones generadas")

## 5. Evaluación del Modelo

In [None]:
# Label-based metrics share one call signature, so evaluate them in one pass
# per split; ROC-AUC is computed separately because it needs the scores.
label_metrics = (accuracy_score, precision_score, recall_score, f1_score)

train_acc, train_precision, train_recall, train_f1 = [
    metric(y_train, y_train_pred) for metric in label_metrics
]
train_auc = roc_auc_score(y_train, y_train_proba)

test_acc, test_precision, test_recall, test_f1 = [
    metric(y_test, y_test_pred) for metric in label_metrics
]
test_auc = roc_auc_score(y_test, y_test_proba)

separator = "="*70
print("\n" + separator)
print("üìä M√âTRICAS DE EVALUACI√ìN")
print(separator)

# Side-by-side table: train value, test value, and their gap (train - test).
print(f"\n{'M√©trica':<20} {'TRAIN':<15} {'TEST':<15} {'Diferencia':<15}")
print("-" * 70)
for metric_label, train_value, test_value in (
    ('Accuracy', train_acc, test_acc),
    ('Precision', train_precision, test_precision),
    ('Recall', train_recall, test_recall),
    ('F1-Score', train_f1, test_f1),
    ('ROC-AUC', train_auc, test_auc),
):
    print(f"{metric_label:<20} {train_value:>14.3f} {test_value:>14.3f} {train_value - test_value:>14.3f}")
print(separator)

# Plain-language reading of the test-set figures.
print(f"\nüéØ INTERPRETACI√ìN:")
print(f"  ‚Ä¢ Accuracy: {test_acc:.1%} de predicciones correctas")
print(f"  ‚Ä¢ Precision: {test_precision:.1%} de las urgencias predichas son reales")
print(f"  ‚Ä¢ Recall: {test_recall:.1%} de las urgencias reales fueron detectadas")
print(f"  ‚Ä¢ F1-Score: {test_f1:.3f} (balance entre precision y recall)")
print(f"  ‚Ä¢ ROC-AUC: {test_auc:.3f} (capacidad discriminativa del modelo)")

In [None]:
# Per-class precision/recall/F1 table for the test split.
class_names = ['No Urgente', 'Urgente']
report_text = classification_report(y_test, y_test_pred, target_names=class_names)

print("\nüìã CLASSIFICATION REPORT (TEST):")
print("="*70)
print(report_text)

## 6. Confusion Matrix

In [None]:
# Confusion matrix on the test split, drawn as an annotated heatmap and saved
# to disk, followed by a textual breakdown of the four cells.
#
# labels=[0, 1] pins the matrix to 2x2 even if one class is missing from both
# y_test and y_test_pred; without it the four-way unpack of cm.ravel() below
# would fail and the tick labels would no longer match the rows/columns.
cm = confusion_matrix(y_test, y_test_pred, labels=[0, 1])

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['No Urgente', 'Urgente'],
            yticklabels=['No Urgente', 'Urgente'],
            ax=ax, cbar_kws={'label': 'Frecuencia'})

ax.set_xlabel('Predicci√≥n', fontsize=12, fontweight='bold')
ax.set_ylabel('Real', fontsize=12, fontweight='bold')
ax.set_title('Matriz de Confusi√≥n - XGBoost (Test Set)', fontsize=14, fontweight='bold', pad=20)

# Side annotations: counts and share of all test rows for the two correct cells.
# With labels [0, 1], ravel() yields the cells in the order TN, FP, FN, TP.
total = cm.sum()
tn, fp, fn, tp = cm.ravel()
ax.text(2.3, 0.3, f'TN: {tn}\n({tn/total:.1%})', fontsize=10, ha='left')
ax.text(2.3, 1.3, f'TP: {tp}\n({tp/total:.1%})', fontsize=10, ha='left')

plt.tight_layout()
plt.savefig('../results/confusion_matrix_xgboost.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\n‚úì Gr√°fico guardado: results/confusion_matrix_xgboost.png")
print(f"\nDesglose:")
print(f"  ‚Ä¢ True Negatives (TN): {tn:,} - Correctamente identificados como no urgentes")
print(f"  ‚Ä¢ False Positives (FP): {fp:,} - Falsa alarma (predijo urgente, no lo era)")
print(f"  ‚Ä¢ False Negatives (FN): {fn:,} - Urgencia perdida (no detect√≥ urgencia real)")
print(f"  ‚Ä¢ True Positives (TP): {tp:,} - Urgencia correctamente detectada")

## 7. ROC Curve

In [None]:
# ROC curve on the test split: compute the curve and its area, plot it against
# the chance diagonal, mark the Youden-J point, and save the figure.
fpr, tpr, thresholds = roc_curve(y_test, y_test_proba)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(10, 7))

# ROC curve of the model
ax.plot(fpr, tpr, color='darkorange', lw=2, 
        label=f'ROC curve (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', 
        label='Random classifier (AUC = 0.500)')

# "Optimal" operating point: the threshold that maximises Youden's J (TPR - FPR).
# It is only reported and plotted here; the predictions made earlier are not
# recomputed with it.
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = thresholds[optimal_idx]
ax.plot(fpr[optimal_idx], tpr[optimal_idx], 'ro', markersize=10, 
        label=f'Optimal threshold = {optimal_threshold:.3f}')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate', fontsize=12, fontweight='bold')
ax.set_ylabel('True Positive Rate', fontsize=12, fontweight='bold')
ax.set_title('ROC Curve - XGBoost', fontsize=14, fontweight='bold', pad=20)
ax.legend(loc='lower right', fontsize=10)
ax.grid(alpha=0.3)

plt.tight_layout()
plt.savefig('../results/roc_curve_xgboost.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\n‚úì Gr√°fico guardado: results/roc_curve_xgboost.png")
print(f"\nUmbral √≥ptimo: {optimal_threshold:.3f}")
print(f"  ‚Ä¢ TPR (Recall): {tpr[optimal_idx]:.3f}")
print(f"  ‚Ä¢ FPR: {fpr[optimal_idx]:.3f}")

## 8. Feature Importance

In [None]:
# Pair every feature name with the fitted model's importance score and rank
# them from most to least important.
importance_table = pd.DataFrame(
    {'feature': features_list, 'importance': model.feature_importances_}
)
feature_importance = importance_table.sort_values('importance', ascending=False)

print("\nüìä TOP 20 FEATURES M√ÅS IMPORTANTES:")
print("="*60)
top20 = feature_importance.head(20)
for feat_name, feat_score in zip(top20['feature'], top20['importance']):
    print(f"{feat_name:<30} {feat_score:>10.4f}")

# Persist the full ranking (all features, not just the top 20).
feature_importance.to_csv('../results/feature_importance_xgboost.csv', index=False)
print(f"\n‚úì Feature importance guardada: results/feature_importance_xgboost.csv")

In [None]:
# Horizontal bar chart of the 20 highest-ranked features, saved to disk.
fig, ax = plt.subplots(figsize=(10, 8))
top_features = feature_importance.head(20)

# Bars are placed at positions 0..n-1 and labelled with the feature names.
ax.barh(range(len(top_features)), top_features['importance'], color='steelblue')
ax.set_yticks(range(len(top_features)))
ax.set_yticklabels(top_features['feature'])
ax.invert_yaxis()  # Put the first (most important) feature at the top
ax.set_xlabel('Importance', fontsize=12, fontweight='bold')
ax.set_title('Top 20 Features - XGBoost', fontsize=14, fontweight='bold', pad=20)
ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.savefig('../results/feature_importance_xgboost.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úì Gr√°fico guardado: results/feature_importance_xgboost.png")

## 9. Análisis por Producto

In [None]:
# Attach the test predictions to df_test (adds two columns in place).
df_test['pred_urgent'] = y_test_pred
df_test['pred_proba'] = y_test_proba

# Per-product metrics: one row per item_id, computed over that product's test rows.
metricas_producto = df_test.groupby('item_id').apply(
    lambda x: pd.Series({
        'n_semanas': len(x),
        'urgencias_reales': x['is_urgent_a'].sum(),
        'urgencias_pred': x['pred_urgent'].sum(),
        'accuracy': accuracy_score(x['is_urgent_a'], x['pred_urgent']),
        # zero_division=0: score 0 when the metric's denominator is zero for a product
        'precision': precision_score(x['is_urgent_a'], x['pred_urgent'], zero_division=0),
        'recall': recall_score(x['is_urgent_a'], x['pred_urgent'], zero_division=0),
        'f1': f1_score(x['is_urgent_a'], x['pred_urgent'], zero_division=0)
    })
).reset_index()

print("\nüì¶ M√âTRICAS POR PRODUCTO (Estad√≠sticas):")
print("="*70)
print(metricas_producto[['accuracy', 'precision', 'recall', 'f1']].describe())

# Ten products with the highest F1
print("\nüèÜ TOP 10 PRODUCTOS MEJOR PREDICHOS (Mayor F1):")
print("="*70)
top_productos = metricas_producto.nlargest(10, 'f1')
for idx, row in top_productos.iterrows():
    print(f"{row['item_id']:<20} F1: {row['f1']:.3f}  Accuracy: {row['accuracy']:.3f}  Recall: {row['recall']:.3f}")

# Save the per-product metrics table
metricas_producto.to_csv('../results/metricas_por_producto_xgboost.csv', index=False)
print(f"\n‚úì M√©tricas por producto guardadas: results/metricas_por_producto_xgboost.csv")

In [None]:
# Error analysis of the test-set predictions: a 2x2 figure (accuracy per
# category, error rate over time, share of each outcome type, predicted
# probability of the misclassified rows), saved to disk, plus a text summary.
# Requires df_test to already carry the 'pred_urgent' and 'pred_proba' columns.
fig, axes = plt.subplots(2, 2, figsize=(16, 10))

# (a) Accuracy per product category, only when the column is available
if 'category' in df_test.columns:
    error_por_categoria = df_test.groupby('category').apply(
        lambda x: pd.Series({
            'accuracy': accuracy_score(x['is_urgent_a'], x['pred_urgent']),
            'n_samples': len(x)
        })
    ).reset_index()
    
    axes[0, 0].bar(error_por_categoria['category'], error_por_categoria['accuracy'], color='steelblue')
    axes[0, 0].set_ylabel('Accuracy', fontsize=11, fontweight='bold')
    axes[0, 0].set_xlabel('Categor√≠a', fontsize=11, fontweight='bold')
    axes[0, 0].set_title('Accuracy por Categor√≠a de Producto', fontsize=12, fontweight='bold')
    axes[0, 0].tick_params(axis='x', rotation=45)
    axes[0, 0].grid(alpha=0.3, axis='y')
    
    # Write the sample count above each bar (row position == bar position
    # because the index was reset above)
    for idx, row in error_por_categoria.iterrows():
        axes[0, 0].text(idx, row['accuracy'] + 0.01, f"n={int(row['n_samples'])}", 
                       ha='center', fontsize=8)
else:
    # No category column: hide the panel instead of leaving empty axes
    axes[0, 0].axis('off')

# (b) Error rate over time. week_start is parsed to datetime on a local copy so
# the x-axis is a real time axis rather than one categorical tick per string;
# df_test itself is left untouched here.
df_test_sorted = df_test.copy()
df_test_sorted['week_start'] = pd.to_datetime(df_test_sorted['week_start'])
df_test_sorted = df_test_sorted.sort_values('week_start')
df_test_sorted['error'] = (df_test_sorted['is_urgent_a'] != df_test_sorted['pred_urgent']).astype(int)
errores_por_semana = df_test_sorted.groupby('week_start')['error'].mean()

axes[0, 1].plot(errores_por_semana.index, errores_por_semana.values, color='red', linewidth=2)
axes[0, 1].set_ylabel('Tasa de Error', fontsize=11, fontweight='bold')
axes[0, 1].set_xlabel('Semana', fontsize=11, fontweight='bold')
axes[0, 1].set_title('Evoluci√≥n de la Tasa de Error en Test Set', fontsize=12, fontweight='bold')
axes[0, 1].tick_params(axis='x', rotation=45)
axes[0, 1].grid(alpha=0.3)
axes[0, 1].axhline(y=df_test_sorted['error'].mean(), color='orange', 
                   linestyle='--', label=f'Media: {df_test_sorted["error"].mean():.3f}')
axes[0, 1].legend()

# (c) Label every test row as correct, false positive, or false negative
df_test['error_type'] = 'Correcto'
df_test.loc[(df_test['pred_urgent'] == 1) & (df_test['is_urgent_a'] == 0), 'error_type'] = 'Falso Positivo'
df_test.loc[(df_test['pred_urgent'] == 0) & (df_test['is_urgent_a'] == 1), 'error_type'] = 'Falso Negativo'

error_counts = df_test['error_type'].value_counts()
colors_errors = {'Correcto': 'green', 'Falso Positivo': 'orange', 'Falso Negativo': 'red'}
axes[1, 0].pie(error_counts.values, labels=error_counts.index, autopct='%1.1f%%',
              colors=[colors_errors.get(x, 'gray') for x in error_counts.index],
              startangle=90)
axes[1, 0].set_title('Distribuci√≥n de Tipos de Predicci√≥n', fontsize=12, fontweight='bold')

# (d) Predicted probabilities of the misclassified rows. The Series are named
# *_proba so they do not overwrite the integer fp/fn counts unpacked from the
# confusion matrix in an earlier cell.
correctas = df_test[df_test['error_type'] == 'Correcto']['pred_proba']
fp_proba = df_test[df_test['error_type'] == 'Falso Positivo']['pred_proba']
fn_proba = df_test[df_test['error_type'] == 'Falso Negativo']['pred_proba']

if len(fp_proba) > 0:
    axes[1, 1].hist(fp_proba, bins=30, alpha=0.7, label='Falso Positivo', color='orange', edgecolor='black')
if len(fn_proba) > 0:
    axes[1, 1].hist(fn_proba, bins=30, alpha=0.7, label='Falso Negativo', color='red', edgecolor='black')

axes[1, 1].axvline(x=0.5, color='black', linestyle='--', linewidth=2, label='Umbral')
axes[1, 1].set_xlabel('Probabilidad Predicha', fontsize=11, fontweight='bold')
axes[1, 1].set_ylabel('Frecuencia', fontsize=11, fontweight='bold')
axes[1, 1].set_title('Distribuci√≥n de Probabilidades en Errores', fontsize=12, fontweight='bold')
axes[1, 1].legend()
axes[1, 1].grid(alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('../results/analisis_errores_xgboost.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úì Gr√°fico guardado: results/analisis_errores_xgboost.png")
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n
print(f"\nüìä Resumen de errores:")
print(f"  ‚Ä¢ Falsos Positivos: {(df_test['error_type'] == 'Falso Positivo').sum():,} ({(df_test['error_type'] == 'Falso Positivo').mean():.1%})")
print(f"  ‚Ä¢ Falsos Negativos: {(df_test['error_type'] == 'Falso Negativo').sum():,} ({(df_test['error_type'] == 'Falso Negativo').mean():.1%})")

In [None]:
# Persist the fitted model (replaces any existing file at this path).
joblib.dump(model, '../models/xgboost_urgency_classifier.pkl')
print("‚úì Modelo guardado: models/xgboost_urgency_classifier.pkl")

# Save the test-set predictions: identifying columns, true label, predicted
# label and predicted probability (replaces any existing file).
predicciones_test = df_test[['item_id', 'week_start', 'total_sales', 
                              'is_urgent_a', 'pred_urgent', 'pred_proba']].copy()
predicciones_test.to_csv('../results/predicciones_test_xgboost.csv', index=False)
print("‚úì Predicciones guardadas: results/predicciones_test_xgboost.csv")

# Save a one-row summary of train/test metrics, dataset sizes and the
# false-positive / false-negative counts (replaces any existing file).
# The counts rely on the 'error_type' column added to df_test by the
# error-analysis cell.
metricas_resumen = pd.DataFrame({
    'modelo': ['XGBoost'],
    'train_accuracy': [train_acc],
    'test_accuracy': [test_acc],
    'train_precision': [train_precision],
    'test_precision': [test_precision],
    'train_recall': [train_recall],
    'test_recall': [test_recall],
    'train_f1': [train_f1],
    'test_f1': [test_f1],
    'train_auc': [train_auc],
    'test_auc': [test_auc],
    'n_features': [len(features_list)],
    'n_train': [len(df_train)],
    'n_test': [len(df_test)],
    'falsos_positivos': [(df_test['error_type'] == 'Falso Positivo').sum()],
    'falsos_negativos': [(df_test['error_type'] == 'Falso Negativo').sum()]
})

metricas_resumen.to_csv('../results/metricas_resumen_xgboost.csv', index=False)
print("‚úì M√©tricas resumen guardadas: results/metricas_resumen_xgboost.csv")

print("\n‚ö†Ô∏è  IMPORTANTE: Todos los archivos anteriores han sido SOBRESCRITOS")

In [None]:
# Parse week_start as datetime (from here on df_test['week_start'] is datetime).
df_test['week_start'] = pd.to_datetime(df_test['week_start'])

# Figure: distribution of the predicted probabilities, split by true class.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: overlaid histograms, one per true class
for clase, label, color in [(0, 'No Urgente', 'skyblue'), (1, 'Urgente', 'salmon')]:
    probas = df_test[df_test['is_urgent_a'] == clase]['pred_proba']
    axes[0].hist(probas, bins=50, alpha=0.7, label=label, color=color, edgecolor='black')

axes[0].set_xlabel('Probabilidad Predicha', fontsize=11, fontweight='bold')
axes[0].set_ylabel('Frecuencia', fontsize=11, fontweight='bold')
axes[0].set_title('Distribuci√≥n de Probabilidades por Clase Real', fontsize=13, fontweight='bold')
axes[0].legend()
axes[0].grid(alpha=0.3)

# Right panel: boxplot of the same probabilities, one box per true class
data_box = [
    df_test[df_test['is_urgent_a'] == 0]['pred_proba'],
    df_test[df_test['is_urgent_a'] == 1]['pred_proba']
]
# patch_artist=True makes the boxes fillable so the face colours can be set below
bp = axes[1].boxplot(data_box, labels=['No Urgente', 'Urgente'], patch_artist=True)
bp['boxes'][0].set_facecolor('skyblue')
bp['boxes'][1].set_facecolor('salmon')
axes[1].set_ylabel('Probabilidad Predicha', fontsize=11, fontweight='bold')
axes[1].set_title('Distribuci√≥n de Probabilidades (Boxplot)', fontsize=13, fontweight='bold')
axes[1].grid(alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('../results/probabilidades_distribucion_xgboost.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úì Gr√°fico guardado: results/probabilidades_distribucion_xgboost.png")

# Text summary of the run: test metrics, top features, coverage, output files.
print("\n" + "="*70)
print("üéØ RESUMEN MODELO XGBOOST")
print("="*70)

print(f"\nüìä RENDIMIENTO EN TEST SET:")
print(f"  ‚Ä¢ Accuracy: {test_acc:.1%}")
print(f"  ‚Ä¢ Precision: {test_precision:.1%}")
print(f"  ‚Ä¢ Recall: {test_recall:.1%}")
print(f"  ‚Ä¢ F1-Score: {test_f1:.3f}")
print(f"  ‚Ä¢ ROC-AUC: {test_auc:.3f}")

# enumerate(..., 1) supplies the 1-based rank; the DataFrame index label (idx)
# is the feature's original position and is not used for numbering.
print(f"\nüîç TOP 5 FEATURES M√ÅS IMPORTANTES:")
for i, (idx, row) in enumerate(feature_importance.head(5).iterrows(), 1):
    print(f"  {i}. {row['feature']:<25} (importance: {row['importance']:.4f})")

print(f"\nüì¶ COBERTURA:")
print(f"  ‚Ä¢ Productos evaluados: {df_test['item_id'].nunique():,}")
print(f"  ‚Ä¢ Semanas de predicci√≥n: {df_test['week_start'].nunique()}")
print(f"  ‚Ä¢ Total predicciones: {len(df_test):,}")

print(f"\nüíæ ARCHIVOS GENERADOS:")
print(f"  Modelo:")
print(f"    ‚Ä¢ models/xgboost_urgency_classifier.pkl")
print(f"  Visualizaciones:")
print(f"    ‚Ä¢ results/confusion_matrix_xgboost.png")
print(f"    ‚Ä¢ results/roc_curve_xgboost.png")
print(f"    ‚Ä¢ results/feature_importance_xgboost.png")
print(f"    ‚Ä¢ results/probabilidades_distribucion_xgboost.png")
# NOTE(review): no savefig call visible in this notebook writes
# predicciones_series_temporales_xgboost.png; confirm it is produced
# elsewhere or drop this entry.
print(f"    ‚Ä¢ results/predicciones_series_temporales_xgboost.png")
print(f"    ‚Ä¢ results/analisis_errores_xgboost.png")
print(f"  Datos:")
print(f"    ‚Ä¢ results/feature_importance_xgboost.csv")
print(f"    ‚Ä¢ results/predicciones_test_xgboost.csv")
print(f"    ‚Ä¢ results/metricas_por_producto_xgboost.csv")
print(f"    ‚Ä¢ results/metricas_resumen_xgboost.csv")

print(f"\n‚úÖ MODELO XGBOOST COMPLETADO")
# Sanity check: a test accuracy above 95% is treated as a hint of data leakage.
print(f"\n‚ö†Ô∏è  Nota sobre m√©tricas:")
if test_acc > 0.95:
    print(f"  ‚ö†Ô∏è  ADVERTENCIA: Accuracy muy alta ({test_acc:.1%})")
    print(f"  ‚Üí Revisar si hay data leakage en features de urgencias")
    print(f"  ‚Üí Verificar que rolling features usan shift(1)")
    print(f"  ‚Üí M√©tricas realistas esperadas: 70-85% accuracy")
else:
    print(f"  ‚úì M√©tricas en rango realista (sin data leakage aparente)")

print(f"\nPr√≥ximos pasos:")
print(f"  ‚Üí Notebook 05: Modelo Prophet (series temporales)")
print(f"  ‚Üí Notebook 06: Modelo Random Forest")
print(f"  ‚Üí Notebook 07: Comparaci√≥n de modelos")
print("="*70)

## 10. Guardar Modelo y Resultados

In [None]:
# Persist the fitted model, the test-set predictions and a one-row metrics
# summary. Each target file is replaced if it already exists.
joblib.dump(model, '../models/xgboost_urgency_classifier.pkl')
print("‚úì Modelo guardado: models/xgboost_urgency_classifier.pkl")

# Test-set predictions: identifying columns, true label, predicted label and
# predicted probability.
predicciones_test = df_test[['item_id', 'week_start', 'total_sales', 
                              'is_urgent_a', 'pred_urgent', 'pred_proba']].copy()
predicciones_test.to_csv('../results/predicciones_test_xgboost.csv', index=False)
print("‚úì Predicciones guardadas: results/predicciones_test_xgboost.csv")

# False-positive / false-negative counts, derived directly from the label
# columns. The earlier save cell writes these two columns to the same CSV;
# including them here keeps this later overwrite from dropping them.
false_positive_mask = (df_test['pred_urgent'] == 1) & (df_test['is_urgent_a'] == 0)
false_negative_mask = (df_test['pred_urgent'] == 0) & (df_test['is_urgent_a'] == 1)

# One-row summary of train/test metrics and dataset sizes.
metricas_resumen = pd.DataFrame({
    'modelo': ['XGBoost'],
    'train_accuracy': [train_acc],
    'test_accuracy': [test_acc],
    'train_precision': [train_precision],
    'test_precision': [test_precision],
    'train_recall': [train_recall],
    'test_recall': [test_recall],
    'train_f1': [train_f1],
    'test_f1': [test_f1],
    'train_auc': [train_auc],
    'test_auc': [test_auc],
    'n_features': [len(features_list)],
    'n_train': [len(df_train)],
    'n_test': [len(df_test)],
    'falsos_positivos': [false_positive_mask.sum()],
    'falsos_negativos': [false_negative_mask.sum()]
})

metricas_resumen.to_csv('../results/metricas_resumen_xgboost.csv', index=False)
print("‚úì M√©tricas resumen guardadas: results/metricas_resumen_xgboost.csv")

## 11. Resumen Final

In [None]:
# Final text summary of the run: test metrics, top features, coverage and the
# list of output files.
print("\n" + "="*70)
print("üéØ RESUMEN MODELO XGBOOST")
print("="*70)

print(f"\nüìä RENDIMIENTO EN TEST SET:")
print(f"  ‚Ä¢ Accuracy: {test_acc:.1%}")
print(f"  ‚Ä¢ Precision: {test_precision:.1%}")
print(f"  ‚Ä¢ Recall: {test_recall:.1%}")
print(f"  ‚Ä¢ F1-Score: {test_f1:.3f}")
print(f"  ‚Ä¢ ROC-AUC: {test_auc:.3f}")

# feature_importance was sorted without resetting its index, so the index
# label yielded by iterrows() is the feature's original position, not its
# rank. Number the rows with enumerate instead.
print(f"\nüîç TOP 5 FEATURES M√ÅS IMPORTANTES:")
for rank, (_, row) in enumerate(feature_importance.head(5).iterrows(), start=1):
    print(f"  {rank}. {row['feature']:<25} (importance: {row['importance']:.4f})")

print(f"\nüì¶ COBERTURA:")
print(f"  ‚Ä¢ Productos evaluados: {df_test['item_id'].nunique():,}")
print(f"  ‚Ä¢ Semanas de predicci√≥n: {df_test['week_start'].nunique()}")
print(f"  ‚Ä¢ Total predicciones: {len(df_test):,}")

print(f"\nüíæ ARCHIVOS GENERADOS:")
print(f"  ‚Ä¢ models/xgboost_urgency_classifier.pkl")
print(f"  ‚Ä¢ results/confusion_matrix_xgboost.png")
print(f"  ‚Ä¢ results/roc_curve_xgboost.png")
print(f"  ‚Ä¢ results/feature_importance_xgboost.png")
print(f"  ‚Ä¢ results/feature_importance_xgboost.csv")
print(f"  ‚Ä¢ results/predicciones_test_xgboost.csv")
print(f"  ‚Ä¢ results/metricas_por_producto_xgboost.csv")
print(f"  ‚Ä¢ results/metricas_resumen_xgboost.csv")

print(f"\n‚úÖ MODELO XGBOOST COMPLETADO")
print(f"\nPr√≥ximos pasos:")
print(f"  ‚Üí Notebook 05: Modelo Prophet (series temporales)")
print(f"  ‚Üí Notebook 06: Modelo Random Forest")
print(f"  ‚Üí Notebook 07: Comparaci√≥n de modelos")
print("="*70)