In [0]:
%pip install cmdstanpy prophet lightgbm prefect holidays --quiet
%pip install -U opentelemetry-api --quiet
# Restart the Python process after the installs above; presumably required so
# the freshly installed packages are the ones imported by later cells
# (Databricks `dbutils` helper) — TODO confirm against the cluster runtime.
# NOTE(review): none of the packages are version-pinned, so the environment
# may differ between runs.
dbutils.library.restartPython()

In [0]:
# Importaciones
import ast
import sys
sys.path.append("/Workspace/Repos/desareca/santiago-weather-forecast")

from src.data.ingestion import load_from_delta_table
from src.data.preprocessing import prepare_time_series, train_test_split_temporal
from src.models.arima_model import ARIMAPredictor
from src.models.prophet_model import ProphetPredictor
from src.models.lightgbm_model import LightGBMPredictor
from src.evaluation.metrics import plot_predictions
from src.utils.config import *
import mlflow
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including any raised by the modelling libraries.
warnings.filterwarnings('ignore')

# Select the MLflow experiment used by the rest of the notebook.
# EXPERIMENT_NAME is presumably provided by the star import of
# src.utils.config — TODO confirm.
mlflow.set_experiment(EXPERIMENT_NAME)
print("‚úÖ Setup completo")

In [0]:
print("\n" + "="*70)
print("CARGANDO MEJORES MODELOS DESDE MLFLOW")
print("="*70)

# Look up the experiment so its runs can be queried by id.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
print(f"\nüìä Experimento: {EXPERIMENT_NAME}")
print(f"   ID: {experiment.experiment_id}")

# Keep only parent runs (individual CV folds are excluded), ordered by
# ascending average CV RMSE, so the first rows are the best configurations.
all_runs = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string="tags.run_type = 'parent' and tags.evaluation_type = 'cross_validation'",
    order_by=["metrics.cv_avg_rmse ASC"]
)

print(f"\nüìà Total configuraciones de modelos encontradas: {len(all_runs)}")


def _format_hyperparams(row, model_family):
    """Return a one-line summary of the hyperparameters logged for a run.

    Args:
        row: One row of the runs table (a pandas Series).
        model_family: Value of the run's ``tags.model_family`` tag.

    Returns:
        A short string; ``"N/A"`` for families this notebook does not know.
    """
    if model_family == 'ARIMA':
        p = row.get('params.p', 'N/A')
        d = row.get('params.d', 'N/A')
        q = row.get('params.q', 'N/A')
        return f"({p},{d},{q})"

    if model_family == 'Prophet':
        cp = row.get('params.changepoint_prior_scale', 'N/A')
        ss = row.get('params.seasonality_prior_scale', 'N/A')
        return f"cp={cp}, ss={ss}"

    if model_family == 'LightGBM':
        n_est = row.get('params.n_estimators', 'N/A')
        lr = row.get('params.learning_rate', 'N/A')
        depth = row.get('params.max_depth', 'N/A')
        return f"trees={n_est}, lr={lr}, d={depth}"

    return "N/A"


if len(all_runs) == 0:
    print("\n‚ö†Ô∏è  No se encontraron runs de experimentaci√≥n con CV.")
    print("   Ejecuta primero el notebook 03_model_experiments con grid search.")
else:
    # The heading names RMSE because that is the metric the runs are ordered by.
    print("\nüîù Top 10 configuraciones por RMSE (CV):")

    # Build the rows of the summary table.
    display_data = []

    # Rank comes from the position in the sorted table, not from the index label.
    for rank, (_, row) in enumerate(all_runs.head(10).iterrows(), start=1):
        model_family = row.get('tags.model_family', 'Unknown')
        if not isinstance(model_family, str):
            # The tag column exists but this run has no value for it.
            model_family = 'Unknown'

        # A run without a description tag yields None/NaN here, which cannot
        # be sliced; fall back to the placeholder instead of raising.
        description = row.get('tags.description', 'N/A')
        description = description[:40] if isinstance(description, str) else 'N/A'

        display_data.append({
            'Rank': rank,
            'Familia': model_family,
            'Hiperpar√°metros': _format_hyperparams(row, model_family),
            'F1 (CV)': f"{row.get('metrics.cv_avg_f1_score', 0):.3f}",
            'RMSE (CV)': f"{row.get('metrics.cv_avg_rmse', 0):.2f}",
            'Descripci√≥n': description
        })

    df_display = pd.DataFrame(display_data)
    print(df_display.to_string(index=False))

In [0]:
print("\n" + "="*70)
print("SELECCI√ìN DE MEJORES MODELOS POR FAMILIA")
print("="*70)

# Pick the best parent run of each family. `all_runs` was requested ordered by
# ascending CV RMSE, so the first matching row is the best one.
if 'tags.model_family' in all_runs.columns:
    family_tag = all_runs['tags.model_family']
    best_arima = all_runs[family_tag == 'ARIMA'].head(1)
    best_prophet = all_runs[family_tag == 'Prophet'].head(1)
    best_lgbm = all_runs[family_tag == 'LightGBM'].head(1)
else:
    # When the search returned no runs the tag column does not exist; use
    # empty selections instead of failing with a KeyError.
    best_arima = best_prophet = best_lgbm = all_runs.head(0)

# Funci√≥n para mostrar info detallada
def print_model_info(df, family_name):
    """Print a summary of the first run in ``df`` and return that run.

    Args:
        df: Runs table already filtered to one model family; only the first
            row is reported.
        family_name: Family label ('ARIMA', 'Prophet' or 'LightGBM'); it
            selects which hyperparameters are printed.

    Returns:
        The first row of ``df`` as a pandas Series, or ``None`` when ``df``
        has no rows.
    """
    if len(df) < 1:
        print(f"\n‚ùå No se encontr√≥ modelo {family_name}")
        return None

    best = df.iloc[0]
    rule = '=' * 60

    print(f"\n{rule}")
    print(f"ü•á Mejor {family_name}")
    print(f"{rule}")
    print(f"   Run ID: {best['run_id'][:12]}...")

    # (label, metric suffix, unit suffix) for each cross-validation metric line.
    cv_metrics = (
        ("F1-Score", "f1_score", ""),
        ("MAE", "mae", " mm"),
        ("RMSE", "rmse", " mm"),
    )
    for label, metric, unit in cv_metrics:
        print(f"   {label} (CV): {best.get('metrics.cv_avg_' + metric, 0):.3f}{unit}")

    # Hyperparameters reported for each family; other families print none.
    params_by_family = {
        'ARIMA': ('p', 'd', 'q'),
        'Prophet': (
            'yearly_seasonality',
            'weekly_seasonality',
            'changepoint_prior_scale',
            'seasonality_prior_scale',
        ),
        'LightGBM': (
            'n_estimators',
            'learning_rate',
            'max_depth',
            'num_leaves',
            'lags',
            'rolling_windows',
        ),
    }
    print(f"\n   üìã Hiperpar√°metros:")
    for param in params_by_family.get(family_name, ()):
        print(f"      {param} = {best.get('params.' + param, 'N/A')}")

    # The description line is only printed when the tag is present and non-null.
    description = best['tags.description'] if 'tags.description' in best.index else None
    if pd.notna(description):
        print(f"\n   üìù Descripci√≥n: {description}")

    return best

# Report the selected run of every family. Each *_info is the run's row, or
# None when no run of that family was found.
arima_info, prophet_info, lgbm_info = [
    print_model_info(candidates, family)
    for candidates, family in (
        (best_arima, "ARIMA"),
        (best_prophet, "Prophet"),
        (best_lgbm, "LightGBM"),
    )
]

In [0]:
print("", "=" * 70, sep="\n")
print("CARGAR MODELOS ENTRENADOS DESDE MLFLOW")
print("=" * 70)

# Filled further down with one dict per model that was loaded successfully.
models_to_test = list()

# Funci√≥n para cargar modelo desde MLflow
def load_trained_model(run_id, model_family, params):
    """Load the model logged under an MLflow run and wrap it in its predictor.

    Args:
        run_id: MLflow run whose ``model`` artifact is loaded.
        model_family: 'ARIMA', 'Prophet' or 'LightGBM'; selects the predictor
            class used as wrapper.
        params: Hyperparameters for the predictor constructor. The keys read
            depend on the family (see the constructor calls below).

    Returns:
        The predictor with the loaded model assigned to ``.model``, or
        ``None`` when the family is unsupported or loading/wrapping fails
        (the error is printed, not raised).
    """
    # Reject unknown families before touching MLflow. Otherwise the artifact
    # would be loaded for nothing and the failure would surface as a confusing
    # "unbound variable" error reported as a load problem.
    if model_family not in ('ARIMA', 'Prophet', 'LightGBM'):
        print(f"Error: familia de modelo no soportada: {model_family!r}")
        return None

    try:
        # NOTE(review): every family is loaded through the sklearn flavor;
        # confirm the ARIMA/Prophet artifacts were logged that way.
        model_uri = f"runs:/{run_id}/model"
        loaded_sklearn_model = mlflow.sklearn.load_model(model_uri)

        # Build the predictor wrapper for the requested family.
        if model_family == 'ARIMA':
            wrapper = ARIMAPredictor(
                p=params['p'],
                d=params['d'],
                q=params['q']
            )
        elif model_family == 'Prophet':
            wrapper = ProphetPredictor(
                yearly_seasonality=params['yearly_seasonality'],
                weekly_seasonality=params['weekly_seasonality'],
                daily_seasonality=params['daily_seasonality'],
                changepoint_prior_scale=params['changepoint_prior_scale'],
                seasonality_prior_scale=params['seasonality_prior_scale']
            )
        else:  # 'LightGBM' (the only remaining supported family)
            wrapper = LightGBMPredictor(
                n_estimators=params['n_estimators'],
                learning_rate=params['learning_rate'],
                max_depth=params['max_depth'],
                num_leaves=params['num_leaves'],
                min_child_samples=params['min_child_samples'],
                subsample=params['subsample'],
                colsample_bytree=params['colsample_bytree'],
                reg_alpha=params['reg_alpha'],
                reg_lambda=params['reg_lambda'],
                lags=params['lags'],
                rolling_windows=params['rolling_windows']
            )

        # Attach the loaded model to the wrapper.
        wrapper.model = loaded_sklearn_model

        # The LightGBM wrapper also carries a train_history; it is reset here
        # and is expected to be set again when the wrapper is fitted.
        if model_family == 'LightGBM':
            wrapper.train_history = None

        return wrapper

    except Exception as e:
        # Best effort by design: a model that cannot be loaded is reported and
        # skipped so the remaining families can still be evaluated.
        print(f"‚ùå Error cargando modelo: {e}")
        return None


def _parse_list_param(raw_value, default):
    """Parse a list-valued run param that is expected to arrive as text.

    ``ast.literal_eval`` only accepts Python literals, so text read back from
    the tracking server cannot execute code the way ``eval`` would.

    Args:
        raw_value: Value read from the runs table, e.g. ``'[1, 7, 30]'``.
        default: Returned when ``raw_value`` is not a string (e.g. missing).

    Returns:
        The parsed literal, or ``default``.
    """
    if isinstance(raw_value, str):
        return ast.literal_eval(raw_value)
    return default


# Load ARIMA
if arima_info is not None:
    print("\nüîÑ Cargando ARIMA desde MLflow...")

    arima_params = {
        'p': int(arima_info.get('params.p', 1)),
        'd': int(arima_info.get('params.d', 1)),
        'q': int(arima_info.get('params.q', 1))
    }

    arima_wrapper = load_trained_model(arima_info['run_id'], 'ARIMA', arima_params)

    # load_trained_model signals failure with None; test for that explicitly
    # instead of relying on the wrapper object's truthiness.
    if arima_wrapper is not None:
        models_to_test.append({
            'name': 'ARIMA',
            'model': arima_wrapper,
            'params': arima_params,
            'cv_rmse': arima_info.get('metrics.cv_avg_rmse', 0)
        })
        print(f"‚úÖ ARIMA cargado desde run {arima_info['run_id'][:8]}...")


# Load Prophet
if prophet_info is not None:
    print("\nüîÑ Cargando Prophet desde MLflow...")

    # Boolean params are compared against the text 'True'.
    prophet_params = {
        'yearly_seasonality': prophet_info.get('params.yearly_seasonality', 'True') == 'True',
        'weekly_seasonality': prophet_info.get('params.weekly_seasonality', 'False') == 'True',
        'daily_seasonality': prophet_info.get('params.daily_seasonality', 'False') == 'True',
        'changepoint_prior_scale': float(prophet_info.get('params.changepoint_prior_scale', 0.05)),
        'seasonality_prior_scale': float(prophet_info.get('params.seasonality_prior_scale', 10.0))
    }

    prophet_wrapper = load_trained_model(prophet_info['run_id'], 'Prophet', prophet_params)

    if prophet_wrapper is not None:
        models_to_test.append({
            'name': 'Prophet',
            'model': prophet_wrapper,
            'params': prophet_params,
            'cv_rmse': prophet_info.get('metrics.cv_avg_rmse', 0)
        })
        print(f"‚úÖ Prophet cargado desde run {prophet_info['run_id'][:8]}...")


# Load LightGBM
if lgbm_info is not None:
    print("\nüîÑ Cargando LightGBM desde MLflow...")

    lags_str = lgbm_info.get('params.lags', '[1, 7, 30]')
    rolling_str = lgbm_info.get('params.rolling_windows', '[7, 30]')

    lgbm_params = {
        'n_estimators': int(lgbm_info.get('params.n_estimators', 100)),
        'learning_rate': float(lgbm_info.get('params.learning_rate', 0.1)),
        'max_depth': int(lgbm_info.get('params.max_depth', 5)),
        'num_leaves': int(lgbm_info.get('params.num_leaves', 31)),
        'min_child_samples': int(lgbm_info.get('params.min_child_samples', 20)),
        'subsample': float(lgbm_info.get('params.subsample', 0.8)),
        'colsample_bytree': float(lgbm_info.get('params.colsample_bytree', 0.8)),
        'reg_alpha': float(lgbm_info.get('params.reg_alpha', 0.0)),
        'reg_lambda': float(lgbm_info.get('params.reg_lambda', 0.0)),
        # Parsed as literals only; never evaluated as arbitrary code.
        'lags': _parse_list_param(lags_str, [1, 7, 30]),
        'rolling_windows': _parse_list_param(rolling_str, [7, 30])
    }

    lgbm_wrapper = load_trained_model(lgbm_info['run_id'], 'LightGBM', lgbm_params)

    if lgbm_wrapper is not None:
        models_to_test.append({
            'name': 'LightGBM',
            'model': lgbm_wrapper,
            'params': lgbm_params,
            'cv_rmse': lgbm_info.get('metrics.cv_avg_rmse', 0)
        })
        print(f"‚úÖ LightGBM cargado desde run {lgbm_info['run_id'][:8]}...")


print(f"\nüìã Total modelos cargados exitosamente: {len(models_to_test)}")

In [0]:
print("", "=" * 70, sep="\n")
print("PREPARANDO DATOS PARA EVALUACI√ìN EN TEST SET")
print("=" * 70)

# Read the raw weather table, build the precipitation series and split it
# into train/test using the TRAIN_SPLIT ratio.
df = load_from_delta_table("weather_raw", spark)
serie = prepare_time_series(df, target_col="precipitacion")
train, test = train_test_split_temporal(serie, train_ratio=TRAIN_SPLIT)

print(f"\nüìä Dataset:")
print(f"   Total: {len(serie)} d√≠as")
# One line per split: size plus first and last date covered.
for split_name, split in (("Train", train), ("Test", test)):
    first_day = split.index.min().date()
    last_day = split.index.max().date()
    print(f"   {split_name}: {len(split)} d√≠as ({first_day} ‚Üí {last_day})")

In [0]:
print("\n" + "="*70)
print("EVALUACI√ìN EN TEST SET")
print("="*70)

results_test = []
predictions_dict = {}

# Fail early with a clear message. With no models the results table would be
# empty and sorting it by 'RMSE (Test)' would fail with an unhelpful KeyError.
if not models_to_test:
    raise RuntimeError(
        "No hay modelos cargados en models_to_test; "
        "revisa la celda de carga desde MLflow antes de evaluar."
    )

for i, model_config in enumerate(models_to_test):
    print(f"\n{'='*70}")
    print(f"[{i+1}/{len(models_to_test)}] Evaluando {model_config['name']}")
    print(f"{'='*70}")

    model = model_config['model']

    # The loaded model is refit on the train split only, so that scoring on
    # the test split is not contaminated by test data.
    print(f"\nüöÄ Reentrenando {model_config['name']} con datos de train...")
    model.fit(train)

    # Forecast as many steps as the test split has, aligned to its dates.
    print(f"üîÆ Generando predicciones...")
    preds = model.predict(steps=len(test))
    preds.index = test.index
    predictions_dict[model_config['name']] = preds

    # Score the forecast against the held-out values.
    print(f"üìä Evaluando...")
    metrics = model.evaluate(test, preds)

    # Collect one row per model for the comparison table.
    results_test.append({
        'Modelo': model_config['name'],
        'RMSE (CV)': model_config['cv_rmse'],
        'RMSE (Test)': metrics['rmse'],
        'MAE (Test)': metrics['mae'],
        'R¬≤ (Test)': metrics['r2'],
        'F1 (Test)': metrics['f1_score'],
        'Accuracy (Test)': metrics['accuracy'],
        'Precision (Test)': metrics['precision'],
        'Recall (Test)': metrics['recall']
    })

# Results table, best (lowest) test RMSE first; later cells read row 0 as the winner.
df_results = pd.DataFrame(results_test)
df_results = df_results.sort_values('RMSE (Test)', ascending=True)

print("\n" + "="*70)
print("RESULTADOS FINALES EN TEST SET")
print("="*70)
print(df_results.to_string(index=False))

In [0]:
print("\n" + "="*70)
print("AN√ÅLISIS DE RESULTADOS")
print("="*70)

# Compare the cross-validation RMSE with the test RMSE of every model.
print("\nüìä Comparaci√≥n RMSE: CV vs Test:")
for _, row in df_results.iterrows():
    diff = row['RMSE (Test)'] - row['RMSE (CV)']
    symbol = "üìà" if diff > 0 else "üìâ"
    print(f"   {symbol} {row['Modelo']:10s}: CV={row['RMSE (CV)']:.3f} ‚Üí Test={row['RMSE (Test)']:.3f} (Œî={diff:+.3f})")

# Winner per metric: 'min' means lower is better, 'max' means higher is better.
print("\nüèÜ Ganadores por m√©trica:")
metrics_winner = {
    'RMSE (Test)': ('min', df_results),
    'MAE (Test)': ('min', df_results),
    'R¬≤ (Test)': ('max', df_results),
    'F1 (Test)': ('max', df_results),
    'Accuracy (Test)': ('max', df_results)
}

# The loop variable is deliberately not called `df`: that name holds the raw
# dataset loaded in the data-preparation cell and must not be overwritten.
for metric, (direction, results_table) in metrics_winner.items():
    if direction == 'min':
        best_idx = results_table[metric].idxmin()
    else:
        best_idx = results_table[metric].idxmax()
    winner = results_table.loc[best_idx, 'Modelo']
    value = results_table.loc[best_idx, metric]
    print(f"   {metric:20s}: {winner:10s} ({value:.3f})")

# Overall best model: df_results is sorted by test RMSE, so row 0 is the winner.
best_model = df_results.iloc[0]
print(f"\n{'='*70}")
print(f"üèÜ MEJOR MODELO EN TEST SET (menor RMSE): {best_model['Modelo']}")
print(f"{'='*70}")
print(f"   RMSE: {best_model['RMSE (Test)']:.3f} mm")
print(f"   MAE:  {best_model['MAE (Test)']:.3f} mm")
print(f"   R¬≤:   {best_model['R¬≤ (Test)']:.3f}")
print(f"   F1:   {best_model['F1 (Test)']:.3f}")
print(f"   Accuracy: {best_model['Accuracy (Test)']*100:.1f}%")

# NOTE(review): the interpretation below is fixed text, not derived from
# df_results; re-check it whenever the data or the models change.
print(f"\nüí° Interpretaci√≥n de resultados:")
print(f"   - ARIMA: Predice casi siempre 0mm (sin lluvia) ‚Üí Alta accuracy pero F1=0")
print(f"   - Prophet: Predice lluvia muy frecuentemente ‚Üí Recall=100% pero baja precision")
print(f"   - LightGBM: Balance entre ambos extremos ‚Üí Mejor F1 general")
print(f"   - R¬≤ negativo: Los modelos no superan un predictor naive (media)")
print(f"   - Santiago tiene ~70% d√≠as secos ‚Üí Problema de desbalanceo severo")

In [0]:
print("", "=" * 70, sep="\n")
print("VISUALIZACI√ìN DE PREDICCIONES")
print("=" * 70)

fig, ax = plt.subplots(figsize=(16, 6))

# Line colour used for each model family.
colors = dict(ARIMA='steelblue', Prophet='coral', LightGBM='green')

# Observed series first, so the forecasts are drawn on top of it.
ax.plot(test.index, test.values, label='Real', color='black', alpha=0.8, linewidth=2)

# One line per model; the legend entry carries its test RMSE and F1.
for model_name, preds in predictions_dict.items():
    model_metrics = df_results[df_results['Modelo'] == model_name].iloc[0]
    rmse_text = f"{model_metrics['RMSE (Test)']:.2f}"
    f1_text = f"{model_metrics['F1 (Test)']:.3f}"
    ax.plot(
        preds.index,
        preds.values,
        label=f"{model_name} (RMSE={rmse_text}, F1={f1_text})",
        color=colors[model_name],
        alpha=0.7,
        linewidth=1.5,
    )

ax.set_title('Predicciones vs Real - Test Set', fontsize=14, fontweight='bold')
ax.set_xlabel('Fecha', fontsize=11)
ax.set_ylabel('Precipitaci√≥n (mm)', fontsize=11)
ax.legend(fontsize=10, loc='upper right')
ax.grid(alpha=0.3)
ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [0]:
print("", "=" * 70, sep="\n")
print("MATRIZ DE CONFUSI√ìN (Clasificaci√≥n Lluvia S√≠/No)")
print("=" * 70)

threshold = 1.0  # mm
n_models = len(predictions_dict)

fig, axes = plt.subplots(1, n_models, figsize=(6*n_models, 5))
# With a single panel matplotlib returns one Axes object; wrap it so the loop
# below can treat both cases alike.
axes = [axes] if n_models == 1 else axes

# A day counts as rainy when precipitation is strictly above the threshold.
# The observed classes are the same for every model, so compute them once.
observed_rain = test > threshold

for ax, (model_name, preds) in zip(axes, predictions_dict.items()):
    predicted_rain = preds > threshold

    # Confusion-matrix cells.
    tp = (observed_rain & predicted_rain).sum()
    fp = (~observed_rain & predicted_rain).sum()
    tn = (~observed_rain & ~predicted_rain).sum()
    fn = (observed_rain & ~predicted_rain).sum()

    conf_matrix = pd.DataFrame(
        [[tn, fp], [fn, tp]],
        columns=['Pred: No Lluvia', 'Pred: Lluvia'],
        index=['Real: No Lluvia', 'Real: Lluvia']
    )

    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax, cbar=False)

    # Precision/recall in the title come from the evaluation results table.
    model_metrics = df_results[df_results['Modelo'] == model_name].iloc[0]
    precision_pct = model_metrics['Precision (Test)'] * 100
    recall_pct = model_metrics['Recall (Test)'] * 100
    ax.set_title(
        f"{model_name}\nPrecision={precision_pct:.1f}%, Recall={recall_pct:.1f}%",
        fontsize=12, fontweight='bold'
    )

plt.tight_layout()
plt.show()

In [0]:
# NOTE(review): this whole cell is a bare string literal, i.e. the model
# registration code below is disabled and nothing in it runs. The text would
# retrain the best model (lowest test RMSE) on the full series, log it to
# MLflow and register it as "santiago_weather_predictor". Either restore it as
# real code (e.g. behind an explicit flag) or delete the cell.
"""
print("\n" + "="*70)
print("REGISTRAR MEJOR MODELO EN MLFLOW REGISTRY")
print("="*70)

# Seleccionar mejor modelo por RMSE
best_model_name = df_results.iloc[0]['Modelo']
best_model_config = next(m for m in models_to_test if m['name'] == best_model_name)

print(f"\nüèÜ Registrando: {best_model_name}")
print(f"   RMSE (Test): {df_results.iloc[0]['RMSE (Test)']:.3f} mm")
print(f"   MAE (Test):  {df_results.iloc[0]['MAE (Test)']:.3f} mm")
print(f"   F1 (Test):   {df_results.iloc[0]['F1 (Test)']:.3f}")

# Reentrenar con serie completa
print(f"\nüöÄ Reentrenando {best_model_name} con serie completa ({len(serie)} d√≠as)...")
best_model_config['model'].fit(serie)
print(f"‚úÖ Modelo final entrenado")

# Registrar en MLflow
with mlflow.start_run(run_name=f"{best_model_name}_PRODUCTION_CANDIDATE"):
    # Tags
    mlflow.set_tag("run_type", "production_candidate")
    mlflow.set_tag("model_family", best_model_name)
    mlflow.set_tag("run_stage", "production")
    mlflow.set_tag("description", f"Mejor modelo en test set - {best_model_name}")
    
    # Par√°metros
    mlflow.log_params(best_model_config['params'])
    mlflow.log_param("model_type", best_model_name)
    mlflow.log_param("trained_on", "full_dataset")
    mlflow.log_param("n_samples", len(serie))
    mlflow.log_param("date_range", f"{serie.index.min().date()} to {serie.index.max().date()}")
    
    # M√©tricas de test
    mlflow.log_metric("test_rmse", df_results.iloc[0]['RMSE (Test)'])
    mlflow.log_metric("test_mae", df_results.iloc[0]['MAE (Test)'])
    mlflow.log_metric("test_r2", df_results.iloc[0]['R¬≤ (Test)'])
    mlflow.log_metric("test_f1", df_results.iloc[0]['F1 (Test)'])
    mlflow.log_metric("test_accuracy", df_results.iloc[0]['Accuracy (Test)'])
    
    # M√©trica CV de referencia
    mlflow.log_metric("cv_rmse", best_model_config['cv_rmse'])
    
    # Guardar modelo
    mlflow.sklearn.log_model(best_model_config['model'].model, "model")
    
    # Registrar en Model Registry
    model_uri = f"runs:/{mlflow.active_run().info.run_id}/model"
    registered_model = mlflow.register_model(model_uri, "santiago_weather_predictor")
    
    print(f"\n{'='*70}")
    print(f"‚úÖ MODELO REGISTRADO EXITOSAMENTE")
    print(f"{'='*70}")
    print(f"\n  üìù Nombre: {registered_model.name}")
    print(f"  üî¢ Versi√≥n: {registered_model.version}")
    print(f"  üè∑Ô∏è  Tipo: {best_model_name}")
    print(f"  üìâ RMSE (Test): {df_results.iloc[0]['RMSE (Test)']:.3f} mm")
    print(f"  üìä F1 (Test): {df_results.iloc[0]['F1 (Test)']:.3f}")
    print(f"\n  üîó URI: {model_uri}")
    print(f"\n  ‚û°Ô∏è  Ir a MLflow UI ‚Üí Models para ver el modelo registrado")
"""
