In [0]:
%pip install prophet lightgbm prefect --no-deps --quiet
# The install above uses --no-deps, so pip does not pull in the transitive
# dependencies of prophet / lightgbm / prefect.
# NOTE(review): this presumably relies on the cluster image already providing
# them, and no versions are pinned — confirm and pin for reproducibility.
# opentelemetry-api is upgraded in a separate command, with dependency resolution enabled.
%pip install -U opentelemetry-api --quiet
# Restart the Python process so the packages installed above are the ones
# picked up by the imports in the following cells.
dbutils.library.restartPython()

In [0]:
import sys
import warnings

import mlflow
import pandas as pd

# The project's `src` package is imported from this workspace path, so it has
# to be on sys.path before any `src.*` import below. The membership check keeps
# repeated runs of this cell from appending the same entry again.
REPO_ROOT = "/Workspace/Repos/desareca/santiago-weather-forecast"
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

from src.data.ingestion import load_from_delta_table
from src.data.preprocessing import prepare_time_series, train_test_split_temporal
from src.evaluation.cross_validation import TimeSeriesSplit, evaluate_with_cv
from src.models.arima_model import ARIMAPredictor
from src.models.prophet_model import ProphetPredictor
# Explicit names instead of `import *`: these are the only two config values
# this notebook reads, and naming them makes their origin obvious.
from src.utils.config import EXPERIMENT_NAME, TRAIN_SPLIT

# Suppress every Python warning for the rest of the session to keep cell
# output readable. NOTE(review): this also hides convergence/deprecation
# warnings emitted while fitting the models.
warnings.filterwarnings('ignore')

# Point MLflow at the experiment named in the project config; runs logged by
# the following cells are recorded under it.
mlflow.set_experiment(EXPERIMENT_NAME)

print("✅ Setup completo")

In [0]:
# Load the raw weather records from the "weather_raw" Delta table.
# `spark` is not defined in this notebook; presumably it is the session
# injected by the Databricks runtime — confirm when running elsewhere.
df = load_from_delta_table("weather_raw", spark)

# Build the series to model from the "precipitacion" column.
serie = prepare_time_series(df, target_col="precipitacion")

# Hold-out split; the train fraction comes from TRAIN_SPLIT in the project
# config (the original note says 80/20 — verify against the config value).
train, test = train_test_split_temporal(serie, train_ratio=TRAIN_SPLIT)

# Report the sizes of the full series and of each partition.
print("\n📊 Datos preparados:")
print(f"  Serie completa: {len(serie)} días")
print(f"  Train: {len(train)} días")
print(f"  Test: {len(test)} días")

In [0]:
# Section banner for experiment 1 (blank line, rule, title, rule).
print("\n" + "=" * 70, "EXPERIMENTO 1: ARIMA(1,1,1) Baseline", "=" * 70, sep="\n")

# Baseline predictor configured with p=1, d=1, q=1.
arima_baseline = ARIMAPredictor(p=1, d=1, q=1)

# Train on `train`, evaluate on `test`, with MLflow logging switched on;
# the returned metrics are kept for the comparison cell further down.
metrics_arima = arima_baseline.train_and_evaluate(
    train,
    test,
    log_mlflow=True,
)

In [0]:
# Section banner for experiment 2 (blank line, rule, title, rule).
print("\n" + "=" * 70, "EXPERIMENTO 2: Prophet con estacionalidad anual", "=" * 70, sep="\n")

# Prophet predictor with yearly seasonality on, weekly seasonality off, and a
# changepoint prior scale of 0.05.
prophet_model = ProphetPredictor(
    changepoint_prior_scale=0.05,
    weekly_seasonality=False,
    yearly_seasonality=True,
)

# Train on `train`, evaluate on `test`, with MLflow logging switched on;
# the returned metrics are kept for the comparison cell further down.
metrics_prophet = prophet_model.train_and_evaluate(
    train,
    test,
    log_mlflow=True,
)

In [0]:
print("\n" + "="*70)
print("COMPARACIÓN INICIAL (Train/Test Split)")
print("="*70)

# Show which metric names each model returned, so mismatches between the two
# result mappings are visible before they are lined up side by side.
print("\nMétricas disponibles ARIMA:")
print(metrics_arima.keys())
print("\nMétricas disponibles Prophet:")
print(metrics_prophet.keys())

# One column per model, one row per metric name. Going through pd.Series
# aligns on the metric name, so a metric reported by only one model shows up
# as NaN for the other instead of raising.
comparison_simple = pd.DataFrame({
    'ARIMA(1,1,1)': pd.Series(metrics_arima),
    'Prophet': pd.Series(metrics_prophet)
})

print("\n📊 Todas las métricas:")
print(comparison_simple.round(3))

In [0]:
print("\n" + "="*70)
print("CROSS-VALIDATION: Visualización de Folds")
print("="*70)

# Splitter configured with 5 splits and test_size=30.
# NOTE(review): the evaluate_with_cv calls in the following cells pass
# n_splits=5 but no test_size — confirm the folds drawn here match the folds
# that are actually evaluated.
cv = TimeSeriesSplit(n_splits=5, test_size=30)

# Render the splits over the full series.
cv.visualize_splits(serie)

In [0]:
# Section banner for the ARIMA cross-validation run.
print("\n" + "=" * 70, "CROSS-VALIDATION: ARIMA(1,1,1)", "=" * 70, sep="\n")

# Cross-validate ARIMAPredictor over the full series with 5 splits; p, d and q
# are handed to evaluate_with_cv as extra keyword arguments.
results_arima_cv = evaluate_with_cv(
    model_class=ARIMAPredictor,
    data=serie,
    n_splits=5,
    p=1,
    d=1,
    q=1,
)

In [0]:
# Section banner for the Prophet cross-validation run.
print("\n" + "=" * 70, "CROSS-VALIDATION: Prophet", "=" * 70, sep="\n")

# Cross-validate ProphetPredictor over the full series with 5 splits, using
# the same seasonality and changepoint settings as the hold-out experiment.
results_prophet_cv = evaluate_with_cv(
    model_class=ProphetPredictor,
    data=serie,
    n_splits=5,
    changepoint_prior_scale=0.05,
    weekly_seasonality=False,
    yearly_seasonality=True,
)

In [0]:
print("\n" + "="*70)
print("COMPARACIÓN FINAL (Promedios Cross-Validation)")
print("="*70)

# Columns averaged across folds. Declared once so both models are summarised
# over exactly the same metrics.
CV_METRICS = ['mae', 'rmse', 'r2', 'accuracy', 'precision', 'recall', 'f1_score']
# Metrics for which a winner is announced, and the subset where a larger value
# is better; for the rest (mae, rmse) a smaller value wins.
WINNER_METRICS = ['mae', 'rmse', 'r2', 'accuracy', 'f1_score']
HIGHER_IS_BETTER = {'r2', 'accuracy', 'f1_score'}

# One column per model, one row per metric: the mean of each metric over the
# cross-validation results.
comparison_cv = pd.DataFrame({
    'ARIMA': results_arima_cv[CV_METRICS].mean(),
    'Prophet': results_prophet_cv[CV_METRICS].mean()
})

print(comparison_cv.round(3))

# Per-metric winner: the column label holding the best mean score.
print("\n🏆 Ganadores por métrica:")
for metric in WINNER_METRICS:
    scores = comparison_cv.loc[metric]
    winner = scores.idxmax() if metric in HIGHER_IS_BETTER else scores.idxmin()
    value = scores.loc[winner]

    print(f"  {metric.upper():12s}: {winner:10s} ({value:.3f})")