In [0]:
%pip install prophet lightgbm prefect --no-deps --quiet
%pip install -U opentelemetry-api --quiet
# Environment setup for this notebook.
# - The first command installs prophet, lightgbm and prefect with `--no-deps`,
#   i.e. pip does not install their dependencies.
#   NOTE(review): presumably the cluster runtime already provides them — confirm.
# - The second command upgrades opentelemetry-api (`-U`).
# - NOTE(review): no versions are pinned, so results may drift between runs;
#   consider pinning (e.g. `pkg==x.y.z`) once known-good versions are settled.
# Restart the Python process after installing; presumably required so the newly
# installed packages are picked up (Databricks `dbutils` utility) — confirm.
dbutils.library.restartPython()

In [0]:
# Imports and one-time notebook setup
import sys

# Make the repository root importable so the `src.*` modules below resolve.
# Guarded so that re-running this cell does not append duplicate entries.
_repo_root = "/Workspace/Repos/desareca/santiago-weather-forecast"
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

from src.data.ingestion import load_from_delta_table
from src.data.preprocessing import prepare_time_series
from src.models.arima_model import ARIMAPredictor
from src.models.prophet_model import ProphetPredictor
from src.evaluation.cross_validation import TimeSeriesSplit
# NOTE(review): wildcard import; EXPERIMENT_NAME (used below) presumably comes
# from here. Prefer explicit names once it is confirmed which config values
# the notebook relies on.
from src.utils.config import *
import mlflow
import pandas as pd
import warnings
# Silences every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Select the MLflow experiment used by this notebook.
mlflow.set_experiment(EXPERIMENT_NAME)
print("✅ Setup completo")

In [0]:
# Load and prepare the data.
# `spark` is not defined in this notebook; presumably it is the session object
# injected by the notebook runtime — confirm.
df = load_from_delta_table("weather_raw", spark)
# Build the series for the "precipitacion" column; it is reused by every
# cross-validation cell below.
serie = prepare_time_series(df, target_col="precipitacion")

# Quick summary of the prepared series. `.date()` is called on the index
# min/max, so the index is expected to be datetime-like.
print("\n📊 Datos preparados:")
print(f"  Serie completa: {len(serie)} días")
print(f"  Fecha inicio: {serie.index.min().date()}")
print(f"  Fecha fin: {serie.index.max().date()}")

In [0]:
# Section header for the fold visualization output.
print("\n" + "="*70)
print("VISUALIZACIÓN DE FOLDS")
print("="*70)

# Same split settings (5 splits, test_size=30) as the cross-validation cells
# for ARIMA and Prophet, so the displayed folds are presumably the ones the
# models are evaluated on — confirm against `train_and_evaluate_cv`.
cv = TimeSeriesSplit(n_splits=5, test_size=30)
cv.visualize_splits(serie)

In [0]:
# Section header: blank line, rule, title, rule.
print("\n" + "=" * 70, "CROSS-VALIDATION: ARIMA(1,1,1)", "=" * 70, sep="\n")

# ARIMA with p=1, d=1, q=1, evaluated with 5 splits and test_size=30.
# `log_mlflow=True` presumably records the run in MLflow — confirm in
# ARIMAPredictor.
arima = ARIMAPredictor(p=1, d=1, q=1)
results_arima_cv = arima.train_and_evaluate_cv(data=serie, n_splits=5, test_size=30, log_mlflow=True)

In [0]:
# Section header: blank line, rule, title, rule.
print("\n" + "=" * 70, "CROSS-VALIDATION: Prophet", "=" * 70, sep="\n")

# Prophet with yearly seasonality on and weekly seasonality off, evaluated
# with the same split settings as the ARIMA cell (5 splits, test_size=30).
# `log_mlflow=True` presumably records the run in MLflow — confirm in
# ProphetPredictor.
prophet = ProphetPredictor(yearly_seasonality=True, weekly_seasonality=False, changepoint_prior_scale=0.05)
results_prophet_cv = prophet.train_and_evaluate_cv(data=serie, n_splits=5, test_size=30, log_mlflow=True)

In [0]:
# Section header for the final model comparison.
print("\n" + "="*70)
print("COMPARACIÓN FINAL")
print("="*70)

# Metrics compared between the two models. Listed once so the summary table
# and the winner ranking below cannot drift apart.
cv_metrics = ['mae', 'rmse', 'r2', 'accuracy', 'f1_score']
# Metrics where a larger value is better; all others are errors to minimize.
higher_is_better = {'r2', 'accuracy', 'f1_score'}

# One column per model, one row per metric, each cell the mean of that metric
# over the cross-validation results (presumably one row per fold — confirm).
comparison_cv = pd.DataFrame({
    'ARIMA': results_arima_cv[cv_metrics].mean(),
    'Prophet': results_prophet_cv[cv_metrics].mean()
})

print("\n📊 Promedios Cross-Validation:")
print(comparison_cv.round(3))

# Winners per metric
print("\n🏆 Ganadores por métrica:")
for metric in cv_metrics:
    # Drop models with an undefined (NaN) mean for this metric. If none is
    # left, idxmax/idxmin would return NaN or raise and the `:10s` format
    # below would fail, so report the metric as unavailable instead.
    scores = comparison_cv.loc[metric].dropna()
    if scores.empty:
        print(f"  {metric.upper():12s}: {'n/a':10s} (nan)")
        continue
    winner = scores.idxmax() if metric in higher_is_better else scores.idxmin()
    value = scores.loc[winner]
    print(f"  {metric.upper():12s}: {winner:10s} ({value:.3f})")