In [None]:
# Install the packages listed in requirements.txt. `%pip` (rather than `!pip`)
# targets the environment of the running kernel; `--no-cache-dir` tells pip
# not to use its local download cache.
%pip install -r requirements.txt --no-cache-dir

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from utilsforecast.plotting import plot_series
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import *

from statsforecast import StatsForecast
from statsforecast.models import Naive, HistoricAverage, WindowAverage, SeasonalNaive, AutoARIMA

from functools import partial
from utilsforecast.feature_engineering import fourier, time_features, pipeline

import warnings
# Silence every warning category for the rest of the session to keep cell
# output readable. NOTE(review): this also hides deprecation notices and any
# warnings raised by the forecasting libraries during fitting.
warnings.filterwarnings("ignore")

***Prediction Intervals:*** A prediction interval is a range of values that is expected to contain the actual value with a stated probability.

So far, we have applied point forecasting. A point forecast does not consider the uncertainty of the future. Uncertainty is measured with a prediction interval.  
**Ex:** A 95% prediction interval means that there is a probability of 95% that the actual values fall within the range.

Stochastic models can inherently generate prediction intervals because uncertainty is built into the model itself.  
ARIMA is a stochastic model because it includes an error term that is assumed to follow a normal distribution.

In [None]:
# Load the daily bakery sales; the "ds" column is parsed into datetimes on read.
df = pd.read_csv("daily_sales_french_bakery.csv", parse_dates=["ds"])

# Keep only series with at least 28 rows, then order every series
# chronologically. The sort matters: `groupby(...).tail(n)` returns the last
# n rows of each group in their *current row order*, so without it the
# hold-out would only be "the most recent week" if the CSV happened to be
# sorted by date already.
df = (
    df.groupby("unique_id")
    .filter(lambda series: len(series) >= 28)
    .sort_values(["unique_id", "ds"])
)

# Restrict the walkthrough to two products.
unique_ids = ["BAGUETTE", "CROISSANT"]
small_df = df[df["unique_id"].isin(unique_ids)]

# Hold out the last 7 observations of each series as the test set (this
# matches the 7-step horizon used when forecasting); everything before is
# the training set. `test.index` still carries the row labels of `small_df`,
# so dropping by label removes exactly the held-out rows.
test = small_df.groupby("unique_id").tail(7)
train = small_df.drop(test.index).reset_index(drop=True)

In [None]:
# Preview the first five training rows as a quick sanity check of the split.
train.head(n=5)

In [None]:
# Number of future steps to forecast (one week of daily data).
horizon = 7

# A single model: AutoARIMA with a weekly (7-step) seasonal period.
models = [AutoARIMA(season_length=7)]

sf = StatsForecast(models=models, freq="D")
sf.fit(df=train)

# `level=[80]` requests an 80% prediction interval alongside the point
# forecast. `X_df` is the hold-out frame without the target column `y`
# (presumably the future values of the exogenous regressors - verify against
# the columns of the CSV).
prob_preds = sf.predict(
    h=horizon,
    X_df=test.drop(columns=["y"]),
    level=[80],
)

In [None]:
# Attach the forecasts to the held-out actuals. The left join keeps every
# test row, matched on series id and timestamp.
results_df = pd.merge(test, prob_preds, on=["unique_id", "ds"], how="left")

In [None]:
# Plot the last 28 training observations of each selected series together
# with the AutoARIMA forecast and its 80% interval.
plot_series(
    df=train,
    forecasts_df=results_df,
    models=["AutoARIMA"],
    level=[80],
    ids=unique_ids,
    max_insample_length=28,
    palette="viridis",
)

In [None]:
# Same single-model setup as before, now evaluated with cross-validation.
models = [AutoARIMA(season_length=7)]
sf = StatsForecast(models=models, freq="D")

# 8 windows of `horizon` steps each, advancing 7 steps between windows;
# `refit=True` asks for the model to be refit for every window, and
# `level=[80]` adds the 80% prediction interval to each forecast.
cv_prob_df = sf.cross_validation(
    df=small_df,
    h=horizon,
    step_size=7,
    n_windows=8,
    level=[80],
    refit=True,
)

In [None]:
# Overlay the cross-validation forecasts (with their 80% interval) on the
# last 140 observations. The actuals `y` and the `cutoff` column are removed
# from the forecast frame before plotting.
plot_series(
    df=small_df,
    forecasts_df=cv_prob_df.drop(columns=["y", "cutoff"]),
    models=["AutoARIMA"],
    level=[80],
    ids=unique_ids,
    max_insample_length=140,
    palette="viridis",
)

***Evaluation Metrics:*** Depending on the type of forecasts, different evaluation metrics must be used.

**Point Forecasts:** MAE - MSE - RMSE - MAPE - sMAPE - MASE  
**Probabilistic Forecasts:** CRPS

In [None]:
# Compare AutoARIMA (reported under the alias "SARIMA_exog") against a
# weekly SeasonalNaive baseline.
models = [
    AutoARIMA(season_length=7, alias="SARIMA_exog"),
    SeasonalNaive(season_length=7),
]
sf = StatsForecast(models=models, freq="D")

# Same protocol as the earlier cross-validation cell: 8 windows of `horizon`
# steps, 7 steps apart, refit per window, with 80% prediction intervals.
final_cv_df = sf.cross_validation(
    df=small_df,
    h=horizon,
    step_size=7,
    n_windows=8,
    level=[80],
    refit=True,
)

In [None]:
# Inspect the first five rows of the cross-validation output.
final_cv_df.head(n=5)

In [None]:
# The cross-validation above used 8 windows of 7 steps, i.e. the last
# 7 * 8 = 56 rows of each series. Everything before that span is kept as the
# training frame handed to `evaluate` (needed by metrics such as MASE).
temp_test = small_df.groupby("unique_id").tail(7 * 8)
temp_train = small_df.drop(index=temp_test.index).reset_index(drop=True)

In [None]:
# Column names of the forecasts to score (they match the model aliases used
# in the cross-validation cell).
models = ["SARIMA_exog", "SeasonalNaive"]

# Point-forecast metrics plus the scaled CRPS for the probabilistic forecast.
# MASE is bound to a seasonality of 7 to match the weekly season length.
metrics = [mae, mse, rmse, mape, smape, partial(mase, seasonality=7), scaled_crps]

# `ds` and `cutoff` are removed before scoring; `train_df` supplies the
# history passed to `evaluate`, and `level=[80]` identifies the interval
# that was forecast.
final_eval = evaluate(
    final_cv_df.drop(columns=["ds", "cutoff"]),
    metrics=metrics,
    models=models,
    train_df=temp_train,
    level=[80],
)

# Average each metric across the series to get one row per metric.
final_eval = (
    final_eval.drop(columns=["unique_id"])
    .groupby("metric")
    .mean()
    .reset_index()
)
final_eval