# Time Series Forecasting

In [1]:
import darts
import pandas as pd
import plotly.graph_objects as go
from darts.datasets import AirPassengersDataset
from darts.metrics import mae
from darts.models import NaiveMean, NaiveSeasonal
from darts.utils.statistics import (
    check_seasonality,
    extract_trend_and_seasonality,
    stationarity_test_adf,
)
from plotly.express import line
from plotly.subplots import make_subplots

# Load the air-passengers dataset bundled with darts, then keep a flat pandas
# copy (time stamps as a regular column) for plotting with plotly.
series: darts.TimeSeries = AirPassengersDataset().load()
dataframe: pd.DataFrame = series.to_dataframe(
    time_as_index=False,
    backend="pandas",
)


  __import__("pkg_resources").declare_namespace(__name__)  # type: ignore


In [2]:
# Plot the raw passenger counts; the figure is the cell's displayed output.
# The columns are passed as arrays, so the `labels` mapping is keyed by the
# generic "x"/"y" names rather than by column name.
line(
    x=dataframe["Month"],
    y=dataframe["#Passengers"],
    title="Air Passengers Over Time",
    labels={"x": "Month", "y": "Passengers"},
    width=800,
    height=400,
)

In [3]:
# Test the series for seasonality; the displayed result is a
# (is_seasonal, period) pair.
check_seasonality(series)


(True, np.int64(12))

Am I really going to believe whatever the computer says? Yes, yes I will: the series is seasonal, with a period of 12 months.

In [4]:
# Decompose the series: `x` holds the trend component and `y` the seasonal one.
x, y = extract_trend_and_seasonality(series)

# Flatten both components to pandas frames (time stamps as a regular column)
# so they can be handed to plotly.
x_df, y_df = (
    component.to_dataframe(backend="pandas", time_as_index=False)
    for component in (x, y)
)

In [5]:
# Side-by-side panels: trend on the left, seasonal component on the right.
# Panel titles and trace names are set explicitly so the figure is readable
# on its own (otherwise the legend shows "trace 0" / "trace 1").
fig = make_subplots(rows=1, cols=2, subplot_titles=("Trend", "Seasonality"))

fig.add_trace(
    go.Scatter(x=x_df["Month"], y=x_df["0"], name="Trend"),
    row=1,
    col=1,
)
fig.add_trace(
    go.Scatter(x=y_df["Month"], y=y_df["0"], name="Seasonality"),
    row=1,
    col=2,
)

fig.update_layout(height=400, title_text="Trend and Seasonality")
fig.show()


In [6]:
# Augmented Dickey-Fuller stationarity test on the raw series.
# NOTE(review): the result tuple appears to follow statsmodels' `adfuller`
# layout (statistic, p-value, lags used, n obs, critical values, icbest) — confirm.
stationarity_test_adf(series)

(np.float64(0.8153688792060498),
 np.float64(0.991880243437641),
 13,
 130,
 {'1%': np.float64(-3.4816817173418295),
  '5%': np.float64(-2.8840418343195267),
  '10%': np.float64(-2.578770059171598)},
 np.float64(996.692930839019))

The uncertainty of a forecast is just as important as the forecast itself, if not more so.

### Split the data

In [None]:
# Hold out everything from January 1958 onwards as the validation set.
train: darts.TimeSeries
val: darts.TimeSeries
train, val = series.split_before(pd.Timestamp("19580101"))
train_df = train.to_dataframe(backend="pandas", time_as_index=False)
val_df = val.to_dataframe(backend="pandas", time_as_index=False)

# Overlay both partitions on a single axis so the split point is visible.
fig2 = make_subplots(rows=1, cols=1)
for label, frame in (("Train", train_df), ("Validation", val_df)):
    fig2.add_trace(
        go.Scatter(x=frame["Month"], y=frame["#Passengers"], name=label),
        row=1,
        col=1,
    )

# Title describes this figure (it used to be a copy of the decomposition
# plot's "Trend and Seasonality" title).
fig2.update_layout(
    height=400,
    title_text="Train / Validation Split",
    xaxis_title="Month",
    yaxis_title="Passengers",
)
fig2.show()


### Models

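Let's start with a naive approach: first predict the mean of the training data, then repeat the values from one seasonal period (12 months) earlier.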

In [None]:
# Baseline 1: NaiveMean, fitted on the training split only.
model = NaiveMean()
model.fit(train)
# Forecast one step per validation point instead of a hard-coded 36, so the
# horizon stays in sync with the split.
naive_mean_forecast = model.predict(len(val))

forecast_df = naive_mean_forecast.to_dataframe(backend="pandas", time_as_index=False)

# Overlay training data, held-out data and the forecast on one axis.
fig2 = make_subplots(rows=1, cols=1)
for label, frame in (
    ("Train", train_df),
    ("Validation", val_df),
    ("Forecast", forecast_df),
):
    fig2.add_trace(
        go.Scatter(x=frame["Month"], y=frame["#Passengers"], name=label),
        row=1,
        col=1,
    )

# Title describes this figure (it used to be a copy of the decomposition
# plot's "Trend and Seasonality" title).
fig2.update_layout(
    height=400,
    title_text="Naive Mean Forecast",
    xaxis_title="Month",
    yaxis_title="Passengers",
)
fig2.show()


In [None]:
# Baseline 2: NaiveSeasonal with K=12, matching the 12-step seasonal period
# reported by the seasonality check.
model = NaiveSeasonal(K=12)
model.fit(train)
# Forecast one step per validation point instead of a hard-coded 36, so the
# horizon stays in sync with the split.
naive_seasonal_forecast = model.predict(len(val))

forecast_df = naive_seasonal_forecast.to_dataframe(
    backend="pandas",
    time_as_index=False,
)

# Overlay training data, held-out data and the forecast on one axis.
fig2 = make_subplots(rows=1, cols=1)
for label, frame in (
    ("Train", train_df),
    ("Validation", val_df),
    ("Forecast", forecast_df),
):
    fig2.add_trace(
        go.Scatter(x=frame["Month"], y=frame["#Passengers"], name=label),
        row=1,
        col=1,
    )

# Title describes this figure (it used to be a copy of the decomposition
# plot's "Trend and Seasonality" title).
fig2.update_layout(
    height=400,
    title_text="Naive Seasonal Forecast (K=12)",
    xaxis_title="Month",
    yaxis_title="Passengers",
)
fig2.show()


Okay, but how wrong is it?

In [10]:
# Score both baselines against the held-out validation split explicitly,
# rather than passing the full series and relying on the metric to align the
# overlapping time span.
naive_mean_mae = mae(val, naive_mean_forecast)
naive_seasonal_mae = mae(val, naive_seasonal_forecast)
print(f"MAE for the naive mean: {naive_mean_mae}")  # noqa: T201
print(f"MAE for the naive seasonal: {naive_seasonal_mae}")  # noqa: T201


MAE for the naive mean: 197.60185185185188
MAE for the naive seasonal: 60.083333333333336


### Trying other models

In [11]:
from darts.models import AutoARIMA, ExponentialSmoothing, Theta
from darts.models.forecasting.forecasting_model import ForecastingModel


def eval_model(
    model: ForecastingModel,
    training_data: darts.TimeSeries,
    validation_data: "darts.TimeSeries | None" = None,
) -> None:
    """Fit a darts model on training data and print its MAE on held-out data.

    Args:
    ----
    model: A darts forecasting model; it is fitted in place on
        ``training_data``.
    training_data: The series the model is fitted on.
    validation_data: The series the forecast is scored against; its length
        sets the forecast horizon. Defaults to the notebook-level ``val``
        split when omitted.

    """
    # Fall back to the notebook's validation split so existing two-argument
    # calls keep working unchanged.
    if validation_data is None:
        validation_data = val
    model.fit(training_data)
    forecast = model.predict(len(validation_data))
    score = mae(validation_data, forecast)
    print(f"model {model} obtains MAE: {score:.2f}")  # noqa: T201


# Evaluate each classical model in turn; a fresh instance is built right
# before it is fitted on the training split.
for model_cls in (ExponentialSmoothing, AutoARIMA, Theta):
    eval_model(model_cls(), train)


model ExponentialSmoothing() obtains MAE: 21.54
model AutoARIMA() obtains MAE: 55.86
model Theta() obtains MAE: 37.97


### Machine Learning

In [12]:
from darts.models import NBEATSModel

# N-BEATS configured with a 24-step input window and 12-step output chunks;
# the random state is fixed so training is repeatable.
model = NBEATSModel(
    input_chunk_length=24,
    output_chunk_length=12,
    random_state=42,
)

# Train on the training split only; the fitted model is the cell's output.
model.fit(train, epochs=50, verbose=True)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 6.2 M  | train
-------------------------------------------------------------
6.2 M     Trainable params
1.4 K     Non-trainable params
6.2 M     Total params
24.787    Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode

'pin_memory' argument is set as true but no accelerator is found, then device pinned memory won't be used.



Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.


NBEATSModel(output_chunk_shift=0, generic_architecture=True, num_stacks=30, num_blocks=1, num_layers=4, layer_widths=256, expansion_coefficient_dim=5, trend_polynomial_degree=2, dropout=0.0, activation=ReLU, input_chunk_length=24, output_chunk_length=12, random_state=42)