## Deepar

In [2]:
import json
from gluonts.dataset.common import ListDataset
from gluonts.torch.model.deepar import DeepAREstimator
from gluonts.evaluation.backtest import make_evaluation_predictions
import matplotlib.pyplot as plt
from itertools import islice
from gluonts.evaluation import Evaluator
import pandas as pd


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/interestingtj/miniforge3/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/interestingtj/miniforge3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/interestingtj/miniforge3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.

ModuleNotFoundError: No module named 'lightning'

In [None]:
# Load JSONL into a list of dicts
with open("../data/processed/deepar_dataset.jsonl") as f:
    series_list = [json.loads(line) for line in f]


In [None]:
#subset 100 data points
s = series_list[0]
dataset = ListDataset(series_list, freq="15min")

In [None]:
from gluonts.torch.model.deepar import DeepAREstimator

estimator = DeepAREstimator(
    prediction_length=96,
    context_length=96,
    freq="15min",
    lags_seq=[1, 2, 3, 6, 12, 24, 48, 96, 192],
    hidden_size=80,     # RNN cell size
    num_layers=3,       # Depth of the network
    dropout_rate=0.1,
    lr=1e-3,
    weight_decay=1e-8,
    batch_size=64,
    num_batches_per_epoch=50,
    trainer_kwargs={
        "max_epochs": 30,
        "accelerator": "cpu",
        "gradient_clip_val": 10.0,
        "logger": False,
    }
)


In [None]:
predictor = estimator.train(dataset)


In [8]:
# Evaluate
forecast_it, ts_it = make_evaluation_predictions(
    dataset=dataset,
    predictor=predictor,
    num_samples=100
)
forecast_list = list(forecast_it)
ts_list = list(ts_it)

In [9]:

# Evaluation
evaluator = Evaluator()
agg_metrics, item_metrics = evaluator([ts_list[1]], [forecast_list[1]])

# Print summary
print("\nAggregate Metrics:")
for k, v in agg_metrics.items():
    print(f"{k:>20}: {v:.4f}")


Running evaluation: 1it [00:00, 12.09it/s]



Aggregate Metrics:
                 MSE: 470.2497
           abs_error: 1521.9934
      abs_target_sum: 1666.0000
     abs_target_mean: 17.3542
      seasonal_error: 1.7787
                MASE: 8.9133
                MAPE: 0.7811
               sMAPE: 1.3104
                MSIS: 285.7610
num_masked_target_values: 0.0000
   QuantileLoss[0.1]: 317.9515
       Coverage[0.1]: 0.0104
   QuantileLoss[0.2]: 629.3241
       Coverage[0.2]: 0.0417
   QuantileLoss[0.3]: 934.9501
       Coverage[0.3]: 0.0625
   QuantileLoss[0.4]: 1233.5151
       Coverage[0.4]: 0.1146
   QuantileLoss[0.5]: 1521.9935
       Coverage[0.5]: 0.1458
   QuantileLoss[0.6]: 1796.6936
       Coverage[0.6]: 0.1458
   QuantileLoss[0.7]: 2048.4883
       Coverage[0.7]: 0.2083
   QuantileLoss[0.8]: 2269.0766
       Coverage[0.8]: 0.2500
   QuantileLoss[0.9]: 2431.5238
       Coverage[0.9]: 0.2708
                RMSE: 21.6852
               NRMSE: 1.2496
                  ND: 0.9136
  wQuantileLoss[0.1]: 0.1908
  wQuantileL



In [1]:
n_plots = len(forecast_list)  # or set a lower limit for visual clarity
for i in range(n_plots):
    ts = ts_list[i]
    forecast = forecast_list[i]

    # Convert PeriodIndex to Timestamp for plotting
    ts_index = ts.index.to_timestamp()

    # Forecast timestamps
    start = forecast.start_date.to_timestamp()
    freq = pd.Timedelta(forecast.freq)
    forecast_index = pd.date_range(start=start, periods=len(forecast.mean), freq=freq)

    # Determine x-axis limits (last 2 days)
    last_time = ts_index[-1]
    xlim_start = last_time - pd.Timedelta(days=2)
    xlim_end = last_time + pd.Timedelta(minutes=15 * len(forecast.mean))  # include forecast horizon

    # Plot
    plt.figure(figsize=(14, 5))
    plt.plot(ts_index, ts.values, label="True values", color="black")
    plt.plot(forecast_index, forecast.mean, label="Forecast (mean)", color="blue")

    plt.title(f"Forecast vs True (Series {i})")
    plt.legend()
    plt.grid(True)
    plt.xlim(xlim_start, xlim_end)
    plt.tight_layout()
    plt.show()


NameError: name 'forecast_list' is not defined