## Deepar

In [1]:
import json
from gluonts.dataset.common import ListDataset
from gluonts.torch.model.deepar import DeepAREstimator
from gluonts.evaluation.backtest import make_evaluation_predictions
import matplotlib.pyplot as plt
from itertools import islice
from gluonts.evaluation import Evaluator
import pandas as pd

In [2]:
# Load JSONL into a list of dicts
with open("../data/processed/deepar_dataset.jsonl") as f:
    series_list = [json.loads(line) for line in f]


In [3]:
#subset 100 data points
s = series_list[0]
dataset = ListDataset(series_list, freq="5min")

In [None]:
from gluonts.torch.model.deepar import DeepAREstimator

estimator = DeepAREstimator(
    prediction_length=288,
    context_length=288,
    freq="5min",
    lags_seq=[1, 2, 3, 6, 12, 24, 48, 96, 192],
    hidden_size=80,     # RNN cell size
    num_layers=3,       # Depth of the network
    dropout_rate=0.1,
    lr=1e-3,
    weight_decay=1e-8,
    batch_size=64,
    num_batches_per_epoch=50,
    trainer_kwargs={
        "max_epochs": 50,
        "accelerator": "cpu",
        "gradient_clip_val": 10.0,
        "logger": False,
    }
)


In [10]:
predictor = estimator.train(dataset)


Exception: invalid frequency

In [15]:
# Evaluate
forecast_it, ts_it = make_evaluation_predictions(
    dataset=dataset,
    predictor=predictor,
    num_samples=100
)
forecast_list = list(forecast_it)
ts_list = list(ts_it)

In [16]:

# Evaluation
evaluator = Evaluator()
agg_metrics, item_metrics = evaluator(ts_list, forecast_list)

# Print summary
print("\nAggregate Metrics:")
for k, v in agg_metrics.items():
    print(f"{k:>20}: {v:.4f}")


Running evaluation: 5it [00:00, 18.23it/s]



Aggregate Metrics:
                 MSE: 1.4311
           abs_error: 1045.1602
      abs_target_sum: 2389.0000
     abs_target_mean: 1.6590
      seasonal_error: 0.8808
                MASE: 0.9508
                MAPE: 0.3061
               sMAPE: 0.3974
                MSIS: 12.4072
num_masked_target_values: 0.0000
   QuantileLoss[0.1]: 248.4927
       Coverage[0.1]: 0.0035
   QuantileLoss[0.2]: 469.2538
       Coverage[0.2]: 0.1472
   QuantileLoss[0.3]: 696.2949
       Coverage[0.3]: 0.2444
   QuantileLoss[0.4]: 901.0123
       Coverage[0.4]: 0.3778
   QuantileLoss[0.5]: 1045.1601
       Coverage[0.5]: 0.4049
   QuantileLoss[0.6]: 1161.5135
       Coverage[0.6]: 0.4333
   QuantileLoss[0.7]: 1225.4822
       Coverage[0.7]: 0.4590
   QuantileLoss[0.8]: 1221.7752
       Coverage[0.8]: 0.5292
   QuantileLoss[0.9]: 1108.9159
       Coverage[0.9]: 0.6611
                RMSE: 1.1963
               NRMSE: 0.7211
                  ND: 0.4375
  wQuantileLoss[0.1]: 0.1040
  wQuantileLoss[0.



In [7]:
n_plots = len(forecast_list)  # or set a lower limit for visual clarity
for i in range(n_plots):
    ts = ts_list[i]
    forecast = forecast_list[i]

    # Convert PeriodIndex to Timestamp for plotting
    ts_index = ts.index.to_timestamp()

    # Forecast timestamps
    start = forecast.start_date.to_timestamp()
    freq = pd.Timedelta(forecast.freq)
    forecast_index = pd.date_range(start=start, periods=len(forecast.mean), freq=freq)

    # Determine x-axis limits (last 2 days)
    last_time = ts_index[-1]
    xlim_start = last_time - pd.Timedelta(days=2)
    xlim_end = last_time + pd.Timedelta(minutes=5 * len(forecast.mean))  # include forecast horizon

    # Plot
    plt.figure(figsize=(14, 5))
    plt.plot(ts_index, ts.values, label="True values", color="black")
    plt.plot(forecast_index, forecast.mean, label="Forecast (mean)", color="blue")

    plt.title(f"Forecast vs True (Series {i})")
    plt.legend()
    plt.grid(True)
    plt.xlim(xlim_start, xlim_end)
    plt.tight_layout()
    plt.show()


NameError: name 'forecast_list' is not defined

In [None]:

import optuna
from gluonts.evaluation import Evaluator
from gluonts.dataset.common import ListDataset
from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer

def objective(trial):
    hs = trial.suggest_categorical("hidden_size", [40, 80, 120])
    cl = trial.suggest_categorical("context_length", [144, 288])
    dr = trial.suggest_float("dropout_rate", 0.0, 0.3)

    estimator = DeepAREstimator(
        prediction_length=prediction_length,
        context_length=cl,
        freq=freq,
        hidden_size=hs,
        dropout_rate=dr,
        trainer=Trainer(epochs=5)
    )

    predictor = estimator.train(train_ds)

    forecast_it, ts_it = make_evaluation_predictions(dataset=test_ds, predictor=predictor, num_samples=100)
    agg_metrics, _ = Evaluator()(ts_it, forecast_it)

    return agg_metrics["RMSE"]

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)
study.best_params


In [None]:

for forecast in forecasts:
    plt.figure(figsize=(10, 4))
    forecast.plot(color='g')
    plt.plot(prediction_index, target[-prediction_length:], color='orange')
    plt.fill_between(prediction_index, forecast.quantile(0.1), forecast.quantile(0.9), color='lightblue', alpha=0.4)
    plt.title("Forecast with 80% Prediction Interval")
    plt.grid(True)
    plt.show()
    break


In [None]:

external_columns = ['mempool_blocks_nTx']
exog_values = df[external_columns].T.values.tolist()

train_ds = ListDataset([{
    "target": df[target_column][:train_end_index].values,
    "start": df.index[0],
    "feat_dynamic_real": exog_values
}], freq=freq)

test_ds = ListDataset([{
    "target": df[target_column].values,
    "start": df.index[0],
    "feat_dynamic_real": exog_values
}], freq=freq)

estimator = DeepAREstimator(
    prediction_length=prediction_length,
    context_length=288,
    freq=freq,
    use_feat_dynamic_real=True,
    trainer=Trainer(epochs=5)
)


In [None]:

horizons = [72, 144, 288]
results = []

for horizon in horizons:
    estimator = DeepAREstimator(
        prediction_length=horizon,
        context_length=horizon * 2,
        freq=freq,
        trainer=Trainer(epochs=5)
    )
    predictor = estimator.train(train_ds)
    forecast_it, ts_it = make_evaluation_predictions(test_ds, predictor=predictor, num_samples=100)
    agg_metrics, _ = Evaluator()(ts_it, forecast_it)
    results.append((horizon, agg_metrics["RMSE"], agg_metrics["MAE"]))

results_df = pd.DataFrame(results, columns=["Horizon", "RMSE", "MAE"])
print(results_df)


In [None]:

split_points = list(range(5000, len(df), 288))
metrics_expanding = []

for split in split_points:
    train_target = df[target_column][:split]
    test_target = df[target_column][split:split+288]

    train_ds = ListDataset([{
        "target": train_target.values,
        "start": df.index[0],
        "feat_dynamic_real": [df["mempool_blocks_nTx"].values]
    }], freq=freq)

    test_ds = ListDataset([{
        "target": df[target_column].values,
        "start": df.index[0],
        "feat_dynamic_real": [df["mempool_blocks_nTx"].values]
    }], freq=freq)

    estimator = DeepAREstimator(
        prediction_length=288,
        context_length=288,
        freq=freq,
        use_feat_dynamic_real=True,
        trainer=Trainer(epochs=5)
    )
    predictor = estimator.train(train_ds)
    forecast_it, ts_it = make_evaluation_predictions(test_ds, predictor=predictor, num_samples=100)
    agg_metrics, _ = Evaluator()(ts_it, forecast_it)
    metrics_expanding.append((df.index[split], agg_metrics["RMSE"], agg_metrics["MAE"]))

metrics_expanding_df = pd.DataFrame(metrics_expanding, columns=["SplitTime", "RMSE", "MAE"])
metrics_expanding_df.plot(x="SplitTime", y=["RMSE", "MAE"], title="Expanding Window Evaluation")
