In [1]:
import pandas as pd
# Use the fully-qualified option name: the short pattern 'precision' also matches
# 'styler.format.precision' on pandas >= 1.4 and raises OptionError there.
pd.set_option('display.precision', 3)
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import os
import numpy as np

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

import mxnet as mx
from gluonts.model.n_beats import NBEATSEnsembleEstimator
from gluonts.mx import Trainer
from gluonts.dataset.common import ListDataset
from gluonts.evaluation import make_evaluation_predictions

In [2]:
"""
Set sample rate. In this notebook, all data will be resampled at the chosen frequency.
'MS' : Monthly (Month Start)
'W' : Weekly
'D' : Daily
"""

# Number of samples that make up one year for each supported pandas frequency alias.
year_period = {'MS': 12, 'W': 52, 'D': 365}
frequency = 'MS'
one_year = year_period[frequency]

# Directory that receives the files written by this notebook.
output_path = "./output/nbeatsfredvars_202110"
# makedirs also creates the missing "./output" parent on a fresh checkout, and
# exist_ok=True keeps the cell safe to re-run (no exists-then-create race).
os.makedirs(output_path, exist_ok=True)

## Load Data From File

In [3]:
# Read the table with its first CSV column as the index, then replace that index
# with a DatetimeIndex built from the same values.
foodprice_df = pd.read_csv("./all_data.csv", index_col=0).pipe(
    lambda frame: frame.set_index(pd.DatetimeIndex(frame.index))
)
foodprice_df

Unnamed: 0,Bakery and cereal products (excluding baby food),Dairy products and eggs,"Fish, seafood and other marine products",Food purchased from restaurants,Food,"Fruit, fruit preparations and nuts",Meat,Other food products and non-alcoholic beverages,Vegetables and vegetable preparations,DEXCAUS,...,XTIMVA01CAM657S,XTIMVA01CAM659S,XTIMVA01CAM664N,XTIMVA01CAM664S,XTIMVA01CAM667S,XTNTVA01CAM664N,XTNTVA01CAM664S,XTNTVA01CAM667S,TOTALNS,TOTALSL
1986-01-01,69.3,70.9,60.6,59.1,67.3,76.0,65.1,77.5,76.0,1.392,...,3.644,12.051,9.368e+09,9.496e+09,6.749e+09,6.906e+08,1.052e+09,7.479e+08,607.369,605.703
1986-02-01,70.3,70.8,61.3,59.1,66.9,77.6,64.2,78.1,68.4,1.392,...,1.965,16.745,9.495e+09,9.632e+09,6.881e+09,-9.880e+07,1.539e+08,1.099e+08,605.807,610.678
1986-03-01,70.6,71.1,61.3,59.3,67.0,79.2,64.2,78.6,66.2,1.392,...,-11.565,1.655,8.803e+09,8.529e+09,6.085e+09,9.138e+08,9.079e+08,6.478e+08,606.799,613.377
1986-04-01,71.3,71.0,61.4,59.7,67.7,82.2,63.6,79.5,71.1,1.392,...,13.334,10.821,1.034e+10,9.569e+09,6.897e+09,3.470e+08,6.563e+08,4.730e+08,614.367,619.658
1986-05-01,71.2,71.4,61.9,59.9,68.2,83.5,64.0,79.8,75.3,1.377,...,-4.236,6.160,9.598e+09,9.091e+09,6.605e+09,7.013e+08,6.893e+08,5.008e+08,621.915,625.820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-01,157.7,145.3,146.2,163.9,156.8,144.5,176.7,142.2,153.4,1.203,...,-1.087,31.561,5.159e+10,4.979e+10,4.075e+10,3.661e+09,3.111e+09,2.546e+09,4259.604,4307.137
2021-07-01,157.9,146.4,146.6,165.2,157.6,141.7,180.9,141.9,154.8,1.240,...,1.990,18.580,5.106e+10,5.205e+10,4.156e+10,3.780e+08,1.129e+09,9.018e+08,4276.202,4323.730
2021-08-01,158.5,148.3,146.8,165.9,158.0,142.5,182.1,141.7,152.2,1.251,...,-2.312,14.723,5.245e+10,5.115e+10,4.060e+10,1.300e+09,2.674e+09,2.123e+09,4316.303,4337.489
2021-09-01,158.1,148.0,147.1,165.9,158.5,141.5,184.8,144.3,150.0,1.262,...,-3.461,8.557,5.080e+10,4.965e+10,3.920e+10,1.561e+09,2.277e+09,1.798e+09,4346.662,4367.402


## Fitting and Evaluating a Single NBEATS Model: Example Using All Food Prices

In [5]:
# Shape of the wide table: T rows (one per date) and N columns (one per series).
T, N = foodprice_df.shape

# Number of steps to forecast and the pandas frequency alias of the data.
prediction_length = 18
freq = "MS"

# Transposed values: one row per column of foodprice_df, one column per date.
dataset = foodprice_df.transpose().values

# NOTE(review): `start` is not referenced by any cell visible in this export — confirm it is still needed.
start = pd.Timestamp("2016-07-01")

In [6]:
# Transposed frame: rows are the columns of foodprice_df, columns are the dates.
dataset_df = foodprice_df.transpose()
dataset_df

Unnamed: 0,1986-01-01,1986-02-01,1986-03-01,1986-04-01,1986-05-01,1986-06-01,1986-07-01,1986-08-01,1986-09-01,1986-10-01,...,2021-01-01,2021-02-01,2021-03-01,2021-04-01,2021-05-01,2021-06-01,2021-07-01,2021-08-01,2021-09-01,2021-10-01
Bakery and cereal products (excluding baby food),6.930e+01,7.030e+01,7.060e+01,7.130e+01,7.120e+01,7.110e+01,7.170e+01,7.190e+01,7.170e+01,7.110e+01,...,1.542e+02,1.571e+02,1.568e+02,1.562e+02,1.578e+02,1.577e+02,1.579e+02,1.585e+02,1.581e+02,1.593e+02
Dairy products and eggs,7.090e+01,7.080e+01,7.110e+01,7.100e+01,7.140e+01,7.110e+01,7.130e+01,7.150e+01,7.180e+01,7.180e+01,...,1.416e+02,1.431e+02,1.449e+02,1.461e+02,1.466e+02,1.453e+02,1.464e+02,1.483e+02,1.480e+02,1.466e+02
"Fish, seafood and other marine products",6.060e+01,6.130e+01,6.130e+01,6.140e+01,6.190e+01,6.200e+01,6.220e+01,6.270e+01,6.310e+01,6.360e+01,...,1.434e+02,1.439e+02,1.449e+02,1.451e+02,1.476e+02,1.462e+02,1.466e+02,1.468e+02,1.471e+02,1.493e+02
Food purchased from restaurants,5.910e+01,5.910e+01,5.930e+01,5.970e+01,5.990e+01,6.000e+01,6.060e+01,6.090e+01,6.090e+01,6.130e+01,...,1.626e+02,1.629e+02,1.626e+02,1.632e+02,1.635e+02,1.639e+02,1.652e+02,1.659e+02,1.659e+02,1.671e+02
Food,6.730e+01,6.690e+01,6.700e+01,6.770e+01,6.820e+01,6.840e+01,6.920e+01,6.950e+01,6.990e+01,7.020e+01,...,1.550e+02,1.556e+02,1.555e+02,1.554e+02,1.566e+02,1.568e+02,1.576e+02,1.580e+02,1.585e+02,1.592e+02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XTNTVA01CAM664N,6.906e+08,-9.880e+07,9.138e+08,3.470e+08,7.013e+08,5.973e+08,-4.890e+08,9.390e+07,1.542e+09,1.262e+09,...,3.784e+09,2.126e+09,-7.842e+08,-8.295e+08,-3.348e+08,3.661e+09,3.780e+08,1.300e+09,1.561e+09,1.561e+09
XTNTVA01CAM664S,1.052e+09,1.539e+08,9.079e+08,6.563e+08,6.893e+08,5.314e+08,-9.670e+07,2.959e+08,1.222e+09,1.165e+09,...,2.094e+09,1.701e+09,-1.682e+08,1.307e+09,-4.467e+08,3.111e+09,1.129e+09,2.674e+09,2.277e+09,2.277e+09
XTNTVA01CAM667S,7.479e+08,1.099e+08,6.478e+08,4.730e+08,5.008e+08,3.823e+08,-7.004e+07,2.131e+08,8.806e+08,8.390e+08,...,1.646e+09,1.339e+09,-1.338e+08,1.045e+09,-3.685e+08,2.546e+09,9.018e+08,2.123e+09,1.798e+09,1.798e+09
TOTALNS,6.074e+02,6.058e+02,6.068e+02,6.144e+02,6.219e+02,6.279e+02,6.336e+02,6.405e+02,6.499e+02,6.567e+02,...,4.177e+03,4.166e+03,4.167e+03,4.186e+03,4.227e+03,4.260e+03,4.276e+03,4.316e+03,4.347e+03,4.347e+03


## Fit Models Using All Data To Produce Final Forecast

In [8]:
# Seed both random number generators so repeated runs of this cell start from the same state.
mx.random.seed(0)
np.random.seed(0)

cutoff_date = foodprice_df.index[-1]  # Select the most recent date available.
prediction_length = 18

train_dates = foodprice_df.loc[foodprice_df.index <= cutoff_date].index

# One entry per row of dataset_df (i.e. per column of foodprice_df), all sharing the same start date.
train_ds = ListDataset(
    [{'target': x, 'start': train_dates[0]} for x in dataset_df[list(train_dates)].values],
    freq='MS'
)

# Note: We need to add empty forecast dates since make_evaluation_predictions will cut off prediction_length many entries.
# `periods=` guarantees exactly prediction_length dates, which the per-category Series below relies on.
forecast_dates = pd.date_range(
    start=pd.to_datetime(cutoff_date) + pd.DateOffset(months=1),
    periods=prediction_length,
    freq='MS',
)

# Placeholder columns for the forecast dates. Explicit float zeros are used because
# np.zeros_like on the string row index would yield object-dtype padding.
padding_df = pd.DataFrame(0.0, index=dataset_df.index, columns=forecast_dates)
forecast_df = dataset_df.join(padding_df)

forecast_ds = ListDataset(
    [{'target': x, 'start': train_dates[0]} for x in forecast_df[list(train_dates) + list(forecast_dates)].values],
    freq='MS'
)

# Ensemble over three context lengths (3x, 5x and 7x the forecast horizon) with the sMAPE loss.
estimator = NBEATSEnsembleEstimator(
    prediction_length=prediction_length,
    meta_bagging_size=3,
    meta_context_length=[prediction_length * m for m in [3, 5, 7]],
    meta_loss_function=['sMAPE'],
    num_stacks=30,
    widths=[512],
    freq="MS",
    trainer=Trainer(
        epochs=100,
        num_batches_per_epoch=200,
        ctx=mx.context.cpu()
    )
)

predictor = estimator.train(train_ds)

forecast_it, ts_it = make_evaluation_predictions(
    dataset=forecast_ds,  # padded series; the trailing placeholder entries are the ones cut off
    predictor=predictor,  # predictor
)

forecasts = list(forecast_it)

all_food_metrics = {}
food_forecasts = {}

# NOTE(review): `food_categories` is not defined in any cell visible in this export; it must
# exist before this cell runs, otherwise the membership test below raises NameError.
# Forecasts come back in the row order of forecast_df, which is the column order of foodprice_df.
for foodprice_category, forecast_entry in zip(foodprice_df.columns, forecasts):

    if foodprice_category not in food_categories:
        continue

    # Use the same mean forecast for both the plot and the exported series.
    forecast_mean = forecast_entry.mean

    # plot actual
    fig, ax = plt.subplots(figsize=(8, 3))
    ax.plot(train_dates, foodprice_df[foodprice_category][train_dates], color='black')

    # plot forecast
    ax.plot(forecast_dates, forecast_mean, color='C0')

    ax.set_title(f"{foodprice_category}, October 2021 Forecast")
    ax.grid()
    plt.show()
    # Release the figure so looping over many categories does not accumulate open figures.
    plt.close(fig)

    food_forecasts[foodprice_category] = pd.Series(forecast_mean, index=forecast_dates, name=foodprice_category)

all_forecasts = pd.DataFrame(food_forecasts)
all_forecasts.to_csv(f"{output_path}/fc_final.csv")

  timestamp = pd.Timestamp(string, freq=freq)
  if isinstance(timestamp.freq, Tick):
  return timestamp.freq.rollforward(timestamp)


TRAINER:gluonts.mx.trainer._base.Trainer(add_default_callbacks=True, batch_size=None, callbacks=None, clip_gradient=10.0, ctx=mxnet.context.Context("cpu", 0), epochs=100, hybridize=True, init="xavier", learning_rate=0.001, learning_rate_decay_factor=0.5, minimum_learning_rate=5e-05, num_batches_per_epoch=200, patience=10, weight_decay=1e-08)


  return _shift_timestamp_helper(ts, ts.freq, offset)
  return _shift_timestamp_helper(ts, ts.freq, offset)
100%|██████████| 200/200 [00:45<00:00,  4.43it/s, epoch=1/100, avg_epoch_loss=3.02]
  2%|▏         | 4/200 [00:00<00:44,  4.42it/s, epoch=2/100, avg_epoch_loss=3.38]


KeyboardInterrupt: 