In [None]:
import pandas as pd
# Use the fully qualified option key: the bare 'precision' pattern is ambiguous in
# recent pandas (it also matches 'styler.format.precision') and raises OptionError.
pd.set_option('display.precision', 3)

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import os

import mxnet as mx
from gluonts.model.n_beats import NBEATSEnsembleEstimator
from gluonts.mx import Trainer
from gluonts.dataset.common import ListDataset
from gluonts.evaluation import make_evaluation_predictions


In [None]:
# Set sample rate. Per the original note, all data in this notebook is meant to be
# resampled at the chosen frequency:
#   'MS' : Monthly (Month Start)
#   'W'  : Weekly
#   'D'  : Daily

# Number of periods in one year for each supported frequency alias.
year_period = {'MS': 12, 'W': 52, 'D': 365}
frequency = 'MS'
one_year = year_period[frequency]

# Directory that receives the files written by this notebook.
output_path = "./output/nbeats_TEST"
# makedirs (unlike mkdir) also creates the missing "./output" parent directory,
# and exist_ok=True keeps the cell safe to re-run.
os.makedirs(output_path, exist_ok=True)

## Load Data From File

In [None]:
# One food price category name per line. The file is read directly because
# pd.read_csv(..., sep='\n') is rejected by recent pandas releases (the line
# terminator may not be used as the separator). Blank lines are skipped, as
# read_csv would have done.
with open("./foodprice_categories.txt", encoding="utf-8") as categories_file:
    foodprice_categories = [line.strip() for line in categories_file if line.strip()]

# Load the full data set; the first CSV column becomes the index and is
# converted to a DatetimeIndex.
foodprice_df = pd.read_csv("./all_data.csv", index_col=0)
foodprice_df = foodprice_df.set_index(pd.DatetimeIndex(foodprice_df.index))
foodprice_df

In [None]:
# Keep only the food price category columns and transpose, so that each row is
# one category's series and each column is one date.
dataset_df = foodprice_df.loc[:, foodprice_categories].transpose()
dataset_df

In [None]:
def get_prophet_df(foodprice_df, food_category, dates):
    """Build a two-column frame (``ds``, ``y``) for a single food category.

    Parameters
    ----------
    foodprice_df : pandas.DataFrame
        Frame with one column per series; its index supplies the ``ds`` values.
    food_category : str
        Name of the column to extract.
    dates : index labels
        Labels of the rows to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``ds`` (the selected index labels) and ``y`` (the values).
    """
    selected = foodprice_df[food_category].loc[dates]
    # Name the axis and the values explicitly instead of renaming a hard-coded
    # 'REF_DATE' column, so the result has `ds`/`y` whatever the index is called.
    return selected.rename_axis('ds').rename('y').reset_index()

## Fit Models Using All Data To Produce Final Forecast

In [None]:
cutoff_date = "2021-10-01"
prediction_length = 18

# Training window: every observation up to and including the cutoff date.
train_dates = foodprice_df.loc[foodprice_df.index <= cutoff_date].index

# One dataset entry per row of dataset_df (one row per food price category).
train_ds = ListDataset(
    [{'target': x, 'start': train_dates[0]} for x in dataset_df[list(train_dates)].values],
    freq='MS'
)

# Need to add empty forecast dates since make_evaluation_predictions will cut off prediction_length many entries.
cutoff_timestamp = pd.to_datetime(cutoff_date)
forecast_dates = pd.date_range(
    cutoff_timestamp + pd.DateOffset(months=1),
    cutoff_timestamp + pd.DateOffset(months=prediction_length),
    freq='MS',
)

# Float zeros as placeholders for the forecast horizon. Building them with
# np.zeros_like(dataset_df.index) would copy the dtype of the string index and
# turn the whole value matrix into object dtype.
forecast_padding = pd.DataFrame(0.0, index=dataset_df.index, columns=forecast_dates)
forecast_df = dataset_df.join(forecast_padding)

forecast_ds = ListDataset(
    [{'target': x, 'start': train_dates[0]} for x in forecast_df[list(train_dates) + list(forecast_dates)].values],
    freq='MS'
)

# Training is stochastic; seed MXNet and NumPy so that a Restart & Run All
# reproduces the same forecasts.
mx.random.seed(0)
np.random.seed(0)

estimator = NBEATSEnsembleEstimator(
    prediction_length=prediction_length,
    meta_bagging_size = 3,  # 3
    meta_context_length = [prediction_length * m for m in [3,5,7]], 
    meta_loss_function = ['sMAPE'], 
    num_stacks = 30,
    widths= [512],
    freq="MS",
    trainer=Trainer(
                epochs=1,  # 50 (NOTE(review): presumably the value for a full run; confirm)
                ctx=mx.context.cpu()
            )

)

predictor = estimator.train(train_ds)

# forecast_ds is the training data padded with prediction_length placeholder
# entries; make_evaluation_predictions cuts those off again before predicting.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=forecast_ds,  # padded dataset (not the plain training set)
    predictor=predictor,  # predictor
)

# Materialise the forecast iterator: one forecast per entry of forecast_ds.
forecasts = list(forecast_it)

# NOTE(review): not populated anywhere in this cell; kept in case later cells use it.
all_food_metrics = {}
food_forecasts = {}

# The forecasts follow the row order of dataset_df (forecast_ds is built from its
# rows), so the labels must come from dataset_df.index. foodprice_df.columns may
# hold additional columns or a different order, which would mislabel every series.
for foodprice_category, forecast_entry in zip(dataset_df.index, forecasts):

    # Mean forecast over the horizon, shared by the plot and the exported series.
    mean_forecast = forecast_entry.mean[:len(forecast_dates)]

    # plot actual
    fig, ax = plt.subplots(figsize=(8,3))
    ax.plot(train_dates, foodprice_df.loc[train_dates, foodprice_category], color='black')

    # plot forecast
    ax.plot(forecast_dates, mean_forecast, color='C0')

    ax.set_title(f"{foodprice_category}, October 2021 Forecast")
    ax.grid()
    plt.show()
    # Release the figure so that one figure per category does not pile up in memory.
    plt.close(fig)

    food_forecasts[foodprice_category] = pd.Series(mean_forecast, index=forecast_dates, name=foodprice_category)

# One column per food price category, indexed by forecast date.
all_forecasts = pd.DataFrame(food_forecasts)
all_forecasts.to_csv(f"{output_path}/fc_final.csv")