## DeepAR encoder-decoder neural network for time series forecasting
* Code from https://aws.amazon.com/blogs/machine-learning/creating-neural-time-series-models-with-gluon-time-series/
* DeepAR architecture paper https://arxiv.org/abs/1704.04110
* GluonTS, a Python toolkit for neural time series modeling
  * documentation https://gluon-ts.mxnet.io/
  * GluonTS paper https://arxiv.org/pdf/1906.05264.pdf

In [None]:
# Install MXNet and GluonTS from PyPI; `--pre` allows pip to select
# pre-release builds.
# NOTE(review): neither package is pinned, so the API used in the cells below
# (e.g. the `gluonts.trainer` import path and the `num_eval_samples` argument)
# depends on whichever version pip resolves — consider pinning versions.
! pip install mxnet --pre
! pip install gluonts --pre

In [None]:
from itertools import islice

from gluonts.dataset.common import ListDataset
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.seasonal_naive import SeasonalNaivePredictor
from gluonts.trainer import Trainer
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Download the NAB "Twitter_volume_AMZN" CSV and preview its first 500 rows.
url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
# The first CSV column becomes the index; the first row supplies column names.
df = pd.read_csv(url, index_col=0, header=0)

df.iloc[:500].plot(linewidth=2, figsize=(12, 5))
plt.legend(["observations"])
plt.grid()
plt.show()

In [None]:
# Configure DeepAR for 5-minute data: a 48-step context window, a 16-step
# forecast horizon, and 50 training epochs at a learning rate of 1e-3.
estimator = DeepAREstimator(
    freq="5min",
    prediction_length=16,
    context_length=48,
    trainer=Trainer(learning_rate=0.001, epochs=50),
)

In [None]:
# Training set: a single series holding the `value` column from the first
# timestamp up to the 2015-04-05 00:00:00 cutoff.
training_data = ListDataset(
    [
        {
            "start": df.index[0],
            "target": df["value"][:"2015-04-05 00:00:00"],
        }
    ],
    freq="5min",
)

In [None]:
# Train the DeepAR estimator on the training series; the result is the
# predictor used for all DeepAR forecasts below.
predictor = estimator.train(training_data)

In [None]:
# Evaluation set: three series that share the same start but end at
# progressively later cutoffs, giving three separate windows to score.
test_data = ListDataset(
    [
        {"start": df.index[0], "target": df["value"][:cutoff]}
        for cutoff in (
            "2015-04-10 03:00:00",
            "2015-04-15 18:00:00",
            "2015-04-20 12:00:00",
        )
    ],
    freq="5min",
)


def plot_forecasts(tss, forecasts, past_length, num_plots):
    """Plot observed series against their forecasts, one figure per pair.

    Args:
        tss: Iterable of observed series. Each item must support slicing and
            expose ``.plot(figsize=..., linewidth=...)`` (presumably pandas
            objects — verify against the caller).
        forecasts: Iterable of forecast objects exposing ``.plot(color=...)``,
            paired positionally with ``tss``.
        past_length: Number of trailing observations to draw for each series.
        num_plots: Maximum number of (series, forecast) pairs to plot.

    Returns:
        None. Each figure is displayed with ``plt.show()``.
    """
    for target, forecast in islice(zip(tss, forecasts), num_plots):
        # Draw only the tail of the history so the forecast stays readable.
        target[-past_length:].plot(figsize=(12, 5), linewidth=2)
        forecast.plot(color='g')
        plt.grid(which='both')
        # NOTE(review): the labels assume forecast.plot() draws the median
        # followed by its default intervals in this order — confirm against
        # the installed GluonTS version.
        plt.legend(["observations", "median prediction", "90% confidence interval", "50% confidence interval"])
        plt.show()

# Backtest the trained DeepAR predictor on the test series, requesting
# 100 evaluation samples per forecast.
forecast_it, ts_it = make_evaluation_predictions(
    test_data,
    predictor=predictor,
    num_eval_samples=100,
)
# Materialize both iterators (forecasts first) so the results can be plotted
# here and scored again in later cells.
forecasts = list(forecast_it)
tss = list(ts_it)
plot_forecasts(tss, forecasts, num_plots=3, past_length=250)

In [None]:
# Score the forecasts at the median quantile. The seasonality is one week
# expressed in 5-minute steps: 7 days * 24 hours * 12 steps = 2016.
evaluator = Evaluator(seasonality=7 * 24 * 12, quantiles=[0.5])

agg_metrics, item_metrics = evaluator(
    iter(tss),
    iter(forecasts),
    num_series=len(test_data),
)
# Last expression of the cell: displays the aggregate metrics.
agg_metrics

In [None]:
# Baseline: compare DeepAR against a seasonal-naive model with a one-week
# season (2016 five-minute steps).
# The forecast horizon is taken from the DeepAR predictor so both models are
# scored on the same window; a different prediction_length would make the
# metrics in the table below incomparable.
seasonal_predictor_1W = SeasonalNaivePredictor(
    freq="5min",
    prediction_length=predictor.prediction_length,
    season_length=2016,
)

# NOTE: this rebinds forecast_it / ts_it / forecasts / tss from the DeepAR
# run, so re-running the DeepAR evaluation cell after this one would score
# the seasonal-naive forecasts instead.
forecast_it, ts_it = make_evaluation_predictions(
    test_data, predictor=seasonal_predictor_1W, num_eval_samples=100
)
forecasts = list(forecast_it)
tss = list(ts_it)

agg_metrics_seasonal, item_metrics_seasonal = evaluator(
    iter(tss), iter(forecasts), num_series=len(test_data)
)

# Side-by-side table of aggregate metrics, one column per model.
df_metrics = (
    pd.DataFrame.from_dict(agg_metrics, orient='index')
    .rename(columns={0: "DeepAR"})
    .join(
        pd.DataFrame.from_dict(agg_metrics_seasonal, orient='index')
        .rename(columns={0: "Seasonal naive"})
    )
)
df_metrics.loc[["MASE", "sMAPE", "RMSE"]]