# Predicción de Series Temporales
# Amazon's DeepAR

## Importing the data

In [1]:
import pandas as pd

# Build the series to forecast: visitors summed over all stores, one row per visit_date.
# Stores with no record on a given date show up as NaN after the pivot and are
# counted as 0 before the row-wise sum.
y = (
    pd.read_csv('../data/class/air_visit_data.csv.zip')
    .pivot(index='visit_date', columns='air_store_id')['visitors']
    .fillna(0)
    .sum(axis=1)
    .to_frame()
    .reset_index(drop=False)
)
# Two columns remain: the former index (visit_date) and the per-date total.
y.columns = ['date', 'y']


FileNotFoundError: [Errno 2] No such file or directory: '../data/class/air_visit_data.csv.zip'

In [None]:
# pip install --upgrade mxnet~=1.7

In [None]:
# pip install mxnet==1.7.0.post2

In [None]:
#pip install gluonts

## Preparing the data format required by the gluonts library

In [None]:
from gluonts.dataset.common import ListDataset

# Size of the hold-out window; it matches the prediction_length=28 given to the estimators.
prediction_length = 28

# The sampling frequency is supplied through ListDataset(freq=...). The `freq` keyword of
# pd.Timestamp was removed in pandas 2.0, so the start is a plain timestamp.
# NOTE(review): the series is aggregated per visit_date, which looks daily, while 'H'
# (hourly) is declared here and in the estimators - confirm the intended frequency.
start = pd.Timestamp("01-01-2016")

# train dataset: cut the last window of length "prediction_length", add "target" and "start" fields.
# A positional slice is used because `.loc[:450]` is label-inclusive (451 rows) and only
# matches "everything but the last 28" for one specific series length.
train_ds = ListDataset(
    [{'target': y['y'].iloc[:-prediction_length].values, 'start': start}],
    freq='H',
)
# test dataset: use the whole dataset, add "target" and "start" fields
test_ds = ListDataset([{'target': y['y'].values, 'start': start}], freq='H')


In [None]:
# Display the `list_data` attribute of the training dataset as the cell output.
train_ds.list_data

In [None]:
# Display the `list_data` attribute of the test dataset as the cell output.
test_ds.list_data

## Fitting the default DeepAR model

In [None]:
import mxnet as mx
import numpy as np
from gluonts.model.deepar import DeepAREstimator
# NOTE: `from gluonts.trainer import Trainer` was the import tried here before
# (presumably the layout of an older gluonts release).
from gluonts.mx.trainer import Trainer

# Seed NumPy and MXNet so that a re-run starts from the same random state.
np.random.seed(7)
mx.random.seed(7)

# Optimisation settings. ctx="gpu" can be added to train on a GPU
# (original note: remove it if running on Windows).
trainer = Trainer(
    epochs=5,
    learning_rate=1e-3,
    num_batches_per_epoch=100,
)

# DeepAR forecasting 28 steps ahead from a 100-step context window.
# NOTE(review): the target is aggregated per visit_date (looks daily) yet freq='H'
# is declared - confirm.
estimator = DeepAREstimator(
    prediction_length=28,
    context_length=100,
    freq='H',
    trainer=trainer,
)

predictor = estimator.train(train_ds)


## Prediction

In [None]:
# Forecast the final `prediction_length` steps of the test series.
#
# `predictor.predict(ds)` forecasts the steps that FOLLOW the last observation of each
# series, so calling it on the full test series yields values for a window that has no
# actuals, while the cells below score the result against `target[-28:]`.
# `make_evaluation_predictions` withholds the last `prediction_length` observations of
# each series and forecasts exactly that window, which keeps forecast and actuals aligned.
from gluonts.evaluation.backtest import make_evaluation_predictions

forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,
    predictor=predictor,
    num_samples=100,  # sample paths drawn per forecast
)

# The dataset holds a single series: take its forecast and reduce the sample
# paths to their median (0.5 quantile).
predictions = list(forecast_it)[0].quantile(0.5)


In [None]:
# Display the median forecast computed in the previous cell.
predictions

##  R2 score and prediction graph

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

# Last 28 observations of the single test series: the window the forecast is scored on.
actuals = list(test_ds)[0]['target'][-28:]

# r2_score takes the true values first, then the predictions.
print(r2_score(actuals, predictions))

# Overlay forecast and actuals on one axis.
plt.plot(predictions, label='predictions')
plt.plot(actuals, label='actuals')
plt.legend()
plt.show()


## Probability forecast graph

In [None]:
from gluonts.evaluation.backtest import make_evaluation_predictions

# Returns a pair of iterables: the forecasts and the series they belong to.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,
    predictor=predictor,
    num_samples=100,  # number of sample paths we want for evaluation
)

# Materialise both (forecasts first), then keep the first entry of each.
forecasts = list(forecast_it)
tss = list(ts_it)
forecast_entry, ts_entry = forecasts[0], tss[0]

def plot_prob_forecasts(ts_entry, forecast_entry, plot_length=150, prediction_intervals=(50.0, 90.0)):
    """Plot the tail of an observed series together with its probabilistic forecast.

    Args:
        ts_entry: Observed series; must support slicing and ``.plot(ax=...)``.
        forecast_entry: Forecast object whose ``.plot`` accepts
            ``prediction_intervals`` and ``color``.
        plot_length: Number of trailing observations of ``ts_entry`` to draw.
        prediction_intervals: Interval coverages (in percent) to shade.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # Interval labels are listed in reverse order of `prediction_intervals`.
    # NOTE(review): presumably this matches the order in which forecast_entry.plot
    # draws the bands - confirm against the installed gluonts version.
    legend = ["observations", "median prediction"] + [
        f"{k}% prediction interval" for k in reversed(prediction_intervals)
    ]

    _, ax = plt.subplots(1, 1, figsize=(10, 7))
    ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
    # No axis is passed here; the forecast is drawn through the current pyplot state.
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.show()

# Draw the probabilistic forecast for the first test series.
plot_prob_forecasts(ts_entry, forecast_entry)
# NOTE(review): plot_prob_forecasts already ends with plt.show(); this extra call looks redundant.
plt.show()


## Preparing holidays and reservations data and adding them into the ListDataset

In [None]:
# Reservations: collapse each reservation's visit timestamp to its calendar date
# and total the reserved visitors per date.
X_reservations = pd.read_csv('data/11/air_reserve.csv.zip')
X_reservations['visit_date'] = pd.to_datetime(X_reservations['visit_datetime']).dt.date
X_reservations = (
    X_reservations.groupby('visit_date')['reserve_visitors']
    .sum()
    .to_frame()
    .reset_index(drop=False)
)

# Both merge keys must be datetime64 for the join to match rows.
y['date'] = pd.to_datetime(y['date'])
X_reservations['visit_date'] = pd.to_datetime(X_reservations['visit_date'])

# Left join keeps every date of the target series; missing values become 0.
y = pd.merge(y, X_reservations, how='left', left_on='date', right_on='visit_date').fillna(0)

# Holidays: same left join on the calendar date, missing values become 0.
holidays = pd.read_csv('data/11/date_info.csv.zip')
holidays['calendar_date'] = pd.to_datetime(holidays['calendar_date'])
y = pd.merge(y, holidays, how='left', left_on='date', right_on='calendar_date').fillna(0)

# Preparing the ListDatasets

train_ds = ListDataset([{
    'target': y.loc[:450,'y'],
    'start': start,
    'feat_dynamic_real': y.loc[:450,['reserve_visitors', 'holiday_flg']].values
    }], freq='H')

test_ds = ListDataset([{
    'target': y['y'], 
    'start': start, 
    'feat_dynamic_real': y.loc[:,['reserve_visitors', 'holiday_flg']].values
    }],freq='H')


## Same code for fitting a different model: this model contains the two additional regressors

In [None]:
np.random.seed(7)
mx.random.seed(7)

# Build and fit model
estimator = DeepAREstimator(
    prediction_length=28,
    context_length=100,
    freq='H',
    trainer=Trainer(# ctx="gpu", # remove if running on windows
                    epochs=5,
                    learning_rate=1e-3,
                    num_batches_per_epoch=100
                   )
)

predictor = estimator.train(train_ds)

# Make Predictions
predictions = predictor.predict(test_ds)
predictions = list(predictions)[0]
predictions = predictions.quantile(0.5)

# Compute and print R2 score
print(r2_score( list(test_ds)[0]['target'][-28:], predictions))


## Tuning the hyperparameters

In [None]:
import itertools

from gluonts.evaluation.backtest import make_evaluation_predictions

# Grid search over learning rate, number of layers and cells per layer.
# Each row of `results` is [learning_rate, num_layers, num_cells, r2].
results = []

# Train on a GPU when MXNet can see one, otherwise fall back to the CPU so the
# cell also runs on machines without a GPU.
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()

# Actuals of the hold-out window: the last 28 observations of the single test series.
actuals = list(test_ds)[0]['target'][-28:]

# itertools.product iterates in the same order as the equivalent nested loops.
for learning_rate, num_layers, num_cells in itertools.product([1e-4, 1e-2], [2, 5], [30, 100]):
    # Re-seed for every configuration so each run starts from the same random state
    # and the scores do not depend on the position in the grid.
    np.random.seed(7)
    mx.random.seed(7)

    estimator = DeepAREstimator(
        prediction_length=28,
        freq='H',
        trainer=Trainer(
            ctx=ctx,
            epochs=10,
            learning_rate=learning_rate,
            num_batches_per_epoch=100,
        ),
        num_layers=num_layers,
        num_cells=num_cells,
    )

    predictor = estimator.train(train_ds)

    # Forecast the withheld final window of the test series; predictor.predict(test_ds)
    # would instead forecast the steps after the end of the series.
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,
        predictor=predictor,
        num_samples=100,
    )
    predictions = list(forecast_it)[0].quantile(0.5)

    # r2_score is not symmetric: true values first, predictions second.
    r2 = r2_score(actuals, predictions)
    result = [learning_rate, num_layers, num_cells, r2]
    print(result)
    results.append(result)
