This notebook benchmarks the AutoGluon forecasting task.  
Assume that we have two COVID-19 datasets, `train_data.csv` and `test_data.csv`, in the following format:  

|Date|ConfirmedCases|name|
| ------ | ------ | ------ |
|2020-01-22|0.0|Afghanistan_|
|2020-01-23|0.0|Afghanistan_|
|2020-01-24|0.0|Afghanistan_|
|2020-01-25|0.0|Afghanistan_|
|2020-01-26|0.0|Afghanistan_|

`ConfirmedCases` is the cumulative number of confirmed cases up to that date in a given country.

In [1]:
# Load dataset
from autogluon.forecasting import ForecastingPredictor
from autogluon.forecasting import TabularDataset

# Long-format tables: one row per (Date, name) pair holding the ConfirmedCases value.
train_data = TabularDataset("https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/train.csv")
# The test table covers a longer date range than the training table (compare the rebuilt frames below).
test_data = TabularDataset("https://autogluon.s3-us-west-2.amazonaws.com/datasets/CovidTimeSeries/test.csv")
# Forecast horizon in time steps; also the number of trailing columns held out in the train/val split.
prediction_length = 19
# Key used to read the score out of the GluonTS aggregate metrics.
eval_metric = "mean_wQuantileLoss"

  "Using `json`-module for json-handling. "


In [2]:
# Preview the first rows to confirm the long-format schema (Date, ConfirmedCases, name).
train_data.head()

  and should_run_async(code)


Unnamed: 0,Date,ConfirmedCases,name
0,2020-01-22,0.0,Afghanistan_
1,2020-01-23,0.0,Afghanistan_
2,2020-01-24,0.0,Afghanistan_
3,2020-01-25,0.0,Afghanistan_
4,2020-01-26,0.0,Afghanistan_


## Directly using GluonTS to do forecasting

In [3]:
import pandas as pd


def rebuild_tabular(X, time_column, target_column, index_column=None):
    """Pivot a long-format time-series table into one row per series.

    Parameters
    ----------
    X : pandas.DataFrame
        Long-format table with one row per (series, timestamp) observation.
    time_column : str
        Name of the column holding the timestamps.
    target_column : str
        Name of the column holding the values to forecast.
    index_column : str, optional
        Name of the column identifying each series. When ``None`` the whole
        table is treated as a single series labelled ``"time_series"`` in a
        column named ``"index_column"``.

    Returns
    -------
    pandas.DataFrame
        Wide table whose first column is the series identifier (sorted) and
        whose remaining columns are the sorted unique timestamps, each holding
        the target value of every series at that timestamp.

    Raises
    ------
    ValueError
        If no frequency can be inferred from the timestamps, if a
        (series, timestamp) pair occurs more than once, or if a series has no
        observation for one of the timestamps.
    """
    if index_column is None:
        index_column = "index_column"
        # ``assign`` returns a new frame, which avoids the SettingWithCopyWarning
        # that assigning a column onto a slice can emit and never touches the
        # caller's data.
        X = X[[time_column, target_column]].assign(**{index_column: "time_series"})

    time_list = sorted(set(X[time_column]))
    # The frequency value itself is not used here; inferring it only checks
    # that the timestamps form a regular grid.
    if pd.infer_freq(time_list) is None:
        raise ValueError("Freq cannot be inferred. Check your dataset.")

    # One pivot over the whole table replaces filtering and pivoting it once
    # per timestamp. Duplicate (series, timestamp) pairs raise ValueError here.
    wide = X.pivot(index=index_column, columns=time_column, values=target_column)

    n_series, n_times = wide.shape
    if n_series * n_times != len(X):
        # A ragged panel would otherwise surface as misaligned or missing values.
        raise ValueError(
            "Every series must have exactly one value per timestamp; found "
            f"{len(X)} observations for {n_series} series x {n_times} timestamps."
        )

    # Drop the columns-axis name left behind by pivot so the result is a plain
    # table with the identifier as its first column and a default integer index.
    return wide.loc[:, time_list].rename_axis(columns=None).reset_index()

  and should_run_async(code)


In [4]:
# Both tables share one schema, so the column roles are declared once and reused.
_column_roles = dict(time_column="Date", target_column="ConfirmedCases", index_column="name")
rebuilt_train = rebuild_tabular(train_data, **_column_roles)
rebuilt_test = rebuild_tabular(test_data, **_column_roles)

In [5]:
# Wide layout check: one row per series name, one column per date.
rebuilt_train.head()

Unnamed: 0,name,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,...,2020-03-24,2020-03-25,2020-03-26,2020-03-27,2020-03-28,2020-03-29,2020-03-30,2020-03-31,2020-04-01,2020-04-02
0,Afghanistan_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,74.0,84.0,94.0,110.0,110.0,120.0,170.0,174.0,237.0,273.0
1,Albania_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,123.0,146.0,174.0,186.0,197.0,212.0,223.0,243.0,259.0,277.0
2,Algeria_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,264.0,302.0,367.0,409.0,454.0,511.0,584.0,716.0,847.0,986.0
3,Andorra_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,164.0,188.0,224.0,267.0,308.0,334.0,370.0,376.0,390.0,428.0
4,Angola_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,3.0,4.0,4.0,5.0,7.0,7.0,7.0,8.0,8.0


In [6]:
# Same wide layout for the test table, which extends to later dates than the training table.
rebuilt_test.head()

  and should_run_async(code)


Unnamed: 0,name,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,...,2020-04-12,2020-04-13,2020-04-14,2020-04-15,2020-04-16,2020-04-17,2020-04-18,2020-04-19,2020-04-20,2020-04-21
0,Afghanistan_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,607.0,665.0,714.0,784.0,840.0,906.0,933.0,996.0,1026.0,1092.0
1,Albania_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,446.0,467.0,475.0,494.0,518.0,539.0,548.0,562.0,584.0,609.0
2,Algeria_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1914.0,1983.0,2070.0,2160.0,2268.0,2418.0,2534.0,2629.0,2718.0,2811.0
3,Andorra_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,638.0,646.0,659.0,673.0,673.0,696.0,704.0,713.0,717.0,717.0
4,Angola_,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,19.0,19.0,19.0,19.0,19.0,19.0,24.0,24.0,24.0,24.0


Train-Val Split

In [7]:
# Validation keeps the full training history; training drops the last
# `prediction_length` time steps so the validation tail stays unseen.
# Both frames are derived from `train_data` directly, so re-running this cell
# cannot truncate `rebuilt_train` a second time.
rebuilt_val = rebuild_tabular(train_data, time_column="Date", target_column="ConfirmedCases", index_column="name")
rebuilt_train = rebuilt_val.iloc[:, :-prediction_length]

  and should_run_async(code)


Then transform the rebuilt tables into GluonTS `ListDataset` objects.

In [8]:
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.common import ListDataset

def create_gluonts_data(df, index_column):
    """Convert a wide table (one row per series) into a GluonTS ``ListDataset``.

    ``df`` holds the series identifier in ``index_column`` and one column per
    timestamp. Every series starts at the first timestamp column, and the
    frequency is inferred from the timestamp column labels.
    """
    item_ids = df[index_column]
    wide_values = df.drop(columns=index_column)
    timestamps = wide_values.columns
    freq = pd.infer_freq(timestamps)

    entries = []
    for series_values, item_id in zip(wide_values.values, item_ids):
        entries.append({
            FieldName.TARGET: series_values,
            # NOTE(review): the `freq` argument of pd.Timestamp is deprecated in
            # recent pandas releases - confirm against the pinned pandas version.
            FieldName.START: pd.Timestamp(timestamps[0], freq=freq),
            FieldName.ITEM_ID: item_id,
        })
    return ListDataset(entries, freq)

In [9]:
# Convert the three wide frames in order: train, validation, test.
gluonts_train_data, gluonts_val_data, gluonts_test_data = (
    create_gluonts_data(wide_frame, "name")
    for wide_frame in (rebuilt_train, rebuilt_val, rebuilt_test)
)

Train and evaluate

In [10]:
from gluonts.model.seq2seq import MQCNNEstimator
from gluonts.mx.trainer import Trainer

# Hyperparameters for the MQ-CNN baseline. The horizon reuses `prediction_length`
# so it always matches the number of time steps held out in the train/val split.
params = {
    "prediction_length": prediction_length,
    "freq": "D",
    "num_batches_per_epoch": 10,
    "epochs": 5,
    "quantiles": [0.1, 0.5, 0.9],
}
model = MQCNNEstimator.from_hyperparameters(**params)

# Trains on the truncated training set only.
predictor = model.train(gluonts_train_data)

100%|██████████| 10/10 [00:00<00:00, 16.60it/s, epoch=1/5, avg_epoch_loss=65.9]
100%|██████████| 10/10 [00:00<00:00, 18.37it/s, epoch=2/5, avg_epoch_loss=65]
100%|██████████| 10/10 [00:00<00:00, 17.63it/s, epoch=3/5, avg_epoch_loss=63.8]
100%|██████████| 10/10 [00:00<00:00, 18.04it/s, epoch=4/5, avg_epoch_loss=61.8]
100%|██████████| 10/10 [00:00<00:00, 18.13it/s, epoch=5/5, avg_epoch_loss=57.9]


In [11]:
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from tqdm import tqdm


# Backtest on the test set. Per the GluonTS documentation this helper forecasts
# the trailing window of each series and returns the forecasts together with
# the ground-truth series to compare them against.
forecast_it, ts_it = make_evaluation_predictions(dataset=gluonts_test_data,
                                                 predictor=predictor,
                                                 num_samples=100)
# Materialise both iterators so they can be counted and passed to the evaluator.
forecasts, tss = list(forecast_it), list(ts_it)
# Score the same quantiles the model was trained to predict.
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(tss))
# Report only the aggregate metric selected by `eval_metric`.
print(agg_metrics[eval_metric])

  and should_run_async(code)
Running evaluation: 100%|██████████| 313/313 [00:00<00:00, 5916.18it/s]
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error


0.8135262271039411


Get prediction results for the 19 days (`prediction_length`) after the end of the test dataset

In [12]:
# Forecast the `prediction_length` steps that follow the end of each test series.
predicted_targets = list(predictor.predict(gluonts_test_data))
# `gluonts_test_data` was built row by row from `rebuilt_test`, so the series
# names are taken from that same frame (forecasts are assumed to come back in
# dataset order) instead of from `train_data`, whose names need not match.
index = rebuilt_test["name"].tolist()
if len(index) != len(predicted_targets):
    raise ValueError(
        f"Expected one forecast per series, got {len(predicted_targets)} forecasts "
        f"for {len(index)} series."
    )

quantiles = [0.1, 0.5, 0.9]
result_dict = {}
for item_name, forecast in zip(index, predicted_targets):
    # One column per quantile, one row per forecast day.
    quantile_frame = pd.DataFrame({q: forecast.quantile(str(q)) for q in quantiles})
    quantile_frame.index = pd.date_range(start=forecast.start_date,
                                         periods=prediction_length,
                                         freq="D")
    result_dict[item_name] = quantile_frame

print(result_dict["Afghanistan_"])

                   0.1         0.5          0.9
2020-04-22  -67.290489   69.583473   809.334473
2020-04-23 -105.702316   83.647087  1000.546448
2020-04-24 -345.984497   59.762672   912.256531
2020-04-25    3.898935 -113.366554   834.492249
2020-04-26 -172.781479   84.556496   711.205505
2020-04-27 -104.401733   92.407059   749.321228
2020-04-28  -73.024734  -69.086647   913.753357
2020-04-29  -26.025049 -114.876076   860.888977
2020-04-30 -198.196960   46.084614   784.491089
2020-05-01 -223.879150  140.180588   764.425964
2020-05-02  -55.120819 -165.239029   998.736267
2020-05-03  111.290253  205.079834   715.738953
2020-05-04 -125.164833   59.508762   821.684265
2020-05-05  -80.372231  -65.367897   928.807678
2020-05-06 -208.609177   23.097174   910.660034
2020-05-07 -107.474663   94.180489   835.745239
2020-05-08   41.594910   -3.678510   906.624146
2020-05-09 -138.069046  -37.294048   894.540588
2020-05-10 -349.204132   15.326151  1076.700317


## Using AutoGluon to do the same forecasting

In [14]:
from autogluon.forecasting import ForecastingPredictor

import autogluon.core as ag

# change this to specify eval metric, one of ["MASE", "MAPE", "sMAPE", "mean_wQuantileLoss"]
eval_metric = "mean_wQuantileLoss"

path = "benchmark_models"
forecaster = ForecastingPredictor(path=path, eval_metric=eval_metric)

# Random search over 5 trials, run with the local scheduler.
tune_kwargs = {
    'scheduler': 'local',
    'searcher': 'random',
    "num_trials": 5,
}

# Only MQCNN is tuned; context_length is searched, the training budget is fixed.
mqcnn_search_space = {
    'context_length': ag.Int(70, 90, default=prediction_length * 4),
    "num_batches_per_epoch": 10,
    "epochs": 5,
}

predictor = forecaster.fit(
    train_data,
    prediction_length,
    index_column="name",
    target_column="ConfirmedCases",
    time_column="Date",
    hyperparameter_tune_kwargs=tune_kwargs,
    quantiles=[0.1, 0.5, 0.9],
    refit_full=True,
    hyperparameters={"MQCNN": mqcnn_search_space},
)

refit_full is set while set_best_to_refit_full is not set, automatically setting set_best_to_refit_full=Trueto make sure that the model will predict with refit full model by default.
Training with dataset in tabular format...
Finish rebuilding the data, showing the top five rows.
           name  2020-01-22  2020-01-23  2020-01-24  2020-01-25  2020-01-26  \
0  Afghanistan_         0.0         0.0         0.0         0.0         0.0   
1      Albania_         0.0         0.0         0.0         0.0         0.0   
2      Algeria_         0.0         0.0         0.0         0.0         0.0   
3      Andorra_         0.0         0.0         0.0         0.0         0.0   
4       Angola_         0.0         0.0         0.0         0.0         0.0   

   2020-01-27  2020-01-28  2020-01-29  2020-01-30  ...  2020-03-24  \
0         0.0         0.0         0.0         0.0  ...        74.0   
1         0.0         0.0         0.0         0.0  ...       123.0   
2         0.0         0.0         

  0%|          | 0/5 [00:00<?, ?it/s]

Start model training
Epoch[0] Learning rate is 0.001

  0%|          | 0/10 [00:00<?, ?it/s][ANumber of parameters in ForkingSeq2SeqTrainingNetwork: 57784
100%|██████████| 10/10 [00:00<00:00, 16.83it/s, epoch=1/5, avg_epoch_loss=66.5]
Epoch[0] Elapsed time 0.596 seconds
Epoch[0] Evaluation metric 'epoch_loss'=66.517388

0it [00:00, ?it/s][ANumber of parameters in ForkingSeq2SeqTrainingNetwork: 57784
10it [00:00, 29.75it/s, epoch=1/5, validation_avg_epoch_loss=133]
Epoch[0] Elapsed time 0.337 seconds
Epoch[0] Evaluation metric 'validation_epoch_loss'=133.266467
Epoch[1] Learning rate is 0.001

100%|██████████| 10/10 [00:00<00:00, 18.01it/s, epoch=2/5, avg_epoch_loss=65.6]
Epoch[1] Elapsed time 0.557 seconds
Epoch[1] Evaluation metric 'epoch_loss'=65.587986

10it [00:00, 32.41it/s, epoch=2/5, validation_avg_epoch_loss=131]
Epoch[1] Elapsed time 0.310 seconds
Epoch[1] Evaluation metric 'validation_epoch_loss'=131.037222
Epoch[2] Learning rate is 0.001

100%|██████████| 10/10 [00:00<00:0


  0%|          | 0/313 [00:00<?, ?it/s][A
 31%|███       | 97/313 [00:00<00:00, 814.55it/s][A
100%|██████████| 313/313 [00:00<00:00, 1059.64it/s][A

100%|██████████| 313/313 [00:00<00:00, 7182.07it/s]

  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error

  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  subarr = np.array(values, dtype=dtype, copy=copy)
Validation score for model MQCNN/trial_1 is -0.8439218034922528
Serializing RepresentableBlockPredictor instances does not save the prediction network structure in a backwards-compatible manner. Be careful not to use this method in production.
Start mode

Epoch[0] Learning rate is 0.001

  0%|          | 0/10 [00:00<?, ?it/s][ANumber of parameters in ForkingSeq2SeqTrainingNetwork: 57784
100%|██████████| 10/10 [00:00<00:00, 15.11it/s, epoch=1/5, avg_epoch_loss=64.8]
Epoch[0] Elapsed time 0.663 seconds
Epoch[0] Evaluation metric 'epoch_loss'=64.800441

0it [00:00, ?it/s][ANumber of parameters in ForkingSeq2SeqTrainingNetwork: 57784
10it [00:00, 28.86it/s, epoch=1/5, validation_avg_epoch_loss=130]
Epoch[0] Elapsed time 0.348 seconds
Epoch[0] Evaluation metric 'validation_epoch_loss'=129.717323
Epoch[1] Learning rate is 0.001

100%|██████████| 10/10 [00:00<00:00, 15.50it/s, epoch=2/5, avg_epoch_loss=63.6]
Epoch[1] Elapsed time 0.647 seconds
Epoch[1] Evaluation metric 'epoch_loss'=63.645500

10it [00:00, 29.60it/s, epoch=2/5, validation_avg_epoch_loss=126]
Epoch[1] Elapsed time 0.339 seconds
Epoch[1] Evaluation metric 'validation_epoch_loss'=126.211406
Epoch[2] Learning rate is 0.001

100%|██████████| 10/10 [00:00<00:00, 16.60it/s, epoch=3

  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  subarr = np.array(values, dtype=dtype, copy=copy)
Validation score for model MQCNN/trial_4 is -0.8483867144836337
Serializing RepresentableBlockPredictor instances does not save the prediction network structure in a backwards-compatible manner. Be careful not to use this method in production.
AutoGluon training complete, total runtime = 28.88s ...
Fitting model: MQCNN/trial_3_FULL ...
Start model training
Epoch[0] Learning rate is 0.001
  0%|          | 0/10 [00:00<?, ?it/s]Number of parameters in ForkingSeq2SeqTrainingNetwork: 57784
100%|██████████| 10/10 [00:00<00:00, 14.91it/s, epoch=1/5, avg_epoch_loss=130]
Epoch[0] Elap

Evaluate the model

In [15]:
# Score the fitted AutoGluon predictor on the test table; no model is named, so the predictor's default is used.
print(predictor.evaluate(test_data))

  and should_run_async(code)
Does not specify model, will by default use the model with the best validation score for evaluation
100%|██████████| 313/313 [00:00<00:00, 993.81it/s]
100%|██████████| 313/313 [00:00<00:00, 7188.99it/s]
Running evaluation: 100%|██████████| 313/313 [00:00<00:00, 5355.64it/s]
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error


0.5248646845645433


Get prediction results for the 19 days (`prediction_length`) after the end of the test dataset

In [16]:
# Forecast the same three quantiles as the GluonTS baseline; the result is keyed by series name.
predictions = predictor.predict(test_data, quantiles=[0.1, 0.5, 0.9])
# Show one series; its rows start the day after the last date in the test table.
print(predictions['Afghanistan_'])

Does not specify model, will by default use the model with the best validation score for prediction
Predicting with model MQCNN/trial_3_FULL


                   0.1          0.5          0.9
2020-04-22  242.078461   782.105225  2159.481445
2020-04-23  -22.214926   707.250366  1726.586792
2020-04-24   43.596775   845.501099  1766.657349
2020-04-25   58.433422   993.568848  1987.686279
2020-04-26  -28.051870   879.044495  1669.761597
2020-04-27  -86.180763   826.131653  2302.200195
2020-04-28  165.177979  1068.669312  1926.341064
2020-04-29   18.150400   838.976379  1977.361572
2020-04-30   16.907766   987.353638  2033.114380
2020-05-01  161.226837  1151.508911  1938.161377
2020-05-02   73.612251   755.289978  1841.992798
2020-05-03  154.759903   695.801636  1888.712769
2020-05-04  349.880035  1093.450195  2058.896240
2020-05-05  260.894470  1105.507812  2122.684326
2020-05-06  227.347748   996.746338  1840.529663
2020-05-07  179.547821   632.326355  2053.500244
2020-05-08  202.275330  1099.801880  2525.892334
2020-05-09   29.924116   657.542664  2162.740234
2020-05-10   -9.824925  1122.306763  2158.419678
