In [1]:
!pip install gluonts



In [2]:
# required for importing gluonts backend modules
!pip install mxnet



In [3]:
# import statements go here
from gluonts.dataset.common import load_datasets, ListDataset
from gluonts.dataset.field_names import FieldName

# import mxnet as mx
# from mxnet import gluon

import numpy as np
import pandas as pd
import os
import json
import pickle
from tqdm.autonotebook import tqdm
import matplotlib.pyplot as plt

  del sys.path[0]


## Load pre-processed training data

In [4]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [5]:
base_dir_path = '/gdrive/MyDrive/Forecasting - DSI Capstone Spring \'21/Colabs'

In [6]:
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.mx.trainer import Trainer
from gluonts.model.n_beats import NBEATSEnsembleEstimator
from gluonts.evaluation import Evaluator

In [7]:
single_prediction_length = 28
seed_value= 247
prediction_length = single_prediction_length

agg_level = 'product' # ['store' | 'product']
agg_dim = 'store_id' if agg_level == 'store' else 'item_id' # ['store_id' | 'item_id']

# start date of CA_1 training dataset: 2014-01-01
# v1: cutoff training at 2015-12-03
#   : validation range 2015-12-04 through 2015-12-31
# this may need to be pared down for cross validation later

train_end = '2015-12-03'
validation_end = '2015-12-31'

### Load test dataset

In [8]:
pkl_paths = dict(
    store={
      'training': os.path.join(base_dir_path, 'preprocessed_data/train_ca1_store.pkl'),
      'test': os.path.join(base_dir_path, 'preprocessed_data/test_ca1_store_no_features.pkl')
      },
    product={
      'training': os.path.join(base_dir_path, 'preprocessed_data/train_ca1_store_product.pkl'),
      'test': os.path.join(base_dir_path, 'preprocessed_data/test_ca1_store_product_no_features.pkl')
      },
)

In [9]:
# Loaded train dataset includes prediction horizon. (2016-01-01 + 27D)
train_df = pd.read_pickle(pkl_paths[agg_level]['training'])
test_df = pd.read_pickle(pkl_paths[agg_level]['test'])

# Comment out for entire product
single_cat_id = 'FOODS'  # [HOBBIES | HOUSEHOLD | FOODS]
train_df = train_df[train_df.cat_id == single_cat_id]
test_df = test_df[test_df.item_id.str.contains(single_cat_id)]

train_df = train_df.assign(sales=train_df.sales.astype('float32'))
train_df, validation_df, test_features_df = (
    train_df[train_df.date <= train_end], 
    train_df[(train_df.date > train_end) & (train_df.date <= validation_end)],
    train_df[train_df.date > validation_end]
)
# replace NaNs with test ground truth
if agg_level == 'store':
  test_features_df.iloc[:, test_features_df.columns.tolist().index('sales')] = test_df.sales.to_numpy()
else:
  test_features_df = test_features_df.merge(test_df[['item_id', 'd', 'sales']], on=['item_id', 'd'], suffixes=('', '_gt'))
  test_features_df = test_features_df.assign(sales=test_features_df.sales_gt).drop(columns=['sales_gt'])

m5_dates = train_df.groupby(agg_dim).date.min()

# # Need dataset to be in wide format for compatibility with A2 NBEATS implementation code
target_df = train_df.pivot(index=agg_dim, columns='date', values='sales')
target_val_df = validation_df.pivot(index=agg_dim, columns='date', values='sales')
target_test_df = test_df.pivot(index=agg_dim, columns='date', values='sales')

In [None]:
# single-item predictions
single_item_id = 'HOUSEHOLD_2_175'  # [None | 'FOODS_3_586' | 'HOUSEHOLD_2_266' | 'HOUSEHOLD_2_175']
if single_item_id:
    print(f'************************\nATTN: TRAINING MODEL FOR {single_item_id}\n************************')

    target_df = target_df[target_df.index.get_level_values(0)==single_item_id]
    target_val_df = target_val_df[target_val_df.index.get_level_values(0)==single_item_id]
    target_test_df = target_test_df[target_test_df.index.get_level_values(0)==single_item_id]

    train_df = train_df[train_df.item_id == single_item_id]
    validation_df = validation_df[validation_df.item_id == single_item_id]
    test_features_df = test_features_df[test_features_df.item_id == single_item_id]

    m5_dates = m5_dates[m5_dates.index.get_level_values(0) == single_item_id]

************************
ATTN: TRAINING MODEL FOR HOUSEHOLD_2_175
************************


In [10]:
# drop items that are not visible in training set
if agg_level == 'product':
  validation_drop = [idx for idx in target_val_df.index.to_numpy() if idx not in target_df.index.to_numpy()]
  test_drop = [idx for idx in target_test_df.index.to_numpy() if idx not in target_df.index.to_numpy()]

  target_val_df.drop(index=validation_drop, inplace=True)
  target_test_df.drop(index=test_drop, inplace=True)

In [11]:
# make sure all data set item numbers are equal
assert sum([df.shape[0] == target_df.shape[0] for df in [target_df, target_val_df, target_test_df]]) == 3

In [12]:
# Add features to DataSet
dynamic_real_cols = [col for col in train_df.columns if 'sales_' in col or 'rolling_' in col]
static_real_cols = [col for col in train_df.columns if 'enc_' in col]
dynamic_cat_cols = [col for col in train_df.columns if 'event_' in col or 'snap_' in col or 'tm_' in col]

In [13]:
# ensure order is preserved for all series
idxs = target_df.index.to_numpy()

In [15]:
# float16 (as saved by the pre-processing workflow) for some reason causes problems in pivot operations
col_to_convert = [dynamic_real_cols, static_real_cols, dynamic_cat_cols]
for col in col_to_convert:
  train_df[col] = train_df[col].astype('float32')
  validation_df[col] = validation_df[col].astype('float32')
  test_features_df[col] = test_features_df[col].astype('float32')

In [16]:
# Removed mean and std enc. data points for now (issues in training)
if agg_level == 'store':
  feat_dynamic_real_train = train_df[dynamic_real_cols].T.to_numpy()[None, :, :]
  # feat_static_real_train = train_df[static_real_cols].iloc[0].values[None, :]
  feat_dynamic_cat_train = train_df[dynamic_cat_cols].T.to_numpy()[None, :, :]

  feat_dynamic_real_validation = (pd.concat([train_df[dynamic_real_cols], validation_df[dynamic_real_cols]])).T.to_numpy()[None, :, :]
  # feat_static_real_validation = validation_df[static_real_cols].iloc[0].values[None, :]
  feat_dynamic_cat_validation = (pd.concat([train_df[dynamic_cat_cols], validation_df[dynamic_cat_cols]])).T.to_numpy()[None, :, :]

  feat_dynamic_real_test = (pd.concat([
                                      train_df[dynamic_real_cols], 
                                      validation_df[dynamic_real_cols],
                                      test_features_df[dynamic_real_cols]
                                      ])).T.to_numpy()[None, :, :]
  # feat_static_real_test = test_features_df[static_real_cols].iloc[0].values[None, :]
  feat_dynamic_cat_test = (pd.concat([
                                      train_df[dynamic_cat_cols], 
                                      validation_df[dynamic_cat_cols],
                                      test_features_df[dynamic_cat_cols]
                                      ])).T.to_numpy()[None, :, :]
else:
  # # ensure identical orders of products and dates

  # static real features removed
  # print('Creating features for training...')

  print('Training FDR feature processing')
  feat_dynamic_real_train = train_df[['item_id', 'date']+dynamic_real_cols].melt(
      id_vars=['item_id', 'date'],
      var_name='feature',
      value_name='value',
  ).set_index(['item_id', 'date', 'feature']).unstack(1)

  feat_dynamic_real_train = np.array([feat_dynamic_real_train.loc[iid].to_numpy() for iid in tqdm(idxs)])

  print('Training FDC feature processing')
  feat_dynamic_cat_train = train_df[['item_id', 'date']+dynamic_cat_cols].melt(
      id_vars=['item_id', 'date'],
      var_name='feature',
      value_name='value',
  ).set_index(['item_id', 'date', 'feature']).unstack(1)

  feat_dynamic_cat_train = np.array([feat_dynamic_cat_train.loc[iid].to_numpy() for iid in tqdm(idxs)])

  print('Validation FDR feature processing')
  feat_dynamic_real_validation = validation_df[['item_id', 'date']+dynamic_real_cols].melt(
      id_vars=['item_id', 'date'],
      var_name='feature',
      value_name='value',
  ).set_index(['item_id', 'date', 'feature']).unstack(1)

  feat_dynamic_real_validation = np.array([feat_dynamic_real_validation.loc[iid].to_numpy() for iid in tqdm(idxs)])

  print('Validation FDC feature processing')
  feat_dynamic_cat_validation = validation_df[['item_id', 'date']+dynamic_cat_cols].melt(
      id_vars=['item_id', 'date'],
      var_name='feature',
      value_name='value',
  ).set_index(['item_id', 'date', 'feature']).unstack(1)

  feat_dynamic_cat_validation = np.array([feat_dynamic_cat_validation.loc[iid].to_numpy() for iid in tqdm(idxs)])

  feat_dynamic_real_validation = np.append(feat_dynamic_real_train, feat_dynamic_real_validation, axis=2)
  feat_dynamic_cat_validation = np.append(feat_dynamic_cat_train, feat_dynamic_cat_validation, axis=2)

  print('Test FDR feature processing')
  feat_dynamic_real_test = test_features_df[['item_id', 'date']+dynamic_real_cols].melt(
      id_vars=['item_id', 'date'],
      var_name='feature',
      value_name='value',
  ).set_index(['item_id', 'date', 'feature']).unstack(1)

  feat_dynamic_real_test = np.array([feat_dynamic_real_test.loc[iid].to_numpy() for iid in tqdm(idxs)])

  print('Test FDC feature processing')
  feat_dynamic_cat_test = test_features_df[['item_id', 'date']+dynamic_cat_cols].melt(
      id_vars=['item_id', 'date'],
      var_name='feature',
      value_name='value',
  ).set_index(['item_id', 'date', 'feature']).unstack(1)

  feat_dynamic_cat_test = np.array([feat_dynamic_cat_test.loc[iid].to_numpy() for iid in tqdm(idxs)])

  feat_dynamic_real_test = np.append(feat_dynamic_real_validation, feat_dynamic_real_test, axis=2)
  feat_dynamic_cat_test = np.append(feat_dynamic_cat_validation, feat_dynamic_cat_test, axis=2)

Training FDR feature processing


HBox(children=(FloatProgress(value=0.0, max=1433.0), HTML(value='')))


Training FDC feature processing


HBox(children=(FloatProgress(value=0.0, max=1433.0), HTML(value='')))


Validation FDR feature processing


HBox(children=(FloatProgress(value=0.0, max=1433.0), HTML(value='')))


Validation FDC feature processing


HBox(children=(FloatProgress(value=0.0, max=1433.0), HTML(value='')))


Test FDR feature processing


HBox(children=(FloatProgress(value=0.0, max=1433.0), HTML(value='')))


Test FDC feature processing


HBox(children=(FloatProgress(value=0.0, max=1433.0), HTML(value='')))




In [None]:
# # save feature file in case of loss

# with open(os.path.join(base_dir_path, 'preprocessed_data/nbeats_data/features.pkl'), 'wb') as f:
#   pickle.dump({
#       'feat_dynamic_real_train': feat_dynamic_real_train,
#       'feat_dynamic_cat_train': feat_dynamic_cat_train,
#       'feat_dynamic_real_validation': feat_dynamic_real_validation,
#       'feat_dynamic_cat_validation': feat_dynamic_cat_validation,
#       'feat_dynamic_real_test': feat_dynamic_real_test,
#       'feat_dynamic_cat_test': feat_dynamic_cat_test,
#   }, f)

In [None]:
# # load saved feature file to save time
# with open(os.path.join(base_dir_path, 'preprocessed_data/nbeats_data/features.pkl'), 'rb') as f:
#   feature_dict = pickle.load(f)

# feat_dynamic_real_train, feat_dynamic_cat_train, feat_dynamic_real_validation = (feature_dict['feat_dynamic_real_train'], feature_dict['feat_dynamic_cat_train'], feature_dict['feat_dynamic_real_validation'])
# feat_dynamic_cat_validation, feat_dynamic_real_test, feat_dynamic_cat_test = (feature_dict['feat_dynamic_cat_validation'], feature_dict['feat_dynamic_real_test'], feature_dict['feat_dynamic_cat_test'])

In [17]:
if single_item_id == 'HOUSEHOLD_2_175':
    target_df = target_df.fillna(0)

In [18]:
# train_df = all_aggregates.drop(["id_str"], axis=1)
train_target_values = target_df.to_numpy()

# # A2 team implementation
# if SUBMISSION == True:
#   test_target_values = [np.append(ts, np.ones(prediction_length) * np.nan) for ts in train_df.values]
# else:
#   test_target_values = train_target_values.copy()
#   train_target_values = [ts[:-prediction_length] for ts in train_df.values]

validation_target_values = np.append(target_df.to_numpy(), target_val_df.to_numpy(), axis=1)
test_target_values =  np.append(validation_target_values, target_test_df, axis=1)

train_ds = ListDataset([
  {
    FieldName.ITEM_ID: id,
    FieldName.TARGET: target,
    FieldName.START: start,
    FieldName.FEAT_DYNAMIC_REAL: fdr,
    FieldName.FEAT_DYNAMIC_CAT: fdc,
    # FieldName.FEAT_STATIC_REAL: fsr,
  }
  for (id, target, start, fdr, fdc
      #  , fsr
       ) in zip(
    idxs,
    train_target_values,
    m5_dates,
    feat_dynamic_real_train,
    feat_dynamic_cat_train,
    # feat_static_real_train,
  )
  ], freq="D")

validation_ds = ListDataset([
  {
    FieldName.ITEM_ID: id,
    FieldName.TARGET: target,
    FieldName.START: start,
    FieldName.FEAT_DYNAMIC_REAL: fdr,
    FieldName.FEAT_DYNAMIC_CAT: fdc,
    # FieldName.FEAT_STATIC_REAL: fsr,
  }
  for (id, target, start, fdr, fdc
      #  , fsr
       ) in zip(
    idxs,
    validation_target_values,
    m5_dates,
    feat_dynamic_real_validation,
    feat_dynamic_cat_validation,
    # feat_static_real_validation,
  )
  ], freq="D")

test_ds = ListDataset([
  {
    FieldName.ITEM_ID: id,
    FieldName.TARGET: target,
    FieldName.START: start,
    FieldName.FEAT_DYNAMIC_REAL: fdr,
    FieldName.FEAT_DYNAMIC_CAT: fdc,
    # FieldName.FEAT_STATIC_REAL: fsr,
  }
  for (id, target, start, fdr, fdc
      #  , fsr
       ) in zip(
    idxs,
    test_target_values,
    m5_dates,
    feat_dynamic_real_test,
    feat_dynamic_cat_test,
    # feat_static_real_test,
  )
  ], freq="D")


### Model hyperparameters

In [21]:
trainer_params = dict(
    learning_rate=10e-3,  #A2 used 6e-4. Default is 10e-3
    epochs=12, #A2 used 12. default is 100
    # epochs=1, #A2 used 12. default is 100
    # num_batches_per_epoch=1000,  #A2 used 1000. Default is 50
    num_batches_per_epoch=50,  #A2 used 1000. Default is 50
    batch_size=16
)

# enter trainer_params separately
nbeats_train_params = dict(
  prediction_length=prediction_length,
  meta_bagging_size=3,
  # meta_bagging_size=1, # used for product-store level predictions
  meta_context_length=[prediction_length * mlp for mlp in [3,5,7]],
  # meta_loss_function=['sMAPE'],
  meta_loss_function=['MASE'], # used for product-store level predictions
  num_stacks=30,
  widths=[512],
)


In [22]:
# total number of models: |meta_context_length| x |meta_loss_function| x meta_bagging_size
estimator = NBEATSEnsembleEstimator(
  **nbeats_train_params,
  trainer=Trainer(**trainer_params),
  freq="D",
)

TRAINER:gluonts.mx.trainer._base.Trainer(avg_strategy=gluonts.mx.trainer.model_averaging.SelectNBestMean(maximize=False, metric="score", num_models=1), batch_size=16, clip_gradient=10.0, ctx=None, epochs=8, hybridize=True, init="xavier", learning_rate=0.01, learning_rate_decay_factor=0.5, minimum_learning_rate=5e-05, num_batches_per_epoch=50, patience=10, post_initialize_cb=None, weight_decay=1e-08)


In [23]:
# if SUBMISSION:
#   predictor = estimator.train(train_ds)
# else:
#   predictor = estimator.train(train_ds,test_ds) # this method raises an error at the library level

predictor = estimator.train(train_ds)

100%|██████████| 50/50 [00:16<00:00,  2.98it/s, epoch=1/8, avg_epoch_loss=2.79e+19]
100%|██████████| 50/50 [00:15<00:00,  3.17it/s, epoch=2/8, avg_epoch_loss=21.1]
100%|██████████| 50/50 [00:15<00:00,  3.16it/s, epoch=3/8, avg_epoch_loss=1.47]
100%|██████████| 50/50 [00:15<00:00,  3.17it/s, epoch=4/8, avg_epoch_loss=1.63]
100%|██████████| 50/50 [00:15<00:00,  3.18it/s, epoch=5/8, avg_epoch_loss=1.25]
100%|██████████| 50/50 [00:15<00:00,  3.18it/s, epoch=6/8, avg_epoch_loss=1.22]
100%|██████████| 50/50 [00:15<00:00,  3.18it/s, epoch=7/8, avg_epoch_loss=1.25]
100%|██████████| 50/50 [00:15<00:00,  3.17it/s, epoch=8/8, avg_epoch_loss=1.42]
100%|██████████| 50/50 [00:22<00:00,  2.26it/s, epoch=1/8, avg_epoch_loss=4.32e+17]
100%|██████████| 50/50 [00:21<00:00,  2.35it/s, epoch=2/8, avg_epoch_loss=1.64]
100%|██████████| 50/50 [00:21<00:00,  2.34it/s, epoch=3/8, avg_epoch_loss=1.49]
100%|██████████| 50/50 [00:21<00:00,  2.30it/s, epoch=4/8, avg_epoch_loss=1.27]
100%|██████████| 50/50 [00:21<00

### Save trained predictor, and forecast result

In [None]:
# save_fpath = os.path.join(base_dir_path, f'models/nbeats_predictor_{agg_level}.pkl')
# with open(save_fpath, 'wb') as f:
#   pickle.dump(predictor, f)

In [None]:
from pathlib import Path
predictor.serialize(Path(os.path.join(base_dir_path, 'models')))
# loading back saved model

# from gluonts.model.predictor import Predictor
# predictor_deserialized = Predictor.deserialize(Path(os.path.join(base_dir_path, 'models/nbeats_serialized')))



### Evaluate predictions

In [24]:
# A2 team's implementation of RMSSE. Added sqrt to add back "root" in RMSSE.
class M5Evaluator(Evaluator):

  def get_metrics_per_ts(self, time_series, forecast):
    successive_diff = np.diff(time_series.values.reshape(len(time_series)))
    successive_diff = successive_diff ** 2
    successive_diff = successive_diff[:-prediction_length]
    denom = np.mean(successive_diff)
    pred_values = forecast.samples.mean(axis=0)
    true_values = time_series.values.reshape(len(time_series))[-prediction_length:]
    num = np.mean((pred_values - true_values)**2)
    rmsse = np.sqrt(num / denom)
    metrics = super().get_metrics_per_ts(time_series, forecast)
    metrics["RMSSE"] = rmsse
    return metrics

  def get_aggregate_metrics(self, metric_per_ts):
    wrmsse = metric_per_ts["RMSSE"].mean()
    agg_metric , _ = super().get_aggregate_metrics(metric_per_ts)
    agg_metric["MRMSSE"] = wrmsse
    return agg_metric, metric_per_ts

In [25]:
def plot_prob_forecasts(ts_entry, forecast_entry, run_type, savefig=False):
  plot_length = 100
  prediction_intervals = (50.0, 90.0)
  legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]

  fig, ax = plt.subplots(1, 1, figsize=(10, 7))
  ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
  forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
  plt.grid(which="both")
  plt.legend(legend, loc="upper left")

  if savefig:
    plt.savefig(os.path.join(base_dir_path, f'plots/nbeats_predictor_{agg_level}_{run_type}.png'), bbox_inches='tight')

  plt.show()


### Configure logging

In [26]:
import logging

logger = logging.getLogger('nbeats')
fhandler = logging.FileHandler(filename=os.path.join(base_dir_path, f'models/nbeats.log'), mode='a')
formatter = logging.Formatter('%(levelname)s %(pathname)s (%(asctime)s): %(message)s')
fhandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.setLevel(logging.DEBUG)

In [27]:
def record_results(tss, forecasts, agg_level, single_item_id=None, single_cat_id=None,run_type='validation'):
  """
  No plotting supported for product-level forecasting
  """
  d_list = [f'd_{n}' for n in range(1799, 1799+28)] if run_type == 'test' else [f'd_{n}' for n in range(1771, 1771+28)]

  if agg_level == 'store':
    date_series = test_features_df.date if run_type == 'test' else validation_df.date

    forecast_df = pd.DataFrame(dict(
        d=d_list,
        date=date_series,
        pred_sales=forecasts[0].samples.reshape(-1),
    ))

    forecast_df.assign(model='nbeats').to_csv(os.path.join(base_dir_path, f'predictions/nbeats_{agg_level}_{run_type}.csv'), index=False)
    plot_prob_forecasts(tss[0], forecasts[0], run_type, savefig=True)

  else:
    if single_item_id:
        save_fpath = f'predictions/nbeats_{agg_level}_{run_type}_{single_item_id}.csv'
        log_hparam = f'Hyperparam ({agg_level}-{run_type}-{single_item_id}): '
        log_results = f'Results ({agg_level}-{run_type}-{single_item_id}): '
    elif single_cat_id:
        save_fpath = f'predictions/nbeats_{agg_level}_{run_type}_{single_cat_id}.csv'
        log_hparam = f'Hyperparam ({agg_level}-{run_type}-{single_cat_id}): '
        log_results = f'Results ({agg_level}-{run_type}-{single_cat_id}): '
    else:
        save_fpath = f'predictions/nbeats_{agg_level}_{run_type}.csv'
        log_hparam = f'Hyperparam ({agg_level}-{run_type}): '
        log_results = f'Results ({agg_level}-{run_type}): '


    date_series = test_df.date.unique() if run_type == 'test' else validation_df.date.unique()
    date_df = pd.DataFrame(dict(d=d_list, date=date_series))

    forecast_df = pd.DataFrame(
        index=idxs,
        columns=date_series,
        data=np.array([fcst.samples.reshape(-1) for fcst in forecasts])
    ).reset_index()

    forecast_df.columns = ['item_id'] + forecast_df.columns[1:].tolist()
    forecast_df = forecast_df.melt(id_vars='item_id', var_name='date', value_name='sales')

    # add d numbers for context
    forecast_df = forecast_df.merge(date_df, how='left', on='date')
    forecast_df.assign(model='nbeats').to_csv(os.path.join(base_dir_path, save_fpath), index=False)
    print("Result plotting not supported for product-level forecasting")

  
  print(f"Predictions saved at {os.path.join(base_dir_path, save_fpath)}")

  n_series = len(test_ds) if run_type == 'test' else len(validation_ds)

  evaluator = Evaluator(quantiles=[0.5, 0.9])
  agg_metrics, _ = evaluator(iter(tss), iter(forecasts), num_series=n_series)

  m5_evaluator = M5Evaluator(quantiles=[0.5, 0.9])
  m5_agg_metrics, _ = m5_evaluator(iter(tss), iter(forecasts), num_series=n_series)

  logger.info(f'{log_hparam}{dict(nbeats_train_params, **trainer_params)}')
  logger.info(f'{log_results}RMSSE: {m5_agg_metrics["MRMSSE"]:.4f}, RMSE: {agg_metrics["RMSE"]:.4f}')

  return forecast_df

### Record model results

In [None]:
# Record validation results
forecast_it, ts_it = make_evaluation_predictions(
    dataset=validation_ds,
    predictor=predictor,
    num_samples=100
)

print("Obtaining time series conditioning values ...")
tss = list(tqdm(ts_it, total=len(validation_ds)))
print("Obtaining time series predictions ...")
forecasts = list(tqdm(forecast_it, total=len(validation_ds)))

# forecast_df = record_results(tss, forecasts, agg_level, run_type='validation')
forecast_df = record_results(tss, forecasts, agg_level, single_item_id=None, single_cat_id=single_cat_id, run_type='validation')


Obtaining time series conditioning values ...


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Obtaining time series predictions ...


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Running evaluation:   0%|          | 0/1 [00:00<?, ?it/s]


Result plotting not supported for product-level forecasting
Predictions saved at /gdrive/MyDrive/Forecasting - DSI Capstone Spring '21/Colabs/predictions/nbeats_product_validation_HOUSEHOLD_2_175.csv


Running evaluation: 100%|██████████| 1/1 [00:00<00:00,  3.79it/s]
Running evaluation: 100%|██████████| 1/1 [00:00<00:00,  4.12it/s]
INFO:nbeats:Hyperparam (product-validation-HOUSEHOLD_2_175): {'prediction_length': 28, 'meta_bagging_size': 3, 'meta_context_length': [84, 140, 196], 'meta_loss_function': ['MASE'], 'num_stacks': 30, 'widths': [512], 'learning_rate': 0.01, 'epochs': 12, 'num_batches_per_epoch': 50, 'batch_size': 16}
INFO:nbeats:Results (product-validation-HOUSEHOLD_2_175): RMSSE: 0.0883, RMSE: 0.0460


In [28]:
# Record test results, using model trained up to training cutoff
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,
    predictor=predictor,
    num_samples=100
)

print("Obtaining time series conditioning values ...")
tss = list(tqdm(ts_it, total=len(test_ds)))
print("Obtaining time series predictions ...")
forecasts = list(tqdm(forecast_it, total=len(test_ds)))

# forecast_df = record_results(tss, forecasts, agg_level, run_type='test')
forecast_df = record_results(tss, forecasts, agg_level, single_item_id=None, single_cat_id=single_cat_id, run_type='test')

Obtaining time series conditioning values ...


HBox(children=(FloatProgress(value=0.0, max=1433.0), HTML(value='')))


Obtaining time series predictions ...


HBox(children=(FloatProgress(value=0.0, max=1433.0), HTML(value='')))

Running evaluation:   0%|          | 0/1433 [00:00<?, ?it/s]


Result plotting not supported for product-level forecasting
Predictions saved at /gdrive/MyDrive/Forecasting - DSI Capstone Spring '21/Colabs/predictions/nbeats_product_test_FOODS.csv


Running evaluation: 100%|██████████| 1433/1433 [00:00<00:00, 9658.19it/s]
Running evaluation: 100%|██████████| 1433/1433 [00:00<00:00, 14045.80it/s]
  if sys.path[0] == '':
INFO:nbeats:Hyperparam (product-test-FOODS): {'prediction_length': 28, 'meta_bagging_size': 3, 'meta_context_length': [84, 140, 196], 'meta_loss_function': ['MASE'], 'num_stacks': 30, 'widths': [512], 'learning_rate': 0.01, 'epochs': 8, 'num_batches_per_epoch': 50, 'batch_size': 16}
INFO:nbeats:Results (product-test-FOODS): RMSSE: inf, RMSE: 2.7860
