In [97]:
import pandas as pd

from ESRNN.m4_data import prepare_m4_data, seas_dict
from ESRNN.utils_evaluation import evaluate_prediction_owa

from fforma import FForma

from functools import partial
import multiprocessing as mp
import glob
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [83]:
# Load the two training CSVs; each is keyed by `unique_id` and ordered by that key.
train_errors = (
    pd.read_csv('R/data/errrors-fforma.csv')
    .set_index('unique_id')
    .sort_index()
)
# NOTE(review): `train_feats` is not referenced by any later cell visible here.
train_feats = (
    pd.read_csv('R/data/feats-fforma.csv')
    .set_index('unique_id')
    .sort_index()
)

In [86]:
# Stack every '*features.csv' file found in the working directory into one table.
# glob.glob returns paths in arbitrary order, so the paths are sorted to make the
# resulting row order reproducible across runs and machines.
py_feats = pd.concat(
    [pd.read_csv(path) for path in sorted(glob.glob('*features.csv'))],
    sort=True,  # sort the column axis when the files' columns are not already aligned
).fillna(0)  # every missing value (including columns absent from a file) becomes 0

# Keep only the series that also appear in `train_errors`, then key by `unique_id`.
# NOTE(review): unlike `train_errors`, this frame is not sorted by `unique_id` —
# confirm that FForma.train aligns on the index rather than on row position.
py_feats = py_feats[py_feats['unique_id'].isin(train_errors.index)].set_index('unique_id')

In [87]:
# Train FForma on the training errors, using the features collected in `py_feats`.
# `max_evals=10` is handed to the constructor — presumably the search budget; confirm.
model = (
    FForma(max_evals=10)
    .train(train_errors, py_feats)
)

Preparing training phase
[0]	train-FFORMA-loss:0.940714	eval-FFORMA-loss:0.93774
Multiple eval metrics have been passed: 'eval-FFORMA-loss' will be used for early stopping.

Will train until eval-FFORMA-loss hasn't improved in 10 rounds.
[1]	train-FFORMA-loss:0.901441	eval-FFORMA-loss:0.89912
[2]	train-FFORMA-loss:0.868957	eval-FFORMA-loss:0.867473
[3]	train-FFORMA-loss:0.844261	eval-FFORMA-loss:0.843455
[4]	train-FFORMA-loss:0.824777	eval-FFORMA-loss:0.824395
[5]	train-FFORMA-loss:0.809989	eval-FFORMA-loss:0.809932
[6]	train-FFORMA-loss:0.798918	eval-FFORMA-loss:0.799336
[7]	train-FFORMA-loss:0.789666	eval-FFORMA-loss:0.79047
[8]	train-FFORMA-loss:0.782122	eval-FFORMA-loss:0.783202
[9]	train-FFORMA-loss:0.77581	eval-FFORMA-loss:0.777168
[10]	train-FFORMA-loss:0.770999	eval-FFORMA-loss:0.772733
[11]	train-FFORMA-loss:0.766912	eval-FFORMA-loss:0.769032
[12]	train-FFORMA-loss:0.763428	eval-FFORMA-loss:0.765869
[13]	train-FFORMA-loss:0.760473	eval-FFORMA-loss:0.763249
[14]	train-FFORMA-lo

In [88]:
# Prediction-time inputs: features keyed by `unique_id`, forecasts keyed by (`unique_id`, `ds`).
# NOTE(review): `feats` is not used by any cell visible here; prediction uses `py_feats`.
feats = (
    pd.read_csv('R/data/pred-feats-fforma.csv')
    .set_index('unique_id')
    .sort_index()
)
preds = (
    pd.read_csv('R/data/preds-fforma.csv')
    .set_index(['unique_id', 'ds'])
    .sort_index()
)

In [90]:
# Run the trained model on the forecasts and `py_feats`, then turn the result's
# index back into ordinary columns.
fforma_preds = (
    model
    .predict(preds, py_feats)
    .reset_index()
)

In [91]:
# Persist the predictions. With the default index=True, the RangeIndex left by the
# earlier reset_index() is also written, as an unnamed first column.
fforma_preds.to_csv(path_or_buf='preds-fforma-py-feats.csv')

In [93]:
def evaluate_fforma(dataset_name, fforma_df, directory, num_obs):
    """Score the FFORMA forecasts for one dataset.

    Parameters
    ----------
    dataset_name
        Key into `seas_dict`; also forwarded to `prepare_m4_data`.
    fforma_df : pandas.DataFrame
        Forecasts with `unique_id`, `ds` and `fforma_prediction` columns.
        Only rows whose `unique_id` appears in the dataset's test frame are scored.
    directory
        Forwarded unchanged to `prepare_m4_data`.
    num_obs
        Forwarded unchanged to `prepare_m4_data`.

    Returns
    -------
    tuple
        `(dataset_name, owa, mase, smape)`, with the three metrics taken in the
        order returned by `evaluate_prediction_owa`.
    """
    # The first element returned by prepare_m4_data is not needed here.
    _, y_train_df, X_test_df, y_test_df = prepare_m4_data(dataset_name=dataset_name,
                                                          directory=directory,
                                                          num_obs=num_obs)

    # Filter the frame that was passed in. The previous version indexed the
    # notebook-level `fforma_preds` global here, which ignored the argument and
    # fails in any process where that global is not defined.
    test_ids = y_test_df['unique_id'].unique()
    y_test_fforma = fforma_df[fforma_df['unique_id'].isin(test_ids)]

    # The forecast column is passed to evaluate_prediction_owa as `y_hat`.
    y_test_fforma = y_test_fforma.rename(columns={'fforma_prediction': 'y_hat'})
    y_test_fforma = y_test_fforma.filter(items=['unique_id', 'ds', 'y_hat'])

    seasonality = seas_dict[dataset_name]['seasonality']
    owa, mase, smape = evaluate_prediction_owa(y_test_fforma, y_train_df, X_test_df, y_test_df, seasonality)

    return dataset_name, owa, mase, smape

In [94]:
# Bind everything except `dataset_name`, so the callable can be mapped over dataset names.
evaluate_fforma_p = partial(
    evaluate_fforma,
    fforma_df=fforma_preds,
    directory='./data',
    num_obs=100000,
)

In [95]:
# Evaluate every dataset named in `seas_dict` in a worker pool.
# Pool.map returns the results in the same order as the input names.
with mp.Pool() as pool:
    eval_fforma = pool.map(evaluate_fforma_p, list(seas_dict))













OWA: 0.683 
SMAPE: 14.098 
MASE: 1.436 
OWA: 0.973 
SMAPE: 8.432 
MASE: 0.532 
OWA: 0.834 
SMAPE: 13.597 
MASE: 3.118 
OWA: 1.0 
SMAPE: 3.084 
MASE: 1.152 
OWA: 0.875 
SMAPE: 10.022 
MASE: 1.151 
OWA: 0.903 
SMAPE: 12.865 
MASE: 0.971 


In [96]:
# Each tuple is (dataset_name, owa, mase, smape), as returned by evaluate_fforma.
# Note that this differs from the OWA / SMAPE / MASE order of the printed log.
eval_fforma

[('Hourly', 0.683287865587445, 1.436254875847373, 14.097771597640307),
 ('Daily', 0.9995168346482247, 1.1517943107085338, 3.084400330286038),
 ('Weekly', 0.973069447353054, 0.5324489269438113, 8.432148891940633),
 ('Monthly', 0.9025758694192698, 0.9714008898874792, 12.865084663745513),
 ('Quarterly', 0.8747380566949765, 1.1511082378244113, 10.022329802619481),
 ('Yearly', 0.8338891637472919, 3.117534971696027, 13.597166736220867)]