In [None]:
import os,sys,inspect
# Root of the DDFM checkout. It is put on sys.path so the `implementation`
# package imported below resolves, and it is the base folder for the
# `examples/empirical` data and results paths used later in the notebook.
# Set the DDFM_ROOT environment variable to point at your own checkout; the
# fallback is the original machine-specific location.
parentdir = os.environ.get("DDFM_ROOT", "C:\\Users\\cosim\\PycharmProjects\\DDFM")
sys.path.insert(0, parentdir)
import pickle
import pandas as pd
import numpy as np
from pandas.tseries.offsets import MonthEnd
from implementation.model_validation import Validate, Ddfm_simple
from implementation.bai_ng_utils import transform_variables, untransform_variables
from tqdm import tqdm

In [None]:
# Sample boundaries: rows dated before `start_date` are dropped from the data,
# and the out-of-sample dates are those on or after `start_oos`.
start_date = pd.Timestamp(year=1990, month=1, day=1)
start_oos = pd.Timestamp(year=2005, month=1, day=1)

In [None]:
# Load the monthly snapshot. The first data row holds one integer
# transformation code per series; the remaining rows are the observations.
monthly_csv_path = os.path.join(parentdir, "examples", "empirical", "data", "mdfred_snapshot_monthly.csv")
data_m_all = pd.read_csv(monthly_csv_path)
transform_code_m = data_m_all.iloc[0, 1:].astype(int)
data_m = data_m_all.iloc[1:].set_index("sasdate")
# Parse the dates and move every timestamp forward to a month end.
data_m.index = pd.to_datetime(data_m.index) + MonthEnd()
# Apply the per-series transformations, then keep the sample from start_date on.
data_m = transform_variables(data_m, transform_code_m)
data_m = data_m.loc[data_m.index >= start_date]

In [None]:
# Hyper-parameter grids for the two DDFM variants.
# Every encoder structure is [4k, 2k, k] for k = 6, 5, 4, 3.
hypers_asymmetric_ddfm = {
    'lags_input': [0, 3],
    'structure_encoder': [[width * 4, width * 2, width] for width in (6, 5, 4, 3)],
    'link': ["tanh"],
    'symmetric_decoder': [False],
}
# Same grid (sharing the same list objects) with only the decoder flag flipped.
hypers_symmetric_ddfm = {**hypers_asymmetric_ddfm, "symmetric_decoder": [True]}

In [None]:
# OOS: expanding-window out-of-sample exercise. For every date v on or after
# start_oos the models only see data strictly before v.
oos_dates = list(data_m.index[data_m.index >= start_oos])
# yearly re-estimation
estimate_every = 12
# t+1, t+6, t+12, t+24
steps_ahead = [1, 6, 12, 24]
# init predictions and the targets containers: one NaN-filled
# (n_oos_dates, n_variables) array per horizon, so entries that are never
# written stay NaN
n_oos, n_vars = len(oos_dates), data_m.shape[1]
dict_targets, dict_preds_symmetric, dict_preds_asymmetric = {}, {}, {}
for vv in steps_ahead:
    dict_preds_asymmetric[vv] = np.full((n_oos, n_vars), np.nan)
    dict_preds_symmetric[vv] = np.full((n_oos, n_vars), np.nan)
    dict_targets[vv] = np.full((n_oos, n_vars), np.nan)
# Targets per horizon: one step ahead is v without shift, two is v with one
# shift, ... The shifted frames do not depend on the loop date, so build them
# once here instead of re-shifting the whole frame for every date and horizon.
shifted_targets = {vv: data_m.shift(-(vv - 1)) for vv in steps_ahead}
# oos loop
for c, v in enumerate(tqdm(oos_dates)):

    # up until end excluding v
    data_insample_now = data_m[data_m.index < v]

    # check if we need to fit the model (every `estimate_every` OOS dates)
    if (c % estimate_every) == 0:
        hyper_tuning = Validate(data_insample_now,
                                verbose=0,
                                n_jobs=14,
                                test_size=18,
                                n_steps_ahead=12,
                                cv_type="tssplit",
                                n_splits=min(2+c, 10)) # max 10 splits to reduce computational complexity
        ddfm_simple_inst = Ddfm_simple(n_steps_ahead=hyper_tuning.n_steps_ahead)
        model_ddfm_asymmetric = hyper_tuning.grid_search_cross_validate(ddfm_simple_inst, hypers_asymmetric_ddfm)
        model_ddfm_symmetric = hyper_tuning.grid_search_cross_validate(ddfm_simple_inst, hypers_symmetric_ddfm)

    # make predictions up to the longest horizon with the most recently tuned models
    preds_asymmetric_now = model_ddfm_asymmetric.best_estimator_.predict(data_insample_now, n_steps_ahead=max(steps_ahead))
    preds_symmetric_now = model_ddfm_symmetric.best_estimator_.predict(data_insample_now, n_steps_ahead=max(steps_ahead))

    # store predictions and the targets
    # NOTE(review): the indexing below presumes predictions are laid out as
    # (step, time, variable), with -1 selecting the latest in-sample origin - confirm
    for vv in steps_ahead:
        dict_preds_asymmetric[vv][c, :] = preds_asymmetric_now[vv-1, -1, :]
        dict_preds_symmetric[vv][c, :] = preds_symmetric_now[vv-1, -1, :]
        dict_targets[vv][c, :] = shifted_targets[vv].loc[v]

In [None]:
# compute rmsfe
def rmsfe(y, y_hat):
    """Root mean squared forecast error between `y` and `y_hat`.

    The squared differences are averaged with `np.nanmean`, so positions
    where either input is NaN are left out of the mean.

    Parameters
    ----------
    y : array-like supporting element-wise subtraction
        Realised values.
    y_hat : array-like supporting element-wise subtraction
        Forecasts aligned with `y`.

    Returns
    -------
    float
        Square root of the NaN-ignoring mean of the squared errors.
    """
    squared_errors = np.square(y - y_hat)
    return np.sqrt(np.nanmean(squared_errors))
var_names = list(data_m.columns)
dict_rmsfe = {}
for v in steps_ahead:
    # Wrap the raw arrays in labelled frames and undo the stationarity
    # transformation for this horizon: targets first, then symmetric and
    # asymmetric predictions.
    df_target, df_preds_sym, df_preds_asym = (
        untransform_variables(
            data_tr=pd.DataFrame(stored[v], columns=var_names, index=oos_dates),
            code=transform_code_m,
            fcst_h=v,
        )
        for stored in (dict_targets, dict_preds_symmetric, dict_preds_asymmetric)
    )
    # One entry per variable holding the RMSFE of both model variants.
    dict_rmsfe[f"Forecasting Horizon {v}"] = {
        name: {"Symmetric": rmsfe(df_target[name], df_preds_sym[name]),
               "Asymmetric": rmsfe(df_target[name], df_preds_asym[name])}
        for name in var_names
    }

In [None]:
# Flatten {horizon: {variable: scores}} into {(horizon, variable): scores} so
# that each (horizon, variable) pair becomes one row of the results table.
rmsfe_by_horizon_and_variable = {
    (horizon_label, variable): scores
    for horizon_label, per_variable in dict_rmsfe.items()
    for variable, scores in per_variable.items()
}
df = pd.DataFrame.from_dict(rmsfe_by_horizon_and_variable, orient='index')
df.index = pd.MultiIndex.from_tuples(df.index)
df

In [None]:
# Persist the RMSFE table. The results folder is created first so the write
# does not fail on a checkout where it does not exist yet.
results_dir = os.path.join(parentdir, "examples", "empirical", "results")
os.makedirs(results_dir, exist_ok=True)
df.to_csv(os.path.join(results_dir, 'ddfms_rmsfe.csv'))

In [None]:
# Bundle every artefact of the exercise under one dictionary.
dict_all_results = {}
dict_all_results["RMSFE"] = dict_rmsfe
dict_all_results["Preds Asymmetric"] = dict_preds_asymmetric
dict_all_results["Preds Symmetric"] = dict_preds_symmetric
dict_all_results["Targets"] = dict_targets

In [None]:
# pickle everything; the results folder is created first so the dump does not
# fail on a checkout where it does not exist yet
results_dir = os.path.join(parentdir, "examples", "empirical", "results")
os.makedirs(results_dir, exist_ok=True)
with open(os.path.join(results_dir, 'ddfms.pickle'), 'wb') as handle:
    pickle.dump(dict_all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# with open(os.path.join(parentdir, "examples", "empirical", "results", 'ddfms.pickle'), 'rb') as handle:
#     dict_all_results = pickle.load(handle)
# dict_preds_asymmetric = dict_all_results["Preds Asymmetric"]
# dict_preds_symmetric = dict_all_results["Preds Symmetric"]
# dict_targets = dict_all_results["Targets"]
# dict_rmsfe = dict_all_results["RMSFE"]