In [1]:
import os,sys,inspect
# Root of the DDFM checkout. It is put first on sys.path so that the
# `implementation` package can be imported by the lines that follow.
# Set the DDFM_ROOT environment variable to run the notebook from another
# machine; when it is unset the original hard-coded location is used.
# Earlier attempt to derive the root from the running file (kept for reference):
# currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# parentdir = os.path.dirname(currentdir)
# parentdir = os.path.dirname(parentdir)
parentdir = os.environ.get("DDFM_ROOT", "C:\\Users\\cosim\\PycharmProjects\\DDFM")
sys.path.insert(0, parentdir)
import pickle
import pandas as pd
import numpy as np
from pandas.tseries.offsets import MonthEnd
from implementation.model_validation import Validate, Ddfm_simple
from implementation.bai_ng_utils import transform_variables#, untransform_variables
from tqdm import tqdm

In [2]:
# Sample boundaries: data before start_date is dropped after transformation,
# and every date from start_oos onwards is forecast out of sample.
start_date, start_oos = (
    pd.Timestamp(iso_day) for iso_day in ("1990-01-01", "2005-01-01")
)

In [3]:
# data: monthly snapshot CSV. The first data row carries one integer
# transformation code per series; the "sasdate" column holds the dates.
data_m_all = pd.read_csv(os.path.join(parentdir, "examples", "empirical", "data", "mdfred_snapshot_monthly.csv"))
# mapping second diff to first diff (6 goes to 5, 3 goes to 2) in a single
# non-mutating step; astype/replace both return new Series, so data_m_all
# itself is never modified
transform_code_m = data_m_all.iloc[0, 1:].astype(int).replace({6: 5, 3: 2})
data_m = data_m_all.iloc[1:, :].set_index("sasdate")
data_m.index = pd.to_datetime(data_m.index)
# convert to month end. MonthEnd(0) rolls a date forward to the end of its own
# month and leaves a date that already is a month end where it is, whereas
# MonthEnd() (n=1) would move such a date into the following month.
data_m.index = data_m.index + MonthEnd(0)
# transform data
data_m = transform_variables(data_m, transform_code_m)
# trim only after transforming, so the transformation still sees the
# observations that precede start_date
data_m = data_m[data_m.index >= start_date]

In [4]:
# show which distinct transformation codes are in use
print({code for code in transform_code_m})

{1, 2, 4, 5, 7}


In [5]:
# ddfms configs: one hyper-parameter grid per decoder variant.
# Encoder candidates are lists of the form [4k, 2k, k] for k = 6, 5, 4, 3.
hypers_asymmetric_ddfm = {
    'lags_input': [0, 3],
    'structure_encoder': [[4 * k, 2 * k, k] for k in (6, 5, 4, 3)],
    'link': ["tanh", "relu"],
    'symmetric_decoder': [False],
    'seed': [1, 2, 3],
}
# identical grid (shallow copy), only the decoder flag is switched
hypers_symmetric_ddfm = {**hypers_asymmetric_ddfm, "symmetric_decoder": [True]}

In [6]:
# - CPIAUCSL: inflation all
# - PAYEMS: nfp all
# - RETAILx : retail sales
# - INDPRO: industrial production
# - RPI: real personal income
selected = ["CPIAUCSL", "PAYEMS", "RETAILx", "INDPRO", "RPI"]
# Sorted integer column positions of the selected series within data_m.
# Index.get_loc returns the position of a (unique) label directly and raises a
# KeyError naming the label when it is absent; this avoids converting a
# one-element array to a scalar, which NumPy deprecates from version 1.25 on.
idx_selected = sorted(int(data_m.columns.get_loc(name)) for name in selected)

In [7]:
# OOS: every date from start_oos onwards is forecast using only earlier data
oos_dates = list(data_m.index[data_m.index >= start_oos])
# yearly re-estimation
estimate_every = 12
# t+1, t+6, t+12, t+24
steps_ahead = [1, 6, 12, 24]
# longest horizon; a single predict call per model covers all shorter ones
max_steps_ahead = max(steps_ahead)
# init predictions and the targets containers:
# one NaN-filled (number of oos dates x number of variables) array per horizon
container_shape = (len(oos_dates), data_m.shape[1])
dict_targets = {vv: np.full(container_shape, np.nan) for vv in steps_ahead}
dict_preds_symmetric = {vv: np.full(container_shape, np.nan) for vv in steps_ahead}
dict_preds_asymmetric = {vv: np.full(container_shape, np.nan) for vv in steps_ahead}
# realised values aligned per horizon, computed once instead of once per date:
# one step ahead is v without shift, two is v with one shift, ...
# (rows shifted in from beyond the end of the sample are NaN)
shifted_targets = {vv: data_m.shift(-(vv - 1)) for vv in steps_ahead}
# oos loop
for c, v in enumerate(tqdm(oos_dates)):

    # up until end excluding v
    data_insample_now = data_m[data_m.index < v]

    # check if we need to fit the model (first date, then every estimate_every dates)
    if (c % estimate_every) == 0:
        hyper_tuning = Validate(data_insample_now,
                                verbose=0,
                                n_jobs=12,
                                test_size=18,
                                n_steps_ahead=12,
                                cv_type="tssplit",
                                n_splits=min(2+c, 6), # max 6 splits to reduce computational complexity
                                selected_vars=idx_selected,
                                )
        ddfm_simple_inst = Ddfm_simple(n_steps_ahead=hyper_tuning.n_steps_ahead)
        model_ddfm_asymmetric = hyper_tuning.grid_search_cross_validate(ddfm_simple_inst, hypers_asymmetric_ddfm)
        model_ddfm_symmetric = hyper_tuning.grid_search_cross_validate(ddfm_simple_inst, hypers_symmetric_ddfm)

    # make predictions with the most recently tuned models
    preds_asymmetric_now = model_ddfm_asymmetric.best_estimator_.predict(data_insample_now, n_steps_ahead=max_steps_ahead)
    preds_symmetric_now = model_ddfm_symmetric.best_estimator_.predict(data_insample_now, n_steps_ahead=max_steps_ahead)

    # store predictions and the targets
    # NOTE(review): the indexing assumes predict returns an array laid out as
    # (step ahead, time, variable), with [vv-1, -1, :] being the vv-step
    # forecast made from the last in-sample position - confirm against
    # Ddfm_simple.predict
    for vv in steps_ahead:
        dict_preds_asymmetric[vv][c, :] = preds_asymmetric_now[vv-1, -1, :]
        dict_preds_symmetric[vv][c, :] = preds_symmetric_now[vv-1, -1, :]
        dict_targets[vv][c, :] = shifted_targets[vv].loc[v]

  0%|                                                                                                                                                      | 0/226 [00:00<?, ?it/s]

@Info - Note: Sorting data.
@Info: Convergence achieved in 28 iterations - new loss: 0.524219566940718 - delta: 0.000297129568014953 < 0.0005
@Info - Note: Sorting data.
@Info: Convergence achieved in 4 iterations - new loss: 0.5099807201776747 - delta: 0.00035241075107479887 < 0.0005


  5%|███████▍                                                                                                                                   | 12/226 [30:37<2:12:40, 37.20s/it]

@Info - Note: Sorting data.
@Info: Convergence achieved in 21 iterations - new loss: 0.5688067869778966 - delta: 0.00047899464353587184 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 9 iterations - new loss: 0.5076811875250326 - delta: 1.0371575065117781e-05 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 26 iterations - new loss: 0.7525597217762328 - delta: 8.220335252965945e-05 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 17 iterations - new loss: 0.7237059687999349 - delta: 0.00021301636888183782 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 17 iterations - new loss: 0.5145544963315182 - delta: 0.0002733611862391745 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 7 iterations - new loss: 0.5740945477601123 - delta: 5.0148564694831594e-05 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 25 iterations - new loss: 0.44047071960901946 - delta: 0.00044327646980992934 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 13 iterations - new loss: 0.5444535820504663 - delta: 1.3471859486475047e-05 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 23 iterations - new loss: 0.5123377359974413 - delta: 0.000398003874163079 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 10 iterations - new loss: 0.5598811955033104 - delta: 5.523790406990584e-05 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 19 iterations - new loss: 0.5282018438277346 - delta: 2.7812601194954338e-05 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 9 iterations - new loss: 0.4875433654421025 - delta: 0.00047683612451561046 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 26 iterations - new loss: 0.543794256515085 - delta: 0.00033513798345047275 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 12 iterations - new loss: 0.643607034488184 - delta: 0.0004990700875962351 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 12 iterations - new loss: 0.5695669105959176 - delta: 0.0001432604332178901 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 4 iterations - new loss: 0.4932115151688518 - delta: 0.00015356629077787744 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 19 iterations - new loss: 0.44924706051366253 - delta: 0.00026380652591895227 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 5 iterations - new loss: 0.5379019847397385 - delta: 0.0004662865308320254 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 22 iterations - new loss: 0.3898712436986139 - delta: 0.0002893123896714179 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 4 iterations - new loss: 0.6663399297773052 - delta: 0.00039147445216143807 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 20 iterations - new loss: 0.5974968202765628 - delta: 0.00021062819725334785 < 0.0005
@Info - Note: Sorting data.
@Info: Convergence achieved in 4 iterations - new loss: 0.5069114400258589 - delta: 0.0004607210786532817 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 21 iterations - new loss: 0.557198719388565 - delta: 0.00019699812549646 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 3 iterations - new loss: 0.5062426391315129 - delta: 0.00046651885603386 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 23 iterations - new loss: 0.3906877542274399 - delta: 0.0004164963382982029 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 8 iterations - new loss: 0.506310734517408 - delta: 8.634220904887521e-05 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 21 iterations - new loss: 0.41618437255656776 - delta: 8.516830378851853e-05 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 6 iterations - new loss: 0.6644071832833722 - delta: 0.00013310798174719372 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 12 iterations - new loss: 0.5651773767846974 - delta: 0.00011774041831954392 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 10 iterations - new loss: 0.511124053407039 - delta: 0.0003122819525233991 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 18 iterations - new loss: 0.39688339064659245 - delta: 0.00021721784846182195 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 16 iterations - new loss: 0.4374455090072163 - delta: 0.0002508898650933927 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 21 iterations - new loss: 0.3923516498568554 - delta: 3.6942453679419735e-05 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 17 iterations - new loss: 0.5374905617799992 - delta: 0.00023751932857674678 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 7 iterations - new loss: 0.3650927067176237 - delta: 0.00014468210147445214 < 0.0005




@Info - Note: Sorting data.
@Info: Convergence achieved in 17 iterations - new loss: 0.49162661051190193 - delta: 0.00043669931698105385 < 0.0005


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 226/226 [33:10:24<00:00, 528.43s/it]


In [8]:
# compute rmsfe
def rmsfe(y, y_hat):
    """Return the root mean squared forecast error between y and y_hat.

    Positions where the squared error is NaN (a missing target or a missing
    forecast) are left out of the mean.
    """
    forecast_error = y - y_hat
    mean_squared_error = np.nanmean(forecast_error ** 2)
    return np.sqrt(mean_squared_error)
# RMSFE of every variable at every forecasting horizon, for both decoder variants.
# The errors are measured on the series as stored in the containers; the
# untransform_variables step is not applied here.
var_names = list(data_m.columns)
dict_rmsfe = {}
for horizon in steps_ahead:
    # wrap the raw arrays of this horizon in labelled frames (dates x variables)
    frames = {
        label: pd.DataFrame(store[horizon], columns=var_names, index=oos_dates)
        for label, store in (
            ("target", dict_targets),
            ("Symmetric", dict_preds_symmetric),
            ("Asymmetric", dict_preds_asymmetric),
        )
    }
    dict_rmsfe[f"Forecasting Horizon {horizon}"] = {
        name: {
            "Symmetric": rmsfe(frames["target"][name], frames["Symmetric"][name]),
            "Asymmetric": rmsfe(frames["target"][name], frames["Asymmetric"][name]),
        }
        for name in var_names
    }

In [9]:
# flatten the nested {horizon: {variable: scores}} mapping into
# (horizon, variable) keys so that each pair becomes one table row
rmsfe_by_horizon_and_variable = {
    (horizon_label, variable): scores
    for horizon_label, per_variable in dict_rmsfe.items()
    for variable, scores in per_variable.items()
}
df = pd.DataFrame.from_dict(rmsfe_by_horizon_and_variable, orient='index')
# two-level row index: horizon label, then variable name
df.index = pd.MultiIndex.from_tuples(df.index)
df

Unnamed: 0,Unnamed: 1,Symmetric,Asymmetric
Forecasting Horizon 1,RPI,0.017962,0.019328
Forecasting Horizon 1,W875RX1,0.009793,0.009366
Forecasting Horizon 1,DPCERA3M086SBEA,0.013495,0.013254
Forecasting Horizon 1,CMRMTSPLx,0.017515,0.017297
Forecasting Horizon 1,RETAILx,0.024285,0.025107
...,...,...,...
Forecasting Horizon 24,UMCSENTx,4.859635,4.636464
Forecasting Horizon 24,DTCOLNVHFNM,0.024440,0.026114
Forecasting Horizon 24,DTCTHFNM,0.029303,0.030531
Forecasting Horizon 24,INVEST,0.010495,0.010294


In [10]:
# Save the RMSFE table. The results folder is created first if needed, so a
# missing directory cannot make the write fail once the long run has finished.
results_dir = os.path.join(parentdir, "examples", "empirical", "results")
os.makedirs(results_dir, exist_ok=True)
df.to_csv(os.path.join(results_dir, 'ddfms_rmsfe.csv'))

In [11]:
# bundle the error table, both sets of forecasts and the realised targets
# under fixed keys so they can be stored together
dict_all_results = dict(zip(
    ("RMSFE", "Preds Asymmetric", "Preds Symmetric", "Targets"),
    (dict_rmsfe, dict_preds_asymmetric, dict_preds_symmetric, dict_targets),
))

In [12]:
# pickle everything: serialise the bundled results with the highest protocol available
pickle_path = os.path.join(parentdir, "examples", "empirical", "results", 'ddfms.pickle')
with open(pickle_path, 'wb') as out_file:
    pickle.dump(dict_all_results, out_file, pickle.HIGHEST_PROTOCOL)

In [13]:
# Optional reload: uncomment the lines below to restore the stored results
# from ddfms.pickle instead of recomputing them with the out-of-sample loop.
# Only unpickle files you trust - pickle.load can execute arbitrary code.
# with open(os.path.join(parentdir, "examples", "empirical", "results", 'ddfms.pickle'), 'rb') as handle:
#     dict_all_results = pickle.load(handle)
# dict_preds_asymmetric = dict_all_results["Preds Asymmetric"]
# dict_preds_symmetric = dict_all_results["Preds Symmetric"]
# dict_targets = dict_all_results["Targets"]
# dict_rmsfe = dict_all_results["RMSFE"]