In [1]:
from typing import Any, Union, List, Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.svm import SVR

# scikit-learn's preprocessing module has no `QuantileScaler`; the quantile-based
# scaler it provides is `QuantileTransformer`. The alias keeps the previously
# imported name available for any cell that refers to it.
QuantileScaler = QuantileTransformer

In [7]:
# Print every column name stored in the pickled market-data frame.
print(pd.read_pickle("data/market_data.zip").columns.tolist())

['SPY_1DRET', 'SPY_1MRET', 'SPY_6MRET', 'SPY_1YRET', 'SPY_1DVOL', 'SPY_1WVOL', 'SPY_1MVOL', 'IWV_1DRET', 'IWV_1MRET', 'IWV_6MRET', 'IWV_1YRET', 'IWV_1DVOL', 'IWV_1WVOL', 'IWV_1MVOL', 'QQQ_1DRET', 'QQQ_1MRET', 'QQQ_6MRET', 'QQQ_1YRET', 'QQQ_1DVOL', 'QQQ_1WVOL', 'QQQ_1MVOL', 'IYF_1DRET', 'IYF_1MRET', 'IYF_6MRET', 'IYF_1YRET', 'IYF_1DVOL', 'IYF_1WVOL', 'IYF_1MVOL', 'XRT_1DRET', 'XRT_1MRET', 'XRT_6MRET', 'XRT_1YRET', 'XRT_1DVOL', 'XRT_1WVOL', 'XRT_1MVOL', 'XLP_1DRET', 'XLP_1MRET', 'XLP_6MRET', 'XLP_1YRET', 'XLP_1DVOL', 'XLP_1WVOL', 'XLP_1MVOL', 'XLU_1DRET', 'XLU_1MRET', 'XLU_6MRET', 'XLU_1YRET', 'XLU_1DVOL', 'XLU_1WVOL', 'XLU_1MVOL', 'XLV_1DRET', 'XLV_1MRET', 'XLV_6MRET', 'XLV_1YRET', 'XLV_1DVOL', 'XLV_1WVOL', 'XLV_1MVOL', 'IYT_1DRET', 'IYT_1MRET', 'IYT_6MRET', 'IYT_1YRET', 'IYT_1DVOL', 'IYT_1WVOL', 'IYT_1MVOL', 'GLD_1DRET', 'GLD_1MRET', 'GLD_6MRET', 'GLD_1YRET', 'GLD_1DVOL', 'GLD_1WVOL', 'GLD_1MVOL', 'SLV_1DRET', 'SLV_1MRET', 'SLV_6MRET', 'SLV_1YRET', 'SLV_1DVOL', 'SLV_1WVOL', 'SLV_1MVOL'

In [11]:
# Columns used as model inputs.
# The first 98 names are the product ticker x metric, in ticker-major order
# (all seven metrics of SPY, then all seven of IWV, ...); generating them avoids
# typos in a long hand-typed list. The remaining 23 names are the non-ticker
# series, listed explicitly.
# NOTE(review): the metric suffixes look like return / volume windows
# (1D, 1W, 1M, 6M, 1Y) — confirm against the data dictionary.
feature_columns = [
    f"{ticker}_{metric}"
    for ticker in (
        "SPY", "IWV", "QQQ", "IYF", "XRT", "XLP", "XLU",
        "XLV", "IYT", "GLD", "SLV", "MXI", "IGE", "XLE",
    )
    for metric in ("1DRET", "1MRET", "6MRET", "1YRET", "1DVOL", "1WVOL", "1MVOL")
] + [
    "3M_TBILL", "CPI", "VIX", "INDP", "USHY_ADJ", "US_LEADING",
    "30Y_FRMTG", "15Y_FRMTG", "CPI_URBAN",
    "RETAIL", "PHARMA",
    "UNEMP", "UNEMP_PERM", "UNEMP_MEN", "UNEMP_WMN",
    "UNEMP_WHT", "UNEMP_BLK", "UNEMP_HIS",
    "INC", "INC_DISP", "INC_DISP_PC",
    "TAX_HIGH", "TAX_LOW",
]

# Columns the models are asked to predict: the `_1MRET` column of each ticker,
# followed by a subset of the non-ticker series.
target_columns = [
    # per-ticker `_1MRET` columns
    "SPY_1MRET", "IWV_1MRET", "QQQ_1MRET", "IYF_1MRET", "XRT_1MRET",
    "XLP_1MRET", "XLU_1MRET", "XLV_1MRET", "IYT_1MRET", "GLD_1MRET",
    "SLV_1MRET", "MXI_1MRET", "IGE_1MRET", "XLE_1MRET",
    # non-ticker series
    "3M_TBILL", "CPI", "VIX", "INDP", "USHY_ADJ", "30Y_FRMTG",
    "15Y_FRMTG", "RETAIL", "PHARMA", "UNEMP", "INC",
]


# Read the pickled frame once and reuse it for both X and y (it was previously
# read from disk twice).
# NOTE: pd.read_pickle can execute arbitrary code while unpickling; only load
# files that come from a trusted source.
market_data_raw = pd.read_pickle("data/market_data.zip")

market_data = {
    # Model inputs, row-aligned with the targets below.
    "X": market_data_raw.loc[:, feature_columns],
    # shift(-30) moves every value 30 rows earlier, so the target stored on a
    # given row is the value recorded 30 rows later (a look-ahead target); the
    # final 30 rows of y become NaN.
    # NOTE(review): 30 rows equals roughly one month only if the index has one
    # row per calendar day — confirm.
    "y": market_data_raw.loc[:, target_columns].shift(-30),
}

# Add "X_<split>" / "y_<split>" entries for each date window. Label slicing with
# .loc includes both end dates. "train_all" spans "train" plus "held_out".
# NOTE(review): because y looks 30 rows ahead, targets on the last rows of one
# window are taken from dates that belong to the next window — confirm this
# overlap is acceptable for the evaluation.
market_data.update({
    f"{part}_{split}": market_data[part].loc[start:end, :]
    for split, (start, end) in {
        "train": ("2008-01-01", "2014-12-31"),
        "train_all": ("2008-01-01", "2016-12-31"),
        "held_out": ("2015-01-01", "2016-12-31"),
        "test": ("2017-01-01", "2020-12-31"),
    }.items()
    for part in ("X", "y")
})


In [4]:
# `market_data` is a dict of DataFrames, so display one frame (the feature
# matrix) instead of the plain-text repr of the whole dict.
market_data["X"]

Unnamed: 0,SPY_1DRET,SPY_1MRET,SPY_6MRET,SPY_1YRET,SPY_1DVOL,SPY_1WVOL,SPY_1MVOL,IWV_1DRET,IWV_1MRET,IWV_6MRET,...,UNEMP_MEN,UNEMP_WMN,UNEMP_WHT,UNEMP_BLK,UNEMP_HIS,INC,INC_DISP,INC_DISP_PC,TAX_HIGH,TAX_LOW
2008-01-01,-0.007455,-0.001320,-0.027807,0.044981,108126800.0,460201100.0,3.516696e+09,-0.008057,-0.002266,-0.033746,...,5.2,4.8,4.4,9.1,6.3,0.003478,0.001004,0.000289,35.0,10.0
2008-01-02,-0.008832,-0.020321,-0.040642,0.040544,204935600.0,619535300.0,3.498723e+09,-0.010899,-0.021063,-0.049407,...,5.2,4.8,4.4,9.1,6.3,0.003478,0.001004,0.000289,35.0,10.0
2008-01-03,-0.000483,-0.014085,-0.040051,0.041775,125133300.0,677575500.0,3.477426e+09,0.000359,-0.014138,-0.049267,...,5.2,4.8,4.4,9.1,6.3,0.003478,0.001004,0.000289,35.0,10.0
2008-01-04,-0.025122,-0.030269,-0.071785,0.015618,232330900.0,786924700.0,3.573223e+09,-0.027179,-0.035088,-0.082054,...,5.2,4.8,4.4,9.1,6.3,0.003478,0.001004,0.000289,35.0,10.0
2008-01-05,-0.025122,-0.030269,-0.071785,0.015618,232330900.0,786924700.0,3.573223e+09,-0.027179,-0.035088,-0.082054,...,5.2,4.8,4.4,9.1,6.3,0.003478,0.001004,0.000289,35.0,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-25,-0.024691,-0.003819,0.107333,0.200195,146670500.0,477043700.0,1.498977e+09,-0.026911,-0.000651,0.135465,...,6.3,6.1,5.6,9.9,8.5,-0.076037,-0.089178,-0.089311,37.0,10.0
2021-02-26,-0.005179,0.015643,0.093714,0.232159,152534900.0,546337600.0,1.528161e+09,-0.003834,0.020217,0.123676,...,6.3,6.1,5.6,9.9,8.5,-0.076037,-0.089178,-0.089311,37.0,10.0
2021-02-27,-0.005179,0.015643,0.093714,0.232159,152534900.0,546337600.0,1.528161e+09,-0.003834,0.020217,0.123676,...,6.3,6.1,5.6,9.9,8.5,-0.076037,-0.089178,-0.089311,37.0,10.0
2021-02-28,-0.005179,0.015643,0.093714,0.232159,152534900.0,546337600.0,1.528161e+09,-0.003834,0.020217,0.123676,...,6.3,6.1,5.6,9.9,8.5,-0.076037,-0.089178,-0.089311,37.0,10.0
