# Load Data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
import itertools

In [2]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn import linear_model

In [3]:
# Long-format price table; later cells read its ID, DATE and PX_LAST columns.
idx = pd.read_csv(filepath_or_buffer='res_10.csv')


In [4]:
# Distinct index tickers present in the loaded table.
idx.loc[:, 'ID'].unique()

array(['LUACTRUU Index', 'SPX Index', 'BCOM Index', 'LBUSTRUU Index',
       'LF98TRUU Index', 'LUATTRUU Index', 'LUMSTRUU Index',
       'LBEATREU Index', 'LP01TREU Index', 'LG30TRUU Index',
       'EMUSTRUU Index', 'LGTRTRUU Index', 'LGDRTRUU Index',
       'LUGCTRUU Index', 'LP06TREU Index', 'LF94TRUU Index',
       'LACHTRUU Index', 'LD08TRUU Index', 'LC07TRUU Index',
       'USYC2Y10 Index', 'LECPTREU Index'], dtype=object)

In [5]:
def return_correlation_ts(df,idx_1, idx_2, corr_window=30, ma_window=30):
    """Rolling correlation between two indices' prices, plus its moving average.

    The two ``PX_LAST`` series are paired by ``DATE`` (inner join) rather than
    by row position, so indices with different history lengths or missing
    days are compared on the dates they actually share.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with at least the columns ``ID``, ``DATE`` and
        ``PX_LAST``.
    idx_1, idx_2 : str
        Values of ``ID`` selecting the two series to correlate.
    corr_window : int, default 30
        Number of rows in the rolling correlation window.
    ma_window : int, default 30
        Number of rows in the rolling mean applied to the correlation.

    Returns
    -------
    pandas.DataFrame
        Columns ``DATE``, ``idx_1``, ``idx_2``, ``corr_{idx_1}_{idx_2}`` and
        ``MA_{ma_window}``, indexed by ``pd.to_datetime(DATE)``.

    Notes
    -----
    Rows keep the order in which ``idx_1`` appears in ``df``; nothing is
    sorted here, so the rolling windows are only meaningful if ``df`` is
    already in chronological order.
    Assumes at most one row per (ID, DATE); duplicate dates would be paired
    many-to-many by the join -- TODO confirm against the data.
    """
    first = (
        df.loc[df['ID'] == idx_1, ['DATE', 'PX_LAST']]
        .rename(columns={'PX_LAST': idx_1})
    )
    if idx_2 == idx_1:
        # Correlating a series with itself: no second column to join in.
        combined = first.reset_index(drop=True)
    else:
        second = (
            df.loc[df['ID'] == idx_2, ['DATE', 'PX_LAST']]
            .rename(columns={'PX_LAST': idx_2})
        )
        # Inner join keeps only dates on which both indices have a row.
        combined = first.merge(second, on='DATE', how='inner')

    # Drop dates where either price is missing before building the windows.
    both_present = combined[idx_1].notna() & combined[idx_2].notna()
    combined = combined[both_present].copy()

    corr_col = f'corr_{idx_1}_{idx_2}'
    combined[corr_col] = combined[idx_1].rolling(corr_window).corr(combined[idx_2])
    combined[f'MA_{ma_window}'] = combined[corr_col].rolling(ma_window).mean()
    combined.index = pd.to_datetime(combined['DATE'].values)
    return combined

# Rolling SPX vs. LUACTRUU correlation with the default 30-row windows.
corr = return_correlation_ts(df=idx, idx_1='SPX Index', idx_2='LUACTRUU Index')

In [6]:
def split_ts(ts, training_fold=2/3):
    """Split a time series chronologically into train and test parts.

    Missing values are removed first, so ``training_fold`` is a fraction of
    the usable observations. The two parts are contiguous and together cover
    every non-missing observation exactly once. Each part is then put on a
    daily calendar with ``asfreq('D', method='bfill')``, which fills any
    calendar days absent from the index with the next available value.

    Parameters
    ----------
    ts : pandas.Series
        Series to split; ``asfreq`` requires a datetime-like index.
    training_fold : float, default 2/3
        Fraction of the non-missing observations assigned to the training part.

    Returns
    -------
    tuple of pandas.Series
        ``(y_train, y_test)``, both at daily frequency.
    """
    # Drop NaNs *before* slicing: the split position is counted in valid
    # observations, so it must be applied to the same NaN-free series.
    clean = ts.dropna()
    train = int(clean.shape[0] * training_fold)

    # Contiguous halves: nothing is skipped at the boundary or at the end.
    y_train = clean.iloc[:train]
    y_test = clean.iloc[train:]

    y_train = y_train.asfreq('D', method='bfill')
    y_test = y_test.asfreq('D', method='bfill')

    return y_train, y_test


In [7]:
# 'MA_30' is the moving-average column produced with the default ma_window=30.
y_train, y_test = split_ts(ts=corr['MA_30'])


In [8]:
from dc_df.base import ExogenousDC

class HMMExogenousDC(ExogenousDC):
    """ExogenousDC whose ``exogenous_model`` is a Gaussian hidden Markov model.

    Parameters
    ----------
    estimator, y_train, y_test
        Forwarded unchanged to ``ExogenousDC.__init__``.
    n_components : int, default 3
        Number of hidden states of the HMM.
    covariance_type : str, default "diag"
        Covariance parameterisation passed to ``GaussianHMM``.
    n_iter : int, default 100
        Maximum number of EM iterations passed to ``GaussianHMM``.
    random_state : int or None, default None
        Seed passed to ``GaussianHMM``; set it to make the HMM fit repeatable.
    """

    def __init__(self, estimator, y_train, y_test, n_components=3,
                 covariance_type="diag", n_iter=100, random_state=None):
        super().__init__(estimator, y_train, y_test)
        # Imported here so hmmlearn is only needed when this class is instantiated.
        from hmmlearn import hmm

        # Assigned after the base initialiser has run, as before.
        self.exogenous_model = hmm.GaussianHMM(
            n_components=n_components,
            covariance_type=covariance_type,
            n_iter=n_iter,
            random_state=random_state,
        )


# Fit models

In [9]:
# One tuple per fitted model: (estimator, accuracy, f1, fpr, tpr, auc).
results_data = list()

In [10]:
from dc_df.model_registry import HMM_RF


# exogenous model Random Forest with HMM

# Fixed seed so the forest itself is repeatable between runs. The HMM built
# inside HMMExogenousDC is not seeded here, so metrics may still vary.
regressor_rf = RandomForestRegressor(n_jobs=12, random_state=42)
hmm_exogenous = HMMExogenousDC(estimator=regressor_rf, y_train=y_train, y_test=y_test)
y_pred, X = hmm_exogenous.fit_predict()
accuracy, f1, fpr, tpr, area_under_the_curve = hmm_exogenous.evaluate()

# Re-running this cell must not leave two "RF_HMM" rows in the summary, so any
# earlier entry is dropped before the fresh one is recorded.
results_data[:] = [row for row in results_data if row[0] != "RF_HMM"]
results_data.append(
    # fpr[1] / tpr[1]: second element of the returned arrays -- presumably the
    # operating point after the trivial first one; TODO confirm in evaluate().
    ("RF_HMM", accuracy, f1, fpr[1], tpr[1], area_under_the_curve)
)

Even though the 'startprob_' attribute is set, it will be overwritten during initialization because 'init_params' contains 's'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'
Even though the 'startprob_' attribute is set, it will be overwritten during initialization because 'init_params' contains 's'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'


KeyboardInterrupt: 

In [11]:
# Random Forest Regression
from dc_df.base import RegressorDF
# Fixed seed so the forest itself is repeatable between runs.
regressor_rf = RandomForestRegressor(n_jobs=12, random_state=42)

regressor_dc_rf = RegressorDF(estimator=regressor_rf, y_train=y_train, y_test=y_test)

rf_pred = regressor_dc_rf.fit_predict()

accuracy, f1, fpr, tpr, area_under_the_curve = regressor_dc_rf.evaluate()

# Re-running this cell must not leave two "RF" rows in the summary, so any
# earlier entry is dropped before the fresh one is recorded.
results_data[:] = [row for row in results_data if row[0] != "RF"]
results_data.append(
    # fpr[1] / tpr[1]: second element of the returned arrays -- presumably the
    # operating point after the trivial first one; TODO confirm in evaluate().
    ("RF", accuracy, f1, fpr[1], tpr[1], area_under_the_curve)
)


KeyboardInterrupt: 

In [None]:
# SVR with default hyper-parameters
regressor_svr = SVR()
regressor_df_svr = RegressorDF(estimator=regressor_svr, y_train=y_train, y_test=y_test)

svr_pred = regressor_df_svr.fit_predict()

accuracy, f1, fpr, tpr, area_under_the_curve = regressor_df_svr.evaluate()

# Re-running this cell must not leave two "SVR" rows in the summary, so any
# earlier entry is dropped before the fresh one is recorded.
results_data[:] = [row for row in results_data if row[0] != "SVR"]
results_data.append(
    # fpr[1] / tpr[1]: second element of the returned arrays -- presumably the
    # operating point after the trivial first one; TODO confirm in evaluate().
    ("SVR", accuracy, f1, fpr[1], tpr[1], area_under_the_curve)
)

In [None]:
# SVR combined with the HMM exogenous model
regressor_svr = SVR()
svr_hmm = HMMExogenousDC(estimator=regressor_svr, y_train=y_train, y_test=y_test)
y_pred, X = svr_hmm.fit_predict()

accuracy, f1, fpr, tpr, area_under_the_curve = svr_hmm.evaluate()

# Re-running this cell must not leave two "SVR_HMM" rows in the summary, so
# any earlier entry is dropped before the fresh one is recorded.
results_data[:] = [row for row in results_data if row[0] != "SVR_HMM"]
results_data.append(
    # fpr[1] / tpr[1]: second element of the returned arrays -- presumably the
    # operating point after the trivial first one; TODO confirm in evaluate().
    ("SVR_HMM", accuracy, f1, fpr[1], tpr[1], area_under_the_curve)
)

In [None]:
# Lasso regression with default hyper-parameters
regressor_lasso = linear_model.Lasso()
regressor_dc_lasso = RegressorDF(estimator=regressor_lasso, y_train=y_train, y_test=y_test)
lasso_pred = regressor_dc_lasso.fit_predict()

accuracy, f1, fpr, tpr, area_under_the_curve = regressor_dc_lasso.evaluate()

# Re-running this cell must not leave two "LASSO" rows in the summary, so any
# earlier entry is dropped before the fresh one is recorded.
results_data[:] = [row for row in results_data if row[0] != "LASSO"]
results_data.append(
    # fpr[1] / tpr[1]: second element of the returned arrays -- presumably the
    # operating point after the trivial first one; TODO confirm in evaluate().
    ("LASSO", accuracy, f1, fpr[1], tpr[1], area_under_the_curve)
)

# Summary Statistics

In [None]:
# One row per entry in results_data, in the order the models were recorded.
summary_columns = ['estimator', 'accuracy', 'f1', 'fpr', 'tpr', 'auc']
summary = pd.DataFrame(results_data, columns=summary_columns)
summary