# Load Data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
import itertools

In [2]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn import linear_model

In [3]:
# Load data/res_10.csv into a DataFrame; the path is relative to the
# notebook's working directory.
RES_CSV_PATH = 'data/res_10.csv'
idx = pd.read_csv(RES_CSV_PATH)


In [4]:
# Show the distinct values of the 'ID' column (the available index tickers).
idx.loc[:, 'ID'].unique()

array(['LUACTRUU Index', 'SPX Index', 'BCOM Index', 'LBUSTRUU Index',
       'LF98TRUU Index', 'LUATTRUU Index', 'LUMSTRUU Index',
       'LBEATREU Index', 'LP01TREU Index', 'LG30TRUU Index',
       'EMUSTRUU Index', 'LGTRTRUU Index', 'LGDRTRUU Index',
       'LUGCTRUU Index', 'LP06TREU Index', 'LF94TRUU Index',
       'LACHTRUU Index', 'LD08TRUU Index', 'LC07TRUU Index',
       'USYC2Y10 Index', 'LECPTREU Index'], dtype=object)

In [5]:
# Every unordered pair of the three indices under study, in the order
# (SPX, LUACTRUU), (SPX, BCOM), (LUACTRUU, BCOM).
pair_members = ['SPX Index', 'LUACTRUU Index', 'BCOM Index']
corr_pairs = list(itertools.combinations(pair_members, 2))

In [6]:
from data.load import get_y

# Fetch the train/test targets for one example pair of indices.
sample_pair = ('SPX Index', 'LUACTRUU Index')
y_train, y_test = get_y(*sample_pair)


# Fit models

In [7]:
# MLflow experiment that every run in this notebook is logged under.
MLFLOW_EXPERIMENT_ID = '0'

# Accumulator for per-model metric records; read by the summary cell.
results_data = list()

In [8]:
from sktime.regression.dummy import DummyRegressor
from dc_df.base import RegressorDF
import mlflow
from data.save_to_mlflow import save_to_mlflow

from dc_df.model_registry import HMMExogenousDC

from dc_df.base import RegressorDF

from data.load import get_y


In [9]:
def _evaluate_and_log(model, dataset_name):
    """Evaluate a fitted model wrapper, log its metrics to MLflow and record them.

    Parameters
    ----------
    model
        A wrapper (``RegressorDF`` or ``HMMExogenousDC``) on which
        ``fit_predict()`` has already been called. It must provide
        ``evaluate()`` returning ``(accuracy, f1, fpr, tpr, auc)`` and
        ``get_name()``.
    dataset_name : str
        Label of the index pair; passed to MLflow as ``dts_name``.

    Returns
    -------
    dict
        The record that was appended to ``results_data``.
    """
    accuracy, f1, fpr, tpr, area_under_the_curve = model.evaluate()

    # Only element [1] of fpr/tpr is logged.
    # NOTE(review): presumably the operating point of a binary ROC curve - confirm.
    record = {
        'dataset': dataset_name,
        'estimator': model.get_name(),
        'accuracy': accuracy,
        'f1': f1,
        'fpr': fpr[1],
        'tpr': tpr[1],
        'auc': area_under_the_curve,
    }

    save_to_mlflow(
        model_name=record['estimator'],
        dts_name=dataset_name,
        accuracy=accuracy,
        f1=f1,
        fpr=record['fpr'],
        tpr=record['tpr'],
        auc=area_under_the_curve,
        MLFLOW_EXPERIMENT_ID=MLFLOW_EXPERIMENT_ID,
    )

    # Keep a local copy of the metrics so they can be tabulated later.
    # Records are dicts: a DataFrame built with a fixed ``columns`` list
    # selects the keys it needs and ignores the rest (e.g. 'dataset').
    results_data.append(record)
    return record


# Start from an empty list so that re-running this cell does not duplicate rows.
results_data.clear()

for cp in corr_pairs:
    y_train, y_test = get_y(cp[0], cp[1])
    dataset_name = f"{cp[0]}-{cp[1]}"

    # Dummy baseline (disabled).
    # dummy = DummyRegressor()
    # regressor_dummy = RegressorDF(estimator=dummy, y_train=y_train, y_test=y_test, name="Dummy_REG")
    # dummy_pred = regressor_dummy.fit_predict()
    # _evaluate_and_log(regressor_dummy, dataset_name)

    # Random Forest with HMM exogenous features.
    # NOTE: no random_state is passed, so forest results vary between runs.
    regressor_rf = RandomForestRegressor(n_jobs=12)
    hmm_exogenous = HMMExogenousDC(estimator=regressor_rf, y_train=y_train, y_test=y_test, name="RF_HMM")
    y_pred, X = hmm_exogenous.fit_predict()
    _evaluate_and_log(hmm_exogenous, dataset_name)

    # Random Forest regression (fresh estimator, not the one fitted above).
    regressor_rf = RandomForestRegressor(n_jobs=12)
    regressor_dc_rf = RegressorDF(estimator=regressor_rf, y_train=y_train, y_test=y_test, name="RF")
    rf_pred = regressor_dc_rf.fit_predict()
    _evaluate_and_log(regressor_dc_rf, dataset_name)

    # SVR
    regressor_svr = SVR()
    regressor_df_svr = RegressorDF(estimator=regressor_svr, y_train=y_train, y_test=y_test, name="SVR")
    svr_pred = regressor_df_svr.fit_predict()
    _evaluate_and_log(regressor_df_svr, dataset_name)

    # SVR with HMM exogenous features (fresh estimator again).
    regressor_svr = SVR()
    svr_hmm = HMMExogenousDC(estimator=regressor_svr, y_train=y_train, y_test=y_test, name="SVR_HMM")
    y_pred, X = svr_hmm.fit_predict()
    _evaluate_and_log(svr_hmm, dataset_name)

    # Lasso regression
    regressor_lasso = linear_model.Lasso()
    regressor_dc_lasso = RegressorDF(estimator=regressor_lasso, y_train=y_train, y_test=y_test, name="LASSO")
    lasso_pred = regressor_dc_lasso.fit_predict()
    _evaluate_and_log(regressor_dc_lasso, dataset_name)



Even though the 'startprob_' attribute is set, it will be overwritten during initialization because 'init_params' contains 's'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'
Even though the 'startprob_' attribute is set, it will be overwritten during initialization because 'init_params' contains 's'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'


# Summary Statistics

In [None]:
# Tabulate the collected metric records, one row per entry in results_data.
summary_columns = ['estimator', 'accuracy', 'f1', 'fpr', 'tpr', 'auc']
summary = pd.DataFrame(results_data, columns=summary_columns)
summary