# Load Data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
import itertools

In [2]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn import linear_model

In [3]:
# Read the raw index data from the project-relative CSV into a DataFrame.
idx = pd.read_csv(filepath_or_buffer='data/res_10.csv')


In [4]:
# Show the distinct identifiers found in the 'ID' column.
idx.loc[:, 'ID'].unique()

array(['LUACTRUU Index', 'SPX Index', 'BCOM Index', 'LBUSTRUU Index',
       'LF98TRUU Index', 'LUATTRUU Index', 'LUMSTRUU Index',
       'LBEATREU Index', 'LP01TREU Index', 'LG30TRUU Index',
       'EMUSTRUU Index', 'LGTRTRUU Index', 'LGDRTRUU Index',
       'LUGCTRUU Index', 'LP06TREU Index', 'LF94TRUU Index',
       'LACHTRUU Index', 'LD08TRUU Index', 'LC07TRUU Index',
       'USYC2Y10 Index', 'LECPTREU Index'], dtype=object)

In [5]:
from data.load import get_y

# get_y is a project helper; it is called with two index identifiers and its
# result is unpacked into a train part and a test part.
(y_train, y_test) = get_y(
    'SPX Index',
    'LUACTRUU Index',
)


# Fit Models

In [6]:
# Accumulator for one metrics tuple per fitted estimator; later cells append to it.
results_data = list()
# Run identifier kept as a string.
MLFLOW_RUN_ID = "1"

In [7]:
from data.save_to_mlflow import save_to_mlflow
from sktime.regression.dummy import DummyRegressor
from dc_df.base import RegressorDF
import mlflow

# Fit the dummy baseline and log its evaluation metrics to the local MLflow
# tracking server.
mlflow.set_tracking_uri("http://localhost:5000")

# Labels used to build the run name. These were previously referenced without
# ever being defined, which raised NameError on a fresh kernel.
model_name = "DUMMY"
dts_name = "SPX-LUACTRUU"

# The context manager closes the run on exit, so no explicit mlflow.end_run()
# call is needed inside the block.
with mlflow.start_run():
    # Baseline estimator wrapped in the project's RegressorDF.
    dummy = DummyRegressor()
    regressor_dummy = RegressorDF(estimator=dummy, y_train=y_train, y_test=y_test)
    dummy_pred = regressor_dummy.fit_predict()

    accuracy, f1, fpr, tpr, area_under_the_curve = regressor_dummy.evaluate()

    tag = f"{model_name}-dataset:{dts_name}"
    mlflow.set_tag("mlflow.runName", tag)
    mlflow.log_metric('accuracy', accuracy)
    mlflow.log_metric('f1', f1)
    # log_metric takes a single number. The other cells in this notebook record
    # fpr[1] / tpr[1] from RegressorDF.evaluate(), so the same element is logged
    # here. NOTE(review): confirm index 1 is the intended operating point.
    mlflow.log_metric('fpr', fpr[1])
    mlflow.log_metric('tpr', tpr[1])
    # The unpacked name is area_under_the_curve; `auc` was never defined.
    mlflow.log_metric('auc', area_under_the_curve)


RestException: RESOURCE_DOES_NOT_EXIST: Run with id=1 not found

In [None]:
from dc_df.model_registry import HMMExogenousDC
from data.save_to_mlflow import save_to_mlflow

# Exogenous model: Random Forest wrapped in the project's HMMExogenousDC.

# A fixed random_state makes the forest's fit repeatable across
# Restart & Run All. Whether HMMExogenousDC adds randomness of its own is not
# visible from this notebook.
regressor_rf = RandomForestRegressor(n_jobs=12, random_state=42)
hmm_exogenous = HMMExogenousDC(
    estimator=regressor_rf,
    y_train=y_train,
    y_test=y_test,
    name="RF_HMM",
)
y_pred, X = hmm_exogenous.fit_predict()

accuracy, f1, fpr, tpr, area_under_the_curve = hmm_exogenous.evaluate()
# Only element 1 of fpr/tpr goes into the summary row.
results_data.append(
    ("RF_HMM", accuracy, f1, fpr[1], tpr[1], area_under_the_curve)
)

# MLFLOW_RUN_ID is no longer rebound to the int 1 here: the call below never
# used it, and it clashed with the string value assigned earlier in the notebook.
# The full fpr/tpr values are handed to the project helper unchanged.
save_to_mlflow(
    model_name=hmm_exogenous.get_name(),
    dts_name='SPX-LUACTRUU',
    accuracy=accuracy,
    f1=f1,
    fpr=fpr,
    tpr=tpr,
    auc=area_under_the_curve,
)


In [None]:
# Random Forest Regression
from dc_df.base import RegressorDF
# Plain Random Forest regressor wrapped in the project's RegressorDF.
# A fixed random_state keeps the fitted forest, and therefore this summary
# row, reproducible across Restart & Run All.
regressor_rf = RandomForestRegressor(n_jobs=12, random_state=42)

regressor_dc_rf = RegressorDF(estimator=regressor_rf, y_train=y_train, y_test=y_test)

rf_pred = regressor_dc_rf.fit_predict()

accuracy, f1, fpr, tpr, area_under_the_curve = regressor_dc_rf.evaluate()
# Only element 1 of fpr/tpr goes into the summary row.
results_data.append(
    ("RF", accuracy, f1, fpr[1], tpr[1], area_under_the_curve)
)


In [None]:
# Support Vector Regression with default hyper-parameters, wrapped in RegressorDF.
regressor_svr = SVR()
regressor_df_svr = RegressorDF(
    estimator=regressor_svr,
    y_train=y_train,
    y_test=y_test,
)
svr_pred = regressor_df_svr.fit_predict()

# Unpack the five values returned by evaluate() and record one summary row.
(accuracy, f1, fpr, tpr, area_under_the_curve) = regressor_df_svr.evaluate()
results_data.append(("SVR", accuracy, f1, fpr[1], tpr[1], area_under_the_curve))

In [None]:
# Support Vector Regression wrapped in the project's HMMExogenousDC.
regressor_svr = SVR()
svr_hmm = HMMExogenousDC(
    estimator=regressor_svr,
    y_train=y_train,
    y_test=y_test,
)
# fit_predict() on this wrapper yields a pair; both parts are kept as notebook variables.
(y_pred, X) = svr_hmm.fit_predict()

# Unpack the five values returned by evaluate() and record one summary row.
(accuracy, f1, fpr, tpr, area_under_the_curve) = svr_hmm.evaluate()
results_data.append(("SVR_HMM", accuracy, f1, fpr[1], tpr[1], area_under_the_curve))

In [None]:
# Lasso regression with default hyper-parameters, wrapped in RegressorDF.
regressor_lasso = linear_model.Lasso()
regressor_dc_lasso = RegressorDF(
    estimator=regressor_lasso,
    y_train=y_train,
    y_test=y_test,
)
lasso_pred = regressor_dc_lasso.fit_predict()

# Unpack the five values returned by evaluate() and record one summary row.
(accuracy, f1, fpr, tpr, area_under_the_curve) = regressor_dc_lasso.evaluate()
results_data.append(("LASSO", accuracy, f1, fpr[1], tpr[1], area_under_the_curve))

# Summary Statistics

In [None]:
# Turn the collected per-estimator tuples into a table, one row per estimator.
summary = pd.DataFrame(
    results_data,
    columns=['estimator', 'accuracy', 'f1', 'fpr', 'tpr', 'auc'],
)
summary