In [54]:
from own_models_train import get_final_paths, get_final_column_names
from eda_utils import get_latex_column_names
import utils
from estimators import *
from IPython.display import display, Markdown
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score


plt.rcParams.update(utils.get_plt_settings())

In [11]:
get_final_paths(gridsearch=False)

['RF_Clf_s10_default_h10',
 'RF_Regr_s10_default_h10',
 'XGB_Regr_s10_default_h10',
 'XGB_Clf_s10_default_h10']

In [12]:
models = {
    'RF_Clf_s10_default_h20': None,
    'RF_Regr_s10_default_h20': None,
    'XGB_Regr_s10_default_h20': None,
    'XGB_Clf_s10_default_h20': None,
}

In [14]:
for path in models.keys():
    print(path)
    feature_selection = get_final_column_names()
    models[path] = OwnEstimator(clf=None, path=path, feature_selection=feature_selection, hr_threshold=20)

RF_Clf_s10_default_h20
RF_Regr_s10_default_h20
XGB_Regr_s10_default_h20
XGB_Clf_s10_default_h20


In [6]:
import pandas as pd

df_healthy = pd.read_csv(utils.get_own_features_csv_path(segment_length=10, overlap_amount=0.9, data_folder='data_healthy'), index_col=False)

In [20]:
X = df_healthy[get_final_column_names()].copy()
y_label = df_healthy['error'] <= 20
error = df_healthy['error'].copy()

In [19]:
y_pred_rf_clf = models['RF_Clf_s10_default_h20'].predict(X)
y_pred_rf_regr = models['RF_Regr_s10_default_h20'].predict(X)
y_pred_xgb_clf = models['XGB_Clf_s10_default_h20'].predict(X)
y_pred_xgb_regr = models['XGB_Regr_s10_default_h20'].predict(X)

In [30]:
display(Markdown("## Daten"))
coverage = len(X[y_label].index)/len(X.index)*100
print("Coverage: %.2f" % coverage)
mae_all = np.mean(error)
print("MAE insgesamt: %.2f" % mae_all)
mae_inf = np.mean(error[y_label])
print("MAE informativ: %.2f" % mae_inf)

## Daten

Coverage: 83.67
MAE insgesamt: 13.76
MAE insgesamt: 4.34


In [57]:
display(Markdown("## RF Clf"))
coverage = len(X[y_pred_rf_clf].index)/len(X.index)*100
print("Coverage: %.2f" % coverage)
mae = np.mean(error[y_pred_rf_clf])
print("MAE: %.2f" % mae)
f1 = f1_score(y_label, y_pred_rf_clf)
print("F1-Score: %.2f" % f1)
acc = accuracy_score(y_label, y_pred_rf_clf)
print("Accuracy: %.2f" % acc)
print("AUC: %.2f" % roc_auc_score(y_label, models['RF_Clf_s10_default_h20'].clf.predict_proba(X)[:, 1]))

## RF Clf

Coverage: 83.96
MAE: 6.60
F1-Score: 0.91
Accuracy: 0.85
AUC: 0.80


In [58]:
display(Markdown("## XGB Clf"))
coverage = len(X[y_pred_xgb_clf].index)/len(X.index)*100
print("Coverage: %.2f" % coverage)
mae = np.mean(error[y_pred_xgb_clf])
print("MAE: %.2f" % mae)
f1 = f1_score(y_label, y_pred_xgb_clf)
print("F1-Score: %.2f" % f1)
acc = accuracy_score(y_label, y_pred_rf_clf)
print("Accuracy: %.2f" % acc)
print("AUC: %.2f" % roc_auc_score(y_label, models['XGB_Clf_s10_default_h20'].clf.predict_proba(X)[:, 1]))

## XGB Clf

Coverage: 81.58
MAE: 6.60
F1-Score: 0.90
Accuracy: 0.85
AUC: 0.80


In [59]:
display(Markdown("## RF Regr"))
coverage = len(X[y_pred_rf_regr].index)/len(X.index)*100
print("Coverage: %.2f" % coverage)
mae = np.mean(error[y_pred_rf_regr])
print("MAE: %.2f" % mae)
f1 = f1_score(y_label, y_pred_rf_regr)
print("F1-Score: %.2f" % f1)
acc = accuracy_score(y_label, y_pred_rf_regr)
print("Accuracy: %.2f" % acc)
print("AUC: %.2f" % roc_auc_score(y_label, models['RF_Regr_s10_default_h20'].clf.predict_proba(X)[:, 1]))

## RF Regr

Coverage: 91.33
MAE: 7.58
F1-Score: 0.93
Accuracy: 0.88
AUC: 0.81


In [60]:
display(Markdown("## XGB Regr"))
coverage = len(X[y_pred_xgb_regr].index)/len(X.index)*100
print("Coverage: %.2f" % coverage)
mae = np.mean(error[y_pred_xgb_regr])
print("MAE: %.2f" % mae)
f1 = f1_score(y_label, y_pred_xgb_regr)
print("F1-Score: %.2f" % f1)
acc = accuracy_score(y_label, y_pred_xgb_regr)
print("Accuracy: %.2f" % acc)
print("AUC: %.2f" % roc_auc_score(y_label, models['XGB_Regr_s10_default_h20'].clf.predict_proba(X)[:, 1]))

## XGB Regr

Coverage: 88.95
MAE: 7.34
F1-Score: 0.92
Accuracy: 0.87
AUC: 0.77
