In [None]:
%load_ext autoreload
%autoreload 2
import os
import sys
import numpy as np
import scipy as sc
from scipy import stats
import pandas as pd
import pickle
import tqdm

from typing import List, Callable

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.compose import ColumnTransformer

#from rulefit import RuleFit
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import BayesianRidge, LinearRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, TunedThresholdClassifierCV
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
#from category_encoders import JamesSteinEncoder, CatBoostEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.calibration import calibration_curve, CalibratedClassifierCV

from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, confusion_matrix, roc_curve, auc

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

import matplotlib.pyplot as plt

# add ../src to python path
sys.path.insert(0, os.path.join(os.path.abspath('.'),'..', 'src'))

import tree_utils

In [None]:
# Try from sklego.meta import HierarchicalClassifier
# hc = HierarchicalClassifier(
#    estimator=LogisticRegression(),
#    groups=groups
#).fit(X, y)
#hc.estimators_

In [None]:
# Folder that holds the input pickle and receives every output artefact.
# The default is the original network-drive location; set the environment
# variable MINIECG_DATA_PATH to run the notebook on another machine without
# editing this cell.
data_path = os.environ.get(
    "MINIECG_DATA_PATH",
    r"J:\Onderzoek\21-763_rvanes_MiniECG-2-Data\E_ResearchData\2_ResearchData\Analysis",
)
file_name = r"input_decision_tree_complete.pkl"

# Repeated stratified k-fold settings shared by all model sections.
num_splits = 10
num_repeats = 10

# Experiment switches.
USE_CLASS_WEIGHT = True     # forwarded to tree_utils.training_loop
ALL_FEATURES = True         # True: use every measurement column instead of a hand-picked list
USE_REDUCED_LABELS = True   # True: collapse the diagnoses to 'Disease' / 'Normal'

# Suffixes that encode the switches in the name of each output folder.
ALL_FEATURES_STRING = "_useAllvars" if ALL_FEATURES else ""
CLASS_WEIGHT_STRING = "_withClassWeights" if USE_CLASS_WEIGHT else ""
REDUCED_LABEL_STRING = "_withReducedLabels" if USE_REDUCED_LABELS else ""

In [None]:
# Read the pre-computed input object from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted location.
pickle_path = os.path.join(data_path, file_name)
with open(pickle_path, 'rb') as pickle_file:
    input_decision_tree_complete = pickle.load(pickle_file)

In [None]:
# Build a frame from the loaded object and transpose it, so that what pandas
# created as columns becomes the row index (presumably one row per recording
# -- TODO confirm against the pickle's structure).
DATA = pd.DataFrame(input_decision_tree_complete).transpose()

In [None]:
def _clean_morphology(value):
    """Return a tidy morphology label for one raw cell value.

    Raw cells are indexable containers whose first item is the label string;
    that item is stripped of surrounding commas and spaces, and an empty
    result is replaced by the literal ``"none"``.

    A value that is already a string is treated as a previously cleaned label
    and is only re-stripped. Without this guard, re-running the cell would
    take ``value[0]`` of a string (its first character) and silently corrupt
    every label.
    """
    label = value if isinstance(value, str) else value[0]
    label = label.strip(",").strip(" ")
    return label if label.strip() != "" else "none"


# Every column whose name contains 'morphology' holds a categorical label.
morphology_columns = [c for c in DATA.columns if 'morphology' in c]
for c in morphology_columns:
    DATA.loc[:, c] = DATA[c].apply(_clean_morphology)

In [None]:
# Collect the distinct morphology labels that occur in any morphology column.
unique_morphologies = set()
for c in morphology_columns:
    unique_morphologies.update(DATA[c].unique().tolist())
morphology_values = list(unique_morphologies)

# Model prepping

In [None]:
# TODO: need to add a feature combiner, perhaps use PySR or GpLearn

# Hyper-parameters are defined once here and unpacked (**kwargs) into the
# estimators of the pipeline cells further down.

# IterativeImputer: linear-regression based imputation, median start values,
# plus missing-indicator columns appended to the output.
impute_kwargs = dict(
    estimator=LinearRegression(),
    random_state=7,
    imputation_order='ascending',
    skip_complete=False,
    max_iter=250,
    initial_strategy='median',
    add_indicator=True,
)

# GradientBoostingClassifier
gradientboosting_kwargs = dict(
    n_estimators=500,
    max_depth=10,
    learning_rate=0.01,
    max_leaf_nodes=40,
    random_state=7,
)

# RandomForestClassifier
randomforest_kwargs = dict(
    n_estimators=100,
    max_depth=6,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=7,
)

# RuleFit settings; the RuleFit import is commented out at the top of the
# notebook, so nothing in the visible cells consumes this dict.
rulefit_kwargs = dict(
    tree_size=10,
    max_rules=100,
    tree_generator=GradientBoostingClassifier(**gradientboosting_kwargs),
)

# DecisionTreeClassifier
decisiontree_kwargs = dict(
    criterion='gini',
    splitter='best',
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    min_weight_fraction_leaf=0.0,
    max_features=None,
    random_state=7,
    max_leaf_nodes=50,
    class_weight='balanced',
)

# XGBClassifier
# NOTE(review): unlike the other estimators no random_state is set here, even
# though row/column subsampling is enabled -- confirm this is intended.
xgboost_kwargs = dict(
    n_estimators=150,
    max_depth=6,
    max_leaves=50,
    learning_rate=1e-3,
    gamma=0.4,
    subsample=0.55,
    colsample_bytree=0.85,
    reg_alpha=0.005,
)

# Axis model

In [None]:
# Target definition for the heart-axis model.
target_col = "Heart Axis Diagnosis"
target_inclusion = ['Left', 'Normal', 'Right', 'Extreme']

# Binary relabelling used when USE_REDUCED_LABELS is on: every deviating axis
# counts as 'Disease'.
Reduction_map = {
    **dict.fromkeys(['Left', 'Right', 'Extreme'], 'Disease'),
    'Normal': 'Normal',
}

# An empty list means "use every measurement column" (resolved in the next
# cell); otherwise use the mean QRS / P / T vector of each of the 8 leads.
features_to_use = [] if ALL_FEATURES else [
    f"{wave}_vector mean lead_{lead}"
    for lead in range(8)
    for wave in ("qrs", "p", "t")
]

In [None]:
# Feature columns: the explicit list if one was given, otherwise every column
# whose name mentions neither the dataset tag nor any diagnosis.
if features_to_use:
    meas_cols = features_to_use
else:
    meas_cols = [
        col for col in DATA.columns
        if not any(tag in col for tag in ('Dataset', target_col, 'Diagnosis'))
    ]

# Output folder for this experiment; the suffixes encode the config switches.
# NOTE(review): "AXIS_" followed by a suffix that itself starts with "_"
# yields a double underscore in the folder name -- kept as is so existing
# outputs are still found.
fstring = f"AXIS_{CLASS_WEIGHT_STRING}{ALL_FEATURES_STRING}{REDUCED_LABEL_STRING}"
axis_output_dir = os.path.join(data_path, fstring)
os.makedirs(axis_output_dir, exist_ok=True)

# Keep only rows with one of the included axis labels; the target and the
# dataset tag are appended after the features.
axis_row_mask = DATA[target_col].isin(target_inclusion)
AXIS_DATA = DATA.loc[axis_row_mask, meas_cols + [target_col, 'Dataset']]
if USE_REDUCED_LABELS:
    AXIS_DATA.loc[:, target_col] = AXIS_DATA[target_col].map(Reduction_map)

# Persist the modelling table (including the dataset tag), then drop the tag
# so the target is the last column of the frame.
AXIS_DATA.to_parquet(os.path.join(axis_output_dir, 'DATA.parquet'))
AXIS_DATA = AXIS_DATA.drop(columns='Dataset')

### TRAINING LOOP

In [None]:
# Integer-encode the morphology labels: categories not seen during fit
# become -2, missing values -1. All other columns pass through unchanged.
OrdEncoder = OrdinalEncoder(
    categories='auto',
    dtype=int,
    handle_unknown='use_encoded_value',
    unknown_value=-2,
    encoded_missing_value=-1,
)
PipeOrdEncoder = ColumnTransformer(
    [("cat_encoder", OrdEncoder, morphology_columns)],
    remainder='passthrough',
)

# The reduced axis feature list contains no morphology columns, so the
# encoding step is switched off (None) in that case.
_cat_enc = PipeOrdEncoder if ALL_FEATURES else None

# (dict key, final step name, classifier) for every model that is compared.
_axis_classifiers = [
    ('rf', "RandomForest", RandomForestClassifier(**randomforest_kwargs)),
    ('gbc', "GradientBoosting", GradientBoostingClassifier(**gradientboosting_kwargs)),
    ('xgb', "XGBoost", XGBClassifier(**xgboost_kwargs)),
    ('dt', "DecisionTree", DecisionTreeClassifier(**decisiontree_kwargs)),
]

# Each pipeline: (optional) category encoding -> imputation -> classifier.
# Every pipeline gets its own imputer; the encoder object is shared.
PipeDict = {
    key: Pipeline([
        ("CatEncoder", _cat_enc),
        ("Impute", IterativeImputer(**impute_kwargs)),
        (step_name, classifier),
    ])
    for key, step_name, classifier in _axis_classifiers
}

le_pipe_rf, le_pipe_gbc, le_pipe_xgb, le_pipe_dt = (
    PipeDict[key] for key in ('rf', 'gbc', 'xgb', 'dt')
)

In [None]:
# Repeated stratified cross-validation with a fixed seed.
splitter = RepeatedStratifiedKFold(
    n_splits=num_splits,
    n_repeats=num_repeats,
    random_state=7,
)

# The target is the last column of AXIS_DATA; everything before it is a feature.
X, Y = AXIS_DATA.iloc[:, :-1], AXIS_DATA.iloc[:, -1]

lbe = LabelEncoder()
lb = LabelBinarizer()

# String labels -> integer codes; the binarizer is fitted on those codes.
Yenc = lbe.fit_transform(Y.values)
# NOTE(review): fit() returns the estimator itself, so y_bin is the fitted
# LabelBinarizer, not a binarized target.
y_bin = lb.fit(Yenc)

# Integer code -> original class label.
ClassMap_AXIS = dict(enumerate(lbe.classes_))

In [None]:
# Show the integer code -> class label mapping of the axis model.
ClassMap_AXIS

In [None]:
# Run the project-level training loop (tree_utils) over all pipelines in
# PipeDict using the CV splitter. It is unpacked into a result list and a
# result DataFrame (presumably per-fold predictions -- TODO confirm in tree_utils).
RES_LIST_AXIS, RES_AXIS_DF = tree_utils.training_loop(
    X,
    Yenc,
    splitter,
    PipeDict,
    use_class_weights=USE_CLASS_WEIGHT,
    ClassMap=ClassMap_AXIS,
    num_splits=num_splits,
    num_repeats=num_repeats,
    make_df=True,
)

In [None]:
# Store the training-loop result frame next to the input data of this experiment.
axis_results_file = os.path.join(data_path, fstring, "RESULTS.parquet")
RES_AXIS_DF.to_parquet(axis_results_file)

### Make ROC and precision-recall curves

In [None]:
# Number of samples per target label (class balance check).
Y.value_counts()

In [None]:
# Number of classes known to the fitted binarizer, and the colour cycle
# handed to the plotting helper.
n_classes = len(lb.classes_)
colors = 'blue green red yellow magenta cyan black'.split()

In [None]:
# Sanity check: the code -> label map next to the encoder's class array.
ClassMap_AXIS, lbe.classes_

In [None]:
# Create the curve plots for the axis model; figures go to the experiment
# folder and are not shown inline.
axis_plot_dir = os.path.join(data_path, fstring)
PLOTS_AXIS = tree_utils.make_plots(
    RES_LIST_AXIS, lb, n_classes, colors, ClassMap_AXIS,
    output_map=axis_plot_dir,
    show_plot=False,
    plot_title="Heart Axis",
)

# Performance records at a decision threshold of 1/n_classes, collected in a frame.
perf_list = tree_utils.get_performance(
    RES_LIST_AXIS,
    threshold=1 / n_classes,
    ClassMap=ClassMap_AXIS,
    binarizer=lb,
)
PERF_AXIS = pd.DataFrame(perf_list)

In [None]:
# Mean of each metric per (model, class) combination.
(
    PERF_AXIS
    .groupby(['model', 'Class'])[['f1', 'precision', 'recall', 'specificity']]
    .mean()
)

In [None]:
# Arguments shared by the net-benefit and calibration plots of the axis model.
_axis_curve_kwargs = dict(
    true_col_prefix='Y_test',
    pred_col_prefix='Y_pred',
    output_path=os.path.join(data_path, fstring),
    plot_title="Heart Axis",
)

# Net-benefit curve over 20 threshold steps.
tree_utils.net_benefit_curve_plot(
    RES_AXIS_DF,
    threshold_steps=20,
    xlim=[0,0.5],
    ylim=[-1,1],
    **_axis_curve_kwargs,
)

# Calibration curve with 10 bins, shown inline.
tree_utils.calibration_curve_plot(
    RES_AXIS_DF,
    n_bins=10,
    show_plot=True,
    **_axis_curve_kwargs,
)

# Muscle model

In [None]:
# Target definition for the heart-muscle model.
target_col = "Diagnosis"
target_inclusion = ['SR','LVH','Microvoltages']

# Binary relabelling used when USE_REDUCED_LABELS is on.
Reduction_map = {
    **dict.fromkeys(['Microvoltages', 'LVH'], 'Disease'),
    'SR': 'Normal',
}

# An empty list means "use every measurement column" (resolved in the next
# cell); otherwise use the mean QRS vector and amplitude of each of the 8
# leads, followed by the morphology label of each lead.
features_to_use = [] if ALL_FEATURES else (
    [
        f"{quantity} mean lead_{lead}"
        for lead in range(8)
        for quantity in ("qrs_vector", "qrs_ampl")
    ]
    + [f"morphology lead_{lead}" for lead in range(8)]
)

In [None]:
# Feature columns: the explicit list if one was given, otherwise every column
# whose name mentions neither the dataset tag nor a diagnosis.
if len(features_to_use)==0:
    meas_cols = [c for c in DATA.columns
                 if ('Dataset' not in c)
                 and (target_col not in c)
                 and ("Heart Axis Diagnosis" not in c)]
else:
    meas_cols = features_to_use

# Output folder for this experiment; the suffixes encode the config switches.
fstring = f"MUSCLE_{CLASS_WEIGHT_STRING}{ALL_FEATURES_STRING}{REDUCED_LABEL_STRING}"
os.makedirs(os.path.join(data_path, fstring), exist_ok=True)

# Rows are selected by SUBSTRING match, so combined diagnoses such as
# 'LVH , BF' are included as well.
MUSCLE_DATA = DATA.loc[DATA[target_col].apply(lambda x: any(c in x for c in target_inclusion)),
                       meas_cols+[target_col]+['Dataset']]

# Collapse every (combined) diagnosis to its muscle-related component.
_muscle_label_map = {
    'SR': 'SR',
    'Microvoltages': 'Microvoltages',
    'LVH': 'LVH',
    'LAFB , LVH': 'LVH',
    'Microvoltages , BF': 'Microvoltages',
    'Microvoltages , RBBB': 'Microvoltages',
    'Microvoltages , LAFB': 'Microvoltages',
    'LVH , BF': 'LVH',
    'LVH , RBBB': 'LVH',
    'LVH , LBBB': 'LVH'
}
_muscle_raw_labels = MUSCLE_DATA[target_col]
_muscle_labels = _muscle_raw_labels.map(_muscle_label_map)

# Series.map turns every value that is missing from the dict into NaN. Because
# the row filter above is a substring match, an unlisted combination would end
# up as a NaN target without any warning -- fail loudly instead.
_muscle_unmapped = sorted(_muscle_raw_labels[_muscle_labels.isna()].unique())
if _muscle_unmapped:
    raise ValueError(
        "Diagnosis values selected for the muscle model but missing from the "
        f"label map: {_muscle_unmapped}"
    )

MUSCLE_DATA = MUSCLE_DATA.assign(Diagnosis=_muscle_labels)

if USE_REDUCED_LABELS:
    MUSCLE_DATA.loc[:, target_col] = MUSCLE_DATA[target_col].map(Reduction_map)

# Persist the modelling table (including the dataset tag), then drop the tag
# so the target is the last column of the frame.
MUSCLE_DATA.to_parquet(os.path.join(data_path, fstring, 'DATA.parquet'))
MUSCLE_DATA = MUSCLE_DATA.drop('Dataset', axis=1)

## Training loop

In [None]:
# Integer-encode the morphology labels: categories not seen during fit
# become -2, missing values -1. All other columns pass through unchanged.
OrdEncoder = OrdinalEncoder(
    categories='auto',
    dtype=int,
    handle_unknown='use_encoded_value',
    unknown_value=-2,
    encoded_missing_value=-1,
)
PipeOrdEncoder = ColumnTransformer(
    [("cat_encoder", OrdEncoder, morphology_columns)],
    remainder='passthrough',
)

# (dict key, final step name, classifier) for every model that is compared.
_muscle_classifiers = [
    ('rf', "RandomForest", RandomForestClassifier(**randomforest_kwargs)),
    ('gbc', "GradientBoosting", GradientBoostingClassifier(**gradientboosting_kwargs)),
    ('xgb', "XGBoost", XGBClassifier(**xgboost_kwargs)),
    ('dt', "DecisionTree", DecisionTreeClassifier(**decisiontree_kwargs)),
]

# Each pipeline: category encoding -> imputation -> classifier.
# Every pipeline gets its own imputer; the encoder object is shared.
PipeDict = {
    key: Pipeline([
        ("CatEncoder", PipeOrdEncoder),
        ("Impute", IterativeImputer(**impute_kwargs)),
        (step_name, classifier),
    ])
    for key, step_name, classifier in _muscle_classifiers
}

le_pipe_rf, le_pipe_gbc, le_pipe_xgb, le_pipe_dt = (
    PipeDict[key] for key in ('rf', 'gbc', 'xgb', 'dt')
)

In [None]:
# Repeated stratified cross-validation with a fixed seed.
splitter = RepeatedStratifiedKFold(
    n_splits=num_splits,
    n_repeats=num_repeats,
    random_state=7,
)

# The target is the last column of MUSCLE_DATA; everything before it is a feature.
X, Y = MUSCLE_DATA.iloc[:, :-1], MUSCLE_DATA.iloc[:, -1]

lbe = LabelEncoder()
lb = LabelBinarizer()

# String labels -> integer codes; the binarizer is fitted on those codes.
Yenc = lbe.fit_transform(Y.values)
lb.fit(Yenc)

# Integer code -> original class label.
ClassMap_MUSCLE = dict(enumerate(lbe.classes_))

In [None]:
# Show the integer code -> class label mapping of the muscle model.
ClassMap_MUSCLE

In [None]:
# Run the project-level training loop (tree_utils) over all pipelines in
# PipeDict using the CV splitter. It is unpacked into a result list and a
# result DataFrame (presumably per-fold predictions -- TODO confirm in tree_utils).
RES_LIST_MUSCLE, RES_MUSCLE_DF = tree_utils.training_loop(
    X,
    Yenc,
    splitter,
    PipeDict,
    use_class_weights=USE_CLASS_WEIGHT,
    ClassMap=ClassMap_MUSCLE,
    num_splits=num_splits,
    num_repeats=num_repeats,
    make_df=True,
)

In [None]:
# Store the training-loop result frame next to the input data of this experiment.
muscle_results_file = os.path.join(data_path, fstring, "RESULTS.parquet")
RES_MUSCLE_DF.to_parquet(muscle_results_file)

### Make ROC and precision-recall curves

In [None]:
# Number of samples per target label (class balance check).
Y.value_counts()

In [None]:
# Number of classes known to the fitted binarizer, and the colour cycle
# handed to the plotting helper.
n_classes = len(lb.classes_)
colors = 'blue green red yellow magenta cyan black'.split()

In [None]:
# Create the curve plots for the muscle model; figures go to the experiment
# folder and are not shown inline.
muscle_plot_dir = os.path.join(data_path, fstring)
PLOTS_MUSCLE = tree_utils.make_plots(
    RES_LIST_MUSCLE, lb, n_classes, colors, ClassMap_MUSCLE,
    output_map=muscle_plot_dir,
    show_plot=False,
    plot_title="Heart Muscle",
)

# Performance records at a decision threshold of 1/n_classes, collected in a frame.
perf_list = tree_utils.get_performance(
    RES_LIST_MUSCLE,
    threshold=1 / n_classes,
    ClassMap=ClassMap_MUSCLE,
    binarizer=lb,
)
PERF_MUSCLE = pd.DataFrame(perf_list)

In [None]:
# Mean of each metric per (model, class) combination.
(
    PERF_MUSCLE
    .groupby(['model', 'Class'])[['f1', 'precision', 'recall', 'specificity']]
    .mean()
)

In [None]:
# Arguments shared by the net-benefit and calibration plots of the muscle model.
_muscle_curve_kwargs = dict(
    true_col_prefix='Y_test',
    pred_col_prefix='Y_pred',
    output_path=os.path.join(data_path, fstring),
    plot_title="Heart Muscle",
)

# Net-benefit curve over 20 threshold steps.
tree_utils.net_benefit_curve_plot(
    RES_MUSCLE_DF,
    threshold_steps=20,
    xlim=[0,0.5],
    ylim=[-1,1],
    **_muscle_curve_kwargs,
)

# Calibration curve with 10 bins, shown inline.
tree_utils.calibration_curve_plot(
    RES_MUSCLE_DF,
    n_bins=10,
    show_plot=True,
    **_muscle_curve_kwargs,
)

# Conduction model

In [116]:
# Target definition for the conduction model.
target_col = "Diagnosis"
target_inclusion = ['BF', 'LBBB','RBBB','LAFB', 'SR']

# Binary relabelling used when USE_REDUCED_LABELS is on: sinus rhythm ('SR')
# is 'Normal', every conduction disorder is 'Disease'.
Reduction_map = {
    label: ('Normal' if label == 'SR' else 'Disease')
    for label in target_inclusion
}

# Always empty here, i.e. the conduction model uses every measurement column
# regardless of ALL_FEATURES.
features_to_use = []

In [117]:
# Feature columns: the explicit list if one was given, otherwise every column
# whose name mentions neither the dataset tag nor a diagnosis.
if len(features_to_use)==0:
    meas_cols = [c for c in DATA.columns
                 if ('Dataset' not in c)
                 and (target_col not in c)
                 and ("Heart Axis Diagnosis" not in c)]
else:
    meas_cols = features_to_use

# Output folder for this experiment; the suffixes encode the config switches.
# NOTE(review): unlike the AXIS_/MUSCLE_ folders there is no "_" after the
# prefix -- kept as is so existing outputs are still found.
fstring = f"CONDUCTION{CLASS_WEIGHT_STRING}{ALL_FEATURES_STRING}{REDUCED_LABEL_STRING}"
os.makedirs(os.path.join(data_path, fstring), exist_ok=True)

# Rows are selected by SUBSTRING match, so combined diagnoses such as
# 'LVH , BF' are included as well.
CONDUCTION_DATA = DATA.loc[DATA[target_col].apply(lambda x: any(c in x for c in target_inclusion)),
                           meas_cols+[target_col]+['Dataset']]

# Collapse every (combined) diagnosis to its conduction-related component.
_conduction_label_map = {
    'SR': 'SR',
    'BF': 'BF',
    'RBBB': 'RBBB',
    'LBBB': 'LBBB',
    'LAFB': 'LAFB',
    'LAFB , LVH': 'LAFB',
    'Microvoltages , BF': 'BF',
    'Microvoltages , RBBB': 'RBBB',
    'Microvoltages , LAFB': 'LAFB',
    'LVH , BF': 'BF',
    'LVH , RBBB': 'RBBB',
    'LVH , LBBB': 'LBBB'
}
_conduction_raw_labels = CONDUCTION_DATA[target_col]
_conduction_labels = _conduction_raw_labels.map(_conduction_label_map)

# Series.map turns every value that is missing from the dict into NaN. Because
# the row filter above is a substring match, an unlisted combination would end
# up as a NaN target without any warning -- fail loudly instead.
_conduction_unmapped = sorted(_conduction_raw_labels[_conduction_labels.isna()].unique())
if _conduction_unmapped:
    raise ValueError(
        "Diagnosis values selected for the conduction model but missing from "
        f"the label map: {_conduction_unmapped}"
    )

CONDUCTION_DATA = CONDUCTION_DATA.assign(Diagnosis=_conduction_labels)
if USE_REDUCED_LABELS:
    CONDUCTION_DATA.loc[:, target_col] = CONDUCTION_DATA[target_col].map(Reduction_map)

# Persist the modelling table (including the dataset tag), then drop the tag
# so the target is the last column of the frame.
# NOTE(review): the other sections store this table as 'DATA.parquet'; the
# file name is left unchanged here in case downstream code reads it.
CONDUCTION_DATA.to_parquet(os.path.join(data_path, fstring, 'CONDUCTION.parquet'))
CONDUCTION_DATA = CONDUCTION_DATA.drop('Dataset', axis=1)

In [118]:
# ['BF', 'LBBB','RBBB','LAFB', 'SR']


## Training loop

In [119]:
# Integer-encode the morphology labels: categories not seen during fit
# become -2, missing values -1. All other columns pass through unchanged.
OrdEncoder = OrdinalEncoder(
    categories='auto',
    dtype=int,
    handle_unknown='use_encoded_value',
    unknown_value=-2,
    encoded_missing_value=-1,
)
PipeOrdEncoder = ColumnTransformer(
    [("cat_encoder", OrdEncoder, morphology_columns)],
    remainder='passthrough',
)

# (dict key, final step name, classifier) for every model that is compared.
_conduction_classifiers = [
    ('rf', "RandomForest", RandomForestClassifier(**randomforest_kwargs)),
    ('gbc', "GradientBoosting", GradientBoostingClassifier(**gradientboosting_kwargs)),
    ('xgb', "XGBoost", XGBClassifier(**xgboost_kwargs)),
    ('dt', "DecisionTree", DecisionTreeClassifier(**decisiontree_kwargs)),
]

# Each pipeline: category encoding -> imputation -> classifier.
# Every pipeline gets its own imputer; the encoder object is shared.
PipeDict = {
    key: Pipeline([
        ("CatEncoder", PipeOrdEncoder),
        ("Impute", IterativeImputer(**impute_kwargs)),
        (step_name, classifier),
    ])
    for key, step_name, classifier in _conduction_classifiers
}

le_pipe_rf, le_pipe_gbc, le_pipe_xgb, le_pipe_dt = (
    PipeDict[key] for key in ('rf', 'gbc', 'xgb', 'dt')
)

In [120]:
# Repeated stratified cross-validation with a fixed seed.
splitter = RepeatedStratifiedKFold(
    n_splits=num_splits,
    n_repeats=num_repeats,
    random_state=7,
)

# The target is the last column of CONDUCTION_DATA; everything before it is a feature.
X, Y = CONDUCTION_DATA.iloc[:, :-1], CONDUCTION_DATA.iloc[:, -1]

lbe = LabelEncoder()
lb = LabelBinarizer()

# String labels -> integer codes; the binarizer is fitted on those codes.
Yenc = lbe.fit_transform(Y)
lb.fit(Yenc)

# Integer code -> original class label.
ClassMap_CONDUCTION = dict(enumerate(lbe.classes_))

In [121]:
# Show the integer code -> class label mapping of the conduction model.
ClassMap_CONDUCTION

{0: 'Disease', 1: 'Normal'}

In [122]:
# Run the project-level training loop (tree_utils) over all pipelines in
# PipeDict using the CV splitter. It is unpacked into a result list and a
# result DataFrame (presumably per-fold predictions -- TODO confirm in tree_utils).
RES_LIST_CONDUCTION, RES_CONDUCTION_DF = tree_utils.training_loop(
    X,
    Yenc,
    splitter,
    PipeDict,
    use_class_weights=USE_CLASS_WEIGHT,
    ClassMap=ClassMap_CONDUCTION,
    num_splits=num_splits,
    num_repeats=num_repeats,
    make_df=True,
)

100%|██████████| 2/2 [02:09<00:00, 64.65s/it]


In [123]:
# Store the training-loop result frame next to the input data of this experiment.
conduction_results_file = os.path.join(data_path, fstring, "RESULTS.parquet")
RES_CONDUCTION_DF.to_parquet(conduction_results_file)

### Make ROC and precision-recall curves

In [124]:
# Number of samples per target label (class balance check).
Y.value_counts()

Diagnosis
Disease    702
Normal     649
Name: count, dtype: int64

In [125]:
# Number of classes known to the fitted binarizer, and the colour cycle
# handed to the plotting helper.
n_classes = len(lb.classes_)
colors = 'blue green red yellow magenta cyan black'.split()

In [126]:
# Create the curve plots for the conduction model; figures go to the
# experiment folder and are not shown inline.
conduction_plot_dir = os.path.join(data_path, fstring)
PLOTS_CONDUCTION = tree_utils.make_plots(
    RES_LIST_CONDUCTION, lb, n_classes, colors, ClassMap_CONDUCTION,
    output_map=conduction_plot_dir,
    show_plot=False,
    plot_title="Heart Conduction",
)

# Performance records at a decision threshold of 1/n_classes, collected in a frame.
perf_list = tree_utils.get_performance(
    RES_LIST_CONDUCTION,
    threshold=1 / n_classes,
    ClassMap=ClassMap_CONDUCTION,
    binarizer=lb,
)
PERF_CONDUCTION = pd.DataFrame(perf_list)

In [127]:
# Mean of each metric per (model, class) combination.
(
    PERF_CONDUCTION
    .groupby(['model', 'Class'])[['f1', 'precision', 'recall', 'specificity']]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,f1,precision,recall,specificity
model,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DT,Disease,0.864529,0.849783,0.87982,0.467099
GBT,Disease/Normal,0.857952,0.837178,0.879791,0.411267
RF,Disease/Normal,0.898318,0.892232,0.904497,0.467099
XGB,Disease/Normal,0.900777,0.879647,0.922963,0.379263


In [128]:
# Arguments shared by the net-benefit and calibration plots of the conduction model.
_conduction_curve_kwargs = dict(
    true_col_prefix='Y_test',
    pred_col_prefix='Y_pred',
    output_path=os.path.join(data_path, fstring),
    plot_title="Heart Conduction",
)

# Net-benefit curve over 20 threshold steps.
tree_utils.net_benefit_curve_plot(
    RES_CONDUCTION_DF,
    threshold_steps=20,
    xlim=[0,0.5],
    ylim=[-1,1],
    **_conduction_curve_kwargs,
)

# Calibration curve with 10 bins, shown inline.
tree_utils.calibration_curve_plot(
    RES_CONDUCTION_DF,
    n_bins=10,
    show_plot=True,
    **_conduction_curve_kwargs,
)

  # Calculate net benefit for 'all positive' strategy
  # Calculate net benefit for 'all negative' strategy
  # Calculate net benefit for 'all positive' strategy
  # Calculate net benefit for 'all negative' strategy
  # Calculate net benefit for 'all positive' strategy
  # Calculate net benefit for 'all negative' strategy
  # Calculate net benefit for 'all positive' strategy
  # Calculate net benefit for 'all negative' strategy
  # Calculate net benefit for 'all positive' strategy
  # Calculate net benefit for 'all negative' strategy
  # Calculate net benefit for 'all positive' strategy
  # Calculate net benefit for 'all negative' strategy
  # Calculate net benefit for 'all positive' strategy
  # Calculate net benefit for 'all negative' strategy
  # Calculate net benefit for 'all positive' strategy
  # Calculate net benefit for 'all negative' strategy
