# Using an AdaBoost Regressor to Find the B-point

For details, refer to:

Shafa-at Ali Sheikh, Nil Z. Gurel, Shishir Gupta, Ikenna V. Chukwu, Oleksiy Levantsevych, Mhmtjamil Alkhalaf, Majd Soudan, Rami Abdulbaki, Ammer Haffar, Omer T. Inan, Amit J. Shah, Gari D. Clifford, Ali Bahrami Rad. "Data-Driven Approach for Automatic Detection of Aortic Valve Opening: B-point Detection from Impedance Cardiogram."


## Importing Libraries

In [1]:
import numpy as np
import pandas as pd

from scipy.stats import pearsonr

from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import KFold, GridSearchCV
from sklearn.metrics import mean_absolute_error, median_absolute_error

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Reading data 

In [2]:
# Load the raw feature table from the CSV export and preview its first rows.
FEATURES_CSV = 'features_final_465.csv'
dataset = pd.read_csv(FEATURES_CSV)
dataset.head()

Unnamed: 0,id,subj_id,feat_vec_out_ 1,feat_vec_out_ 2,feat_vec_out_ 3,feat_vec_out_ 4,feat_vec_out_ 5,feat_vec_out_ 6,feat_vec_out_ 7,feat_vec_out_ 8,...,mag_C_max_2D,Diff_notch_infl,Diff_RC_notch,Diff_RC_infl,Diff_RC_max_2D,RC,RB_infl,RB_notch,RB_max_2D,RB_out
0,D_102D3_B_1,D_102,-0.204879,-0.191116,-0.17755,-0.164218,-0.151132,-0.138285,-0.125667,-0.113264,...,0.415008,37,56,93,23,93,0,37,70,37
1,D_102D3_B_2,D_102,0.014109,0.025135,0.036156,0.047166,0.058157,0.069116,0.080022,0.090851,...,0.501384,57,41,98,34,98,0,57,64,57
2,D_102D3_B_3,D_102,-0.016475,-0.004135,0.008555,0.021519,0.034661,0.047875,0.061056,0.074121,...,0.414927,57,40,97,29,97,0,57,68,57
3,D_102D3_B_4,D_102,-0.323237,-0.310921,-0.298491,-0.285942,-0.273269,-0.26047,-0.247544,-0.234484,...,0.628691,0,0,64,33,101,37,0,68,37
4,D_102D3_B_5,D_102,0.031615,0.042397,0.053486,0.064796,0.076233,0.087703,0.099117,0.110398,...,0.406913,66,34,100,31,100,0,66,69,66


## Extracting subject IDs for subject-wise training and testing

In [3]:
# Build the outer cross-validation split over *subjects* rather than rows:
# the folds are formed from the unique subject IDs, so all rows of a given
# subject end up on the same side of each train/test split.
X_ID = dataset.iloc[:, 1].values    # second column: subject ID of every row
unique_ids, counts = np.unique(X_ID, return_counts=True)

outer_folds = 10
kfold_outer = KFold(n_splits=outer_folds, shuffle=True, random_state=0)

# Materialise the fold indices once, then translate them into lists of
# subject IDs (one train list and one test list per outer fold).
fold_indices = list(kfold_outer.split(unique_ids))
outer_train_lists = [unique_ids[train_ix].tolist() for train_ix, _ in fold_indices]
outer_test_lists  = [unique_ids[test_ix].tolist() for _, test_ix in fold_indices]

## Hyperparameters for 10 folds found via 5-fold nested CV (for details refer to the publication)


In [4]:
# Hyperparameters for the 10 outer folds, found via 5-fold nested CV
# (for details refer to the publication). One row per outer fold, in fold
# order: (tree max_features, tree max_depth, AdaBoost learning_rate).
_FOLD_HYPERPARAMS = (
    (0.7, 12, 0.1),
    (0.3, 10, 0.05),
    (0.7, 20, 1),
    (0.7, 15, 0.1),
    (0.7, 15, 0.5),
    (0.7, 20, 1),
    (0.7, 15, 1),
    (0.7, 12, 1),
    (0.7, 22, 0.1),
    (0.7, 15, 0.01),
)

# Transpose the table into the three per-fold lists indexed by fold number.
max_feat, max_dep, LR = (list(column) for column in zip(*_FOLD_HYPERPARAMS))

## Applying Adaboost model 

In [6]:
# Subject-wise outer-CV evaluation of the AdaBoost regressor.
# For every outer fold: fit on the rows of the training subjects, predict the
# last column (RB_out) for the held-out subjects, record error statistics and
# keep a per-row result table for later analysis.
outer_fold_MAE   = []
outer_fold_SDE   = []
outer_fold_MedAE = []
fold_df_list     = []

# scikit-learn renamed AdaBoost's `base_estimator` argument to `estimator`
# (new name added in 1.2, old name removed in 1.4). Use whichever keyword the
# installed version accepts so the cell runs on both old and new releases.
if 'estimator' in AdaBoostRegressor().get_params(deep=False):
    base_est_kwarg = 'estimator'
else:
    base_est_kwarg = 'base_estimator'

for out_fold in range(outer_folds):
    print('\nOuter fold:', out_fold)

    # ---- Training rows for this outer fold --------------------------------
    outer_train_ids      = outer_train_lists[out_fold]      # list of subj_ids for out_fold
    # Match subject IDs exactly (isin) rather than by regex/substring: a
    # substring match would also pick up any subject whose ID merely contains
    # one of the listed IDs, mixing train and test subjects.
    outer_train_beats_df = dataset[dataset.subj_id.isin(outer_train_ids)]
    X_tr_out             = outer_train_beats_df.iloc[:, 2:-1].values   # every column after the two ID columns, except the target
    y_tr_out             = outer_train_beats_df.iloc[:, -1].values     # last column is the target

    # ---- AdaBoost regressor with this fold's pre-selected hyperparameters --
    best_base_est    = DecisionTreeRegressor(max_features=max_feat[out_fold],
                                             max_depth=max_dep[out_fold],
                                             random_state=10)

    Best_inner_model = AdaBoostRegressor(loss='exponential', n_estimators=500,
                                         learning_rate=LR[out_fold],
                                         random_state=1,
                                         **{base_est_kwarg: best_base_est})
    print('Best Inner Model', Best_inner_model)

    # ---- Test rows for this outer fold (exact ID match, as above) ----------
    search_values         = outer_test_lists[out_fold]
    outer_test_beats_df   = dataset[dataset.subj_id.isin(search_values)]

    # "Featureless" test beats: both RB_notch and RB_infl are below 35.
    # These rows are not passed to the model; their RB_max_2D value is used
    # directly as the prediction.
    featureless_mask      = (outer_test_beats_df['RB_notch'] < 35) & (outer_test_beats_df['RB_infl'] < 35)
    outer_test_fless_df   = outer_test_beats_df.loc[featureless_mask]
    y_pred_fless          = outer_test_fless_df.iloc[:, -2].values   # RB_max_2D
    y_test_fless          = outer_test_fless_df.iloc[:, -1].values   # RB_out
    fless_test_rows_index = outer_test_fless_df.index
    # Remaining test beats are the ones predicted by the model.
    outer_test_feat_df    = outer_test_beats_df.drop(fless_test_rows_index)

    # Beat and subject IDs, ordered as (model-predicted beats, featureless
    # beats) so they line up with the concatenated targets/predictions below.
    id_beat_test       = outer_test_beats_df.iloc[:, 0].values
    id_beat_test_feat  = outer_test_feat_df.iloc[:, 0].values
    id_beat_test_fless = outer_test_fless_df.iloc[:, 0].values
    id_beat_test_all   = np.concatenate((id_beat_test_feat, id_beat_test_fless))
    id_subj_feat       = outer_test_feat_df.iloc[:, 1].values
    id_subj_fless      = outer_test_fless_df.iloc[:, 1].values
    id_subj_all        = np.concatenate((id_subj_feat, id_subj_fless))

    # Standardise features; the scaler is fitted on the training rows only
    # and then applied to the test rows.
    X_test_feat  = outer_test_feat_df.iloc[:, 2:-1].values
    y_test_feat  = outer_test_feat_df.iloc[:, -1].values
    scaler_out   = StandardScaler()
    X_tr_out     = scaler_out.fit_transform(X_tr_out)
    X_test_feat  = scaler_out.transform(X_test_feat)

    # ---- Fit on the outer training rows and predict ------------------------
    result_outer = Best_inner_model.fit(X_tr_out, y_tr_out)
    y_pred_feat  = result_outer.predict(X_test_feat)
    y_test_all   = np.concatenate((y_test_feat, y_test_fless))
    y_pred_all   = np.concatenate((y_pred_feat, y_pred_fless))

    # ---- Per-fold performance statistics -----------------------------------
    MAE_fold     = mean_absolute_error(y_test_all, y_pred_all)
    MedAE_fold   = median_absolute_error(y_test_all, y_pred_all)
    outer_fold_MAE.append(MAE_fold)
    outer_fold_MedAE.append(MedAE_fold)
    # NOTE(review): this is the std of the *absolute* error, whereas the
    # overall SDE printed at the end uses the signed error - confirm which
    # definition is intended.
    outer_fold_SDE.append(np.std(np.absolute(y_test_all - y_pred_all)))
    print('Mean Absolute Error:', MAE_fold)

    # ---- Per-row results for further detailed analysis ---------------------
    RB_max_2D_feat    = outer_test_feat_df.iloc[:, -2].values
    RB_notch_feat     = outer_test_feat_df.iloc[:, -3].values
    RB_infl_feat      = outer_test_feat_df.iloc[:, -4].values
    RC_feat           = outer_test_feat_df.iloc[:, -5].values
    RB_max_2D_fless   = outer_test_fless_df.iloc[:, -2].values
    RB_notch_fless    = outer_test_fless_df.iloc[:, -3].values
    RB_infl_fless     = outer_test_fless_df.iloc[:, -4].values
    RC_fless          = outer_test_fless_df.iloc[:, -5].values
    RB_max_2D_all     = np.concatenate((RB_max_2D_feat, RB_max_2D_fless))
    RB_notch_all      = np.concatenate((RB_notch_feat, RB_notch_fless))
    RB_infl_all       = np.concatenate((RB_infl_feat, RB_infl_fless))
    RC_all            = np.concatenate((RC_feat, RC_fless))
    RB_poly_all       = 1.233*RC_all - 0.0032*(RC_all**2) -31.59    # Lozano RB quadratic
    Diff_RB_test_pred = y_test_all - y_pred_all

    # Build the frame column by column (integer labels 0..10, same order as
    # before) so numeric columns keep a numeric dtype instead of being coerced
    # to `object` by stacking them with the string ID columns.
    y_df = pd.DataFrame({
        0:  np.full(len(y_test_all), float(out_fold)),   # outer fold number
        1:  id_beat_test_all,                            # beat ID
        2:  id_subj_all,                                 # subject ID
        3:  RC_all,
        4:  RB_poly_all,
        5:  RB_notch_all,
        6:  RB_infl_all,
        7:  RB_max_2D_all,
        8:  y_test_all,                                  # reference RB
        9:  y_pred_all,                                  # predicted RB
        10: Diff_RB_test_pred,                           # reference - predicted
    })
    fold_df_list.append(y_df)


print('\nResults for predicted RB')
print('\tOuter fold MAE ', outer_fold_MAE)
print('\tOuter fold median absolute error', outer_fold_MedAE)

# Pool the per-row results of all outer folds.
final_data_df = pd.concat(fold_df_list)
#pd.DataFrame(final_data_df).to_csv("Result_AB.csv")    # detailed result for further analysis

RB_test       = final_data_df.iloc[:, 8]    # reference RB
RB_pred       = final_data_df.iloc[:, 9]    # predicted RB
corr_AB, _    = pearsonr(RB_test, RB_pred)

print('\nOverall MAE for pred: ', mean_absolute_error(RB_test, RB_pred))
print('\nOverall Median Abs Error for pred: ', median_absolute_error(RB_test, RB_pred))
# Standard deviation of the signed error over all pooled rows.
print('\nOverall SDE:', np.std(RB_test - RB_pred))
print('\tPearsons correlation: %.3f' % corr_AB)


Outer fold: 0
Best Inner Model AdaBoostRegressor(base_estimator=DecisionTreeRegressor(max_depth=12,
                                                       max_features=0.7,
                                                       random_state=10),
                  learning_rate=0.1, loss='exponential', n_estimators=500,
                  random_state=1)
Mean Absolute Error: 2.82157018842347

Outer fold: 1
Best Inner Model AdaBoostRegressor(base_estimator=DecisionTreeRegressor(max_depth=10,
                                                       max_features=0.3,
                                                       random_state=10),
                  learning_rate=0.05, loss='exponential', n_estimators=500,
                  random_state=1)
Mean Absolute Error: 6.283996662785276

Outer fold: 2
Best Inner Model AdaBoostRegressor(base_estimator=DecisionTreeRegressor(max_depth=20,
                                                       max_features=0.7,
                                    