In [4]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import RobustScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from ens_selection import CES
from xgboost import XGBClassifier
import sys
import pandas as pd
import os



from ei import EnsembleIntegration

In [7]:
'''
Cell for loading in data
'''
csvs = "/Users/aviadsusman/PycharmProjects/EI_implement/data"
modalities = {}
for file_name in os.listdir(csvs):
    if not (file_name.startswith('labels') or file_name.startswith('data')):
        file_path = os.path.join(csvs, file_name)
        modality = os.path.splitext(file_name)[0]

        data = pd.read_csv(file_path).to_numpy()
        modalities[modality] = data
y = pd.read_csv('/Users/aviadsusman/PycharmProjects/EI_implement/data/labels.csv', header=None).to_numpy()

In [8]:
'''
train local models and meta models
'''
base_predictors = {
    'AdaBoost': AdaBoostClassifier(),
    'DT': DecisionTreeClassifier(),
    'GradientBoosting': GradientBoostingClassifier(),
    'KNN': KNeighborsClassifier(),
    'LR': LogisticRegression(),
    'NB': GaussianNB(),
    'MLP': MLPClassifier(),
    'RF': RandomForestClassifier(),
    'SVM': SVC(kernel='linear', probability=True, max_iter=1000),
    'XGB': XGBClassifier(use_label_encoder=False, eval_metric='error')
}

EI = EnsembleIntegration(base_predictors=base_predictors,
                         k_outer=5,
                         k_inner=5,
                         n_samples=1,
                         sampling_strategy="undersampling",
                         sampling_aggregation="mean",
                         n_jobs=-1, 
                         random_state=42,
                         project_name="demo")


for name, modality in modalities.items():
    EI.train_base(modality, y, base_predictors, modality=name)

EI.save() 

meta_models = {
    "AdaBoost": AdaBoostClassifier(),
    "DT": DecisionTreeClassifier(),
    "GradientBoosting": GradientBoostingClassifier(),
    "KNN": KNeighborsClassifier(),
    "LR": LogisticRegression(),
    "NB": GaussianNB(),
    "MLP": MLPClassifier(),
    "RF": RandomForestClassifier(),
    "SVM": SVC(kernel='linear', probability=True, max_iter=1000),
    "XGB": XGBClassifier(use_label_encoder=False),
}

EI = EnsembleIntegration().load("EI.demo")  

EI.train_meta(meta_models=meta_models)  



########################################################################################################
######################################## health_status modality ########################################
######################################################################################################## 


Training base predictors and generating data for analysis...
Generating meta training data via nested cross validation...




Training base predictors on outer training sets...





Base predictor training is complete: see "base_summary" attribute for a summary of base predictor performance. Meta training data can be found in "meta_training_data" and "meta_test_data" attributes. Run "train_meta" method for analysis of ensemble algorithms.


###############################################################################################
######################################## diet modality ########################################
############################################################################################### 


Training base predictors and generating data for analysis...
Generating meta training data via nested cross validation...




Training base predictors on outer training sets...





Base predictor training is complete: see "base_summary" attribute for a summary of base predictor performance. Meta training data can be found in "meta_training_data" and "meta_test_data" attributes. Run "train_meta" method for analysis of ensemble algorithms.


###################################################################################################################
######################################## other_lifestyle_behavior modality ########################################
################################################################################################################### 


Training base predictors and generating data for analysis...
Generating meta training data via nested cross validation...




Training base predictors on outer training sets...





Base predictor training is complete: see "base_summary" attribute for a summary of base predictor performance. Meta training data can be found in "meta_training_data" and "meta_test_data" attributes. Run "train_meta" method for analysis of ensemble algorithms.


###########################################################################################################
######################################## sociodemographic modality ########################################
########################################################################################################### 


Training base predictors and generating data for analysis...
Generating meta training data via nested cross validation...




Training base predictors on outer training sets...





Base predictor training is complete: see "base_summary" attribute for a summary of base predictor performance. Meta training data can be found in "meta_training_data" and "meta_test_data" attributes. Run "train_meta" method for analysis of ensemble algorithms.

Saved to EI.demo



#####################################################################################################
######################################## Analysing ensembles ########################################
##################################################################################################### 


Mean...




fmax (minority):  0.299669295344696
f (majority):  0.7545908361561776
AUC:  0.6566801050686896
max MCC:  0.16427563726322839

CES...
fmax (minority):  0.2853333333333333
f (majority):  0.7325834542815676
AUC:  0.6200940650276285
max MCC:  0.13752507633137592

S.AdaBoost...
fmax (minority):  0.30426164519326065
f (majority):  0.7473229550976335
AUC:  0.6630140075177806
max MCC:  0.15920327042839713

S.DT...
fmax (minority):  0.2342793869106591
f (majority):  0.0
AUC:  0.5159607813735947
max MCC:  0.02530244949096735

S.GradientBoosting...
fmax (minority):  0.2910224438902743
f (majority):  0.7447706257294191
AUC:  0.6419691064428221
max MCC:  0.14620585752757914

S.KNN...
fmax (minority):  0.25601604278074863
f (majority):  0.6868023637557452
AUC:  0.5753777917627025
max MCC:  0.08774681730195961

S.LR...
fmax (minority):  0.3186972957255016
f (majority):  0.7999146029035012
AUC:  0.6865822345764139
max MCC:  0.18542144036543137

S.NB...
fmax (minority):  0.26795760826900433
f (majority

<ei.EnsembleIntegration at 0x7f81e33a85e0>

In [9]:
EI.meta_summary

{'metrics':                      Mean       CES  S.AdaBoost      S.DT  S.GradientBoosting  \
 fmax (minority)  0.299669  0.285333    0.304262  0.234279            0.291022   
 f (majority)     0.754591  0.732583    0.747323  0.000000            0.744771   
 AUC              0.656680  0.620094    0.663014  0.515961            0.641969   
 max MCC          0.164276  0.137525    0.159203  0.025302            0.146206   
 
                     S.KNN      S.LR      S.NB     S.MLP      S.RF     S.SVM  \
 fmax (minority)  0.256016  0.318697  0.267958  0.296768  0.286206  0.234286   
 f (majority)     0.686802  0.799915  0.753035  0.740443  0.786850  0.001217   
 AUC              0.575378  0.686582  0.580153  0.647847  0.635124  0.503800   
 max MCC          0.087747  0.185421  0.110850  0.154384  0.140720  0.011757   
 
                     S.XGB  
 fmax (minority)  0.279671  
 f (majority)     0.707684  
 AUC              0.627881  
 max MCC          0.128179  ,
 'thresholds':               

In [12]:
EI.meta_summary['metrics']

Unnamed: 0,Mean,CES,S.AdaBoost,S.DT,S.GradientBoosting,S.KNN,S.LR,S.NB,S.MLP,S.RF,S.SVM,S.XGB
fmax (minority),0.299669,0.285333,0.304262,0.234279,0.291022,0.256016,0.318697,0.267958,0.296768,0.286206,0.234286,0.279671
f (majority),0.754591,0.732583,0.747323,0.0,0.744771,0.686802,0.799915,0.753035,0.740443,0.78685,0.001217,0.707684
AUC,0.65668,0.620094,0.663014,0.515961,0.641969,0.575378,0.686582,0.580153,0.647847,0.635124,0.5038,0.627881
max MCC,0.164276,0.137525,0.159203,0.025302,0.146206,0.087747,0.185421,0.11085,0.154384,0.14072,0.011757,0.128179


In [13]:
EI.meta_summary['thresholds']

Unnamed: 0,Mean,CES,S.AdaBoost,S.DT,S.GradientBoosting,S.KNN,S.LR,S.NB,S.MLP,S.RF,S.SVM,S.XGB
fmax (minority),0.500324,0.500458,0.490748,0.0,0.190744,0.2,0.162483,0.35711,0.111451,0.25,0.092499,0.082094
f (majority),0.500324,0.500458,0.490748,0.0,0.190744,0.2,0.162483,0.35711,0.111451,0.25,0.092499,0.082094
AUC,,,,,,,,,,,,
max MCC,0.47,0.5,0.49,0.01,0.19,0.01,0.12,0.37,0.11,0.28,0.13,0.08
