# Compare Results Across Dataset for Machine Learning Pipeline (Supervised Binary Classification):
* Author: Ryan Urbanowicz, PhD 
* Affiliation: University of Pennsylvania - Department of Biostatistics, Epidemiology, and Informatics & Institute for Biomedical Informatics (IBI) 
* Date: 1/25/20
* Requirements: Anaconda (with Python 3), skrebate
    - Install most recent version of anaconda

- for each dataset and ML algorithm metric summary file...
- open associated metric file and calculate the average and standard devation of each set of 10 CV values
- Create summary files for each metric and save as individual excel files.
- Run statistical significance comparisons on metric results (non-parametric)


In [1]:
import os
import random
import copy
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None) # display all the columns
import scipy.stats as scs
from scipy.stats import randint
import warnings
warnings.filterwarnings('ignore')

#Statistics packages
from scipy import stats

#Plotting
import matplotlib.pyplot as plt

# Jupyter Notebook Hack: This code ensures that the results of multiple commands within a given cell are all displayed, rather than just the last. 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Specify dataset specific paths
This should be the only section/cell in this notebook that is customized to the specific dataset. It may be necessary to change the names and paths to point this notebook to the 

---
# Set Dataset Comparison Variables (Mandatory)
These variables values will have to be respecified to run this pipeline on a given dataset or computer

In [2]:
sig_cutoff = 0.05
cv = 10 #CV partitions - make sure this matches the setting in the ML pipeline
output_folder = 'results'

baseFolderName = 'ml_pipe_biclass_'
folderSubNames = ['EpiOnly', 'Epi_DietAdj','Epi_DietAdj_Matched']

#unique dataset names from analysis outputs
datasets = ['EpiOnly_20180710_clean_scale_imp_CV_S_FS','Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS','Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS']

#algorithm names as specified in the respective analysis pipeline notebooks
algorithmsToRun = ['logistic_regression','decision_tree', 'random_forest', 'naive_bayes', 'XGB', 'LGB','SVM','ANN','LCS','LCS_QRF'] 
methodsKey = ('Logistic Regression', 'Decision Tree', 'Random Forest', 'Naïve Bayes','XGB','LGB', 'SVM', 'ANN','LCS','LCS_QRF')
algColors = ['black','yellow','orange','grey','purple', 'aqua', 'red', 'pink', 'green', 'blue']

metrics = ['Accuracy','Balanced Accuracy','F1_Score','Precision','Recall','Specificity','TN','TP','FN','FP','ROC_AUC','PRC_AUC','PRC_APS']

#Working directory path automatically detected
wd_path = os.getcwd()
wd_path = wd_path.replace('\\','/')
wd_path = wd_path+'/'
print("Working Directory: "+wd_path)

Working Directory: C:/home/work/research_urbslab/ml_pipe/lynch_ml_pipe_biclass/


# Kruscall Wallis (ANOVA-like) comparison between all datasets

In [3]:
label = ['statistic','pvalue','sig']
for dataset in datasets:
    label.append('mean_'+dataset)
    label.append('std_'+dataset)
    
for algorithm in algorithmsToRun:
    print(algorithm)
    
    kruskal_summary = pd.DataFrame(index=metrics,columns=label)
    
    for metric in metrics:
        i = 0 #dataset count
        tempArray = []
        ave_list =[]
        sd_list = []
        for dataset in datasets:
            #load metric results for given dataset
            filename = wd_path+baseFolderName+folderSubNames[i]+'/'+output_folder+'/'+algorithm+'_Metrics_'+dataset+'.csv'
            td = pd.read_csv(filename)
            tempArray.append(td[metric])
            ave_list.append(td[metric].mean())
            sd_list.append(td[metric].std())
            i += 1
            
        result = stats.kruskal(*tempArray)
        kruskal_summary.at[metric, 'statistic'] = str(round(result[0],6))
        kruskal_summary.at[metric, 'pvalue'] = str(round(result[1],6))
        
        if result[1] < sig_cutoff:
            kruskal_summary.at[metric, 'sig'] = str('*')
        else:
            kruskal_summary.at[metric, 'sig'] = str('')
        
        for j in range(len(ave_list)):
            kruskal_summary.at[metric, 'mean_'+datasets[j]] = str(round(ave_list[j],6))
            kruskal_summary.at[metric, 'std_'+datasets[j]] = str(round(sd_list[j],6))

    kruskal_summary
    kruskal_summary.to_csv(wd_path+algorithm+'_'+'DataCompare_KruskalWallis.csv')  
 

logistic_regression


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,18.599777,9.1e-05,*,0.753816,0.023504,0.744595,0.019284,0.644303,0.074209
Balanced Accuracy,17.686515,0.000144,*,0.679525,0.03277,0.69186,0.035767,0.568333,0.047721
F1_Score,2.144993,0.342153,,0.422126,0.04204,0.430322,0.041693,0.388241,0.072451
Precision,15.701715,0.000389,*,0.334959,0.037124,0.331144,0.032027,0.488885,0.118264
Recall,19.541708,5.7e-05,*,0.57125,0.048251,0.615,0.063955,0.342312,0.105071
Specificity,5.718565,0.05731,,0.787801,0.020111,0.768721,0.015244,0.794355,0.146832
TN,22.20963,1.5e-05,*,338.6,8.783824,330.4,6.769211,51.8,10.141225
TP,21.517987,2.1e-05,*,45.7,3.860052,49.2,5.116422,11.3,3.831159
FN,20.73859,3.1e-05,*,34.3,3.860052,30.8,5.116422,21.5,3.407508
FP,22.224493,1.5e-05,*,91.2,8.599742,99.4,6.484169,13.4,9.663218


decision_tree


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,16.74437,0.000231,*,0.758134,0.026779,0.70656,0.042949,0.647138,0.054351
Balanced Accuracy,18.010323,0.000123,*,0.674456,0.026167,0.663711,0.037956,0.555145,0.047485
F1_Score,2.673548,0.262692,,0.418289,0.035169,0.392151,0.042282,0.323131,0.139748
Precision,18.23521,0.00011,*,0.338097,0.035371,0.294005,0.03881,0.478162,0.118044
Recall,18.785887,8.3e-05,*,0.5525,0.050621,0.60125,0.084666,0.278286,0.154783
Specificity,7.90692,0.019188,*,0.796412,0.033,0.726173,0.056632,0.832005,0.127518
TN,22.918334,1.1e-05,*,342.3,14.244297,312.1,24.200321,54.4,9.834181
TP,20.925084,2.9e-05,*,44.2,4.049691,48.1,6.773314,9.1,5.216427
FN,13.191598,0.001366,*,35.8,4.049691,31.9,6.773314,23.7,5.696978
FP,22.803049,1.1e-05,*,87.5,14.175487,117.7,24.376902,10.8,8.066391


random_forest


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,21.487921,2.2e-05,*,0.764598,0.026874,0.784025,0.012888,0.63247,0.048821
Balanced Accuracy,19.098442,7.1e-05,*,0.679814,0.030038,0.692355,0.025058,0.558234,0.046823
F1_Score,6.089097,0.047618,*,0.427154,0.03995,0.44774,0.032054,0.373204,0.092206
Precision,9.466184,0.008799,*,0.347112,0.038331,0.374162,0.025658,0.444271,0.087894
Recall,19.491527,5.9e-05,*,0.55625,0.036917,0.55875,0.051724,0.335518,0.098078
Specificity,7.446627,0.024154,*,0.803378,0.025968,0.825959,0.014464,0.780949,0.079663
TN,21.854286,1.8e-05,*,345.3,11.421714,355.0,6.394442,51.0,6.733003
TP,19.535201,5.7e-05,*,44.5,2.953341,44.7,4.137901,11.0,3.299832
FN,19.508973,5.8e-05,*,35.5,2.953341,35.3,4.137901,21.8,3.645393
FP,21.883523,1.8e-05,*,84.5,11.078006,74.8,6.178817,14.2,5.116422


naive_bayes


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,21.283478,2.4e-05,*,0.821692,0.015158,0.810315,0.012108,0.640961,0.041126
Balanced Accuracy,9.494194,0.008677,*,0.605329,0.024046,0.621981,0.024943,0.572899,0.039524
F1_Score,7.667097,0.021633,*,0.338319,0.047407,0.364418,0.043964,0.405056,0.061156
Precision,5.134691,0.076739,,0.40701,0.064068,0.384585,0.044971,0.459535,0.074197
Recall,8.291167,0.015834,*,0.29,0.038998,0.3475,0.048876,0.367336,0.069496
Specificity,24.142565,6e-06,*,0.920658,0.012258,0.896463,0.011307,0.778462,0.061422
TN,24.169467,6e-06,*,395.7,5.396501,385.3,4.900113,50.8,5.308274
TP,21.893286,1.8e-05,*,23.2,3.119829,27.8,3.910101,12.0,2.160247
FN,21.903057,1.8e-05,*,56.8,3.119829,52.2,3.910101,20.8,3.155243
FP,24.174855,6e-06,*,34.1,5.258855,44.5,4.859127,14.4,3.864367


XGB


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,19.718062,5.2e-05,*,0.762049,0.023483,0.756368,0.018505,0.59823,0.053558
Balanced Accuracy,19.726452,5.2e-05,*,0.685425,0.027553,0.694774,0.026659,0.518401,0.024516
F1_Score,18.095484,0.000118,*,0.431666,0.036297,0.438022,0.032546,0.308654,0.057124
Precision,1.643871,0.43958,,0.346502,0.034729,0.343942,0.027602,0.388388,0.076114
Recall,20.677473,3.2e-05,*,0.57375,0.039286,0.605,0.05277,0.277747,0.093816
Specificity,1.644237,0.4395,,0.7971,0.023322,0.784547,0.021427,0.759055,0.119283
TN,19.948798,4.7e-05,*,342.6,10.265368,337.2,9.283199,49.5,8.08634
TP,20.691332,3.2e-05,*,45.9,3.142893,48.4,4.221637,9.1,3.107339
FN,18.474771,9.7e-05,*,34.1,3.142893,31.6,4.221637,23.7,3.622461
FP,19.99263,4.6e-05,*,87.2,9.953224,92.6,9.191784,15.7,7.631077


LGB


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,19.387611,6.2e-05,*,0.76323,0.018565,0.759502,0.019469,0.592462,0.043041
Balanced Accuracy,20.340654,3.8e-05,*,0.685108,0.029538,0.700193,0.033299,0.524167,0.031057
F1_Score,15.973231,0.00034,*,0.431094,0.03764,0.444569,0.038924,0.340066,0.058465
Precision,3.571069,0.167707,,0.346476,0.031928,0.349173,0.031575,0.375462,0.044545
Recall,20.967584,2.8e-05,*,0.57125,0.050707,0.61375,0.065736,0.319955,0.086663
Specificity,6.934614,0.031201,*,0.798966,0.016566,0.786636,0.021058,0.728379,0.085663
TN,19.940496,4.7e-05,*,343.4,7.351493,338.1,9.218942,47.5,6.276057
TP,21.295009,2.4e-05,*,45.7,4.056545,49.1,5.258855,10.5,2.953341
FN,19.320568,6.4e-05,*,34.3,4.056545,30.9,5.258855,22.3,3.267687
FP,19.940496,4.7e-05,*,86.4,7.058486,91.7,9.006788,17.7,5.696978


SVM


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,23.714006,7e-06,*,0.766961,0.017075,0.788936,0.00867,0.593214,0.065463
Balanced Accuracy,15.269677,0.000483,*,0.67613,0.02183,0.590989,0.043594,0.566942,0.056289
F1_Score,13.272258,0.001312,*,0.423126,0.029754,0.304282,0.066326,0.443553,0.073854
Precision,13.24129,0.001333,*,0.346722,0.02874,0.312955,0.040788,0.414146,0.06271
Recall,13.118524,0.001417,*,0.54375,0.035478,0.3025,0.105869,0.490685,0.115589
Specificity,24.808816,4e-06,*,0.808509,0.017353,0.879479,0.021848,0.643199,0.113326
TN,25.573725,3e-06,*,347.5,7.604823,378.0,9.404491,41.9,7.40045
TP,20.024548,4.5e-05,*,43.5,2.838231,24.2,8.469553,16.2,4.417138
FN,23.889705,6e-06,*,36.5,2.838231,55.8,8.469553,16.6,3.50238
FP,25.573725,3e-06,*,82.3,7.424434,51.8,9.390302,23.3,7.528465


ANN


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,21.687069,2e-05,*,0.801884,0.023341,0.824442,0.018093,0.659169,0.02869
Balanced Accuracy,13.016774,0.001491,*,0.582392,0.030051,0.580001,0.014451,0.54677,0.016185
F1_Score,0.43826,0.803217,,0.29077,0.064918,0.28659,0.029749,0.283277,0.060468
Precision,12.581509,0.001853,*,0.338001,0.08133,0.408129,0.079171,0.507618,0.103342
Recall,5.231932,0.073097,,0.2625,0.073125,0.22375,0.023162,0.208146,0.075706
Specificity,5.65797,0.059073,,0.902284,0.033331,0.936251,0.021792,0.885394,0.069338
TN,21.944776,1.7e-05,*,387.8,14.289079,402.4,9.335714,57.8,6.232531
TP,20.542694,3.5e-05,*,21.0,5.849976,17.9,1.852926,6.8,2.394438
FN,21.212615,2.5e-05,*,59.0,5.849976,62.1,1.852926,26.0,3.431877
FP,22.01211,1.7e-05,*,42.0,14.337209,27.4,9.371351,7.4,4.299871


LCS


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,19.448048,6e-05,*,0.790699,0.011011,0.784021,0.021788,0.62416,0.031341
Balanced Accuracy,18.984223,7.5e-05,*,0.66681,0.019096,0.668444,0.031829,0.553602,0.034158
F1_Score,4.466155,0.107198,,0.421452,0.027015,0.421128,0.044115,0.371305,0.065255
Precision,7.206119,0.02724,*,0.372555,0.023945,0.364819,0.041392,0.424941,0.053382
Recall,11.575969,0.003064,*,0.48625,0.039726,0.5,0.056519,0.33938,0.096528
Specificity,11.243724,0.003618,*,0.84737,0.012996,0.836888,0.022041,0.767824,0.066142
TN,19.783668,5.1e-05,*,364.2,5.633235,359.7,9.707386,50.2,6.442912
TP,19.453195,6e-05,*,38.9,3.17805,40.0,4.521553,11.0,2.666667
FN,19.435826,6e-05,*,41.1,3.17805,40.0,4.521553,21.8,4.341019
FP,19.857232,4.9e-05,*,65.6,5.581716,70.1,9.421606,15.0,3.800585


LCS_QRF


Unnamed: 0,statistic,pvalue,sig,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,20.145463,4.2e-05,*,0.844648,0.008045,0.849548,0.007751,0.663783,0.018093
Balanced Accuracy,17.009032,0.000203,*,0.557903,0.016373,0.574542,0.020339,0.518093,0.021543
F1_Score,13.79871,0.001008,*,0.218767,0.048615,0.264224,0.051735,0.131222,0.065495
Precision,4.570462,0.101751,,0.529261,0.090845,0.572236,0.080163,0.489603,0.211687
Recall,13.697831,0.001061,*,0.14,0.035746,0.17375,0.042675,0.077586,0.042926
Specificity,8.456503,0.014578,*,0.975807,0.010289,0.975335,0.008007,0.9586,0.020886
TN,19.425469,6.1e-05,*,419.4,4.247875,419.2,3.614784,62.5,3.62859
TP,20.486531,3.6e-05,*,11.2,2.859681,13.9,3.414023,2.5,1.269296
FN,20.45902,3.6e-05,*,68.8,2.859681,66.1,3.414023,30.3,3.164034
FP,19.301118,6.4e-05,*,10.4,4.427189,10.6,3.438346,2.7,1.337494


# Mann-Whitney U test (Pairwise Comparisons)

In [4]:
label = ['metric','dataset1', 'dataset2','statistic','pvalue','sig']

for i in range(1,len(datasets)):
    label.append('mean_dataset'+str(i))
    label.append('std_dataset'+str(i))

for algorithm in algorithmsToRun:
    print(algorithm)
    
    #mann_whit_summary = pd.DataFrame(columns=label)
    master_list = []
    for metric in metrics:
        #Cycle through unique combinations of datasets
        for x in range(0,len(datasets)-1):
            for y in range(x+1,len(datasets)):
                tempList = []
                
                file1 = wd_path+baseFolderName+folderSubNames[x]+'/'+output_folder+'/'+algorithm+'_Metrics_'+datasets[x]+'.csv'
                td1 = pd.read_csv(file1)
                set1 = td1[metric]
                ave1 = td1[metric].mean()
                sd1 = td1[metric].std()
                
                file2 = wd_path+baseFolderName+folderSubNames[y]+'/'+output_folder+'/'+algorithm+'_Metrics_'+datasets[y]+'.csv'
                td2 = pd.read_csv(file2)
                set2 = td2[metric]
                ave2 = td2[metric].mean()
                sd2 = td2[metric].std()
                
                result = (stats.mannwhitneyu(set1,set2))

                tempList.append(str(metric))
                tempList.append(str(datasets[x]))
                tempList.append(str(datasets[y]))
                tempList.append(str(round(result[0],6)))
                tempList.append(str(round(result[1],6)))
                
                if result[1] < sig_cutoff:
                    tempList.append(str('*'))
                else:
                    tempList.append(str(''))

                tempList.append(str(round(ave1,6)))
                tempList.append(str(round(sd1,6)))
                tempList.append(str(round(ave2,6)))
                tempList.append(str(round(sd2,6)))

                master_list.append(tempList)
                
    # Create the pandas DataFrame  
    df = pd.DataFrame(master_list) 
    df.columns = label
    df
    df.to_csv(wd_path+algorithm+'_'+'DataCompare_MannWhitney.csv')  

logistic_regression


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,33.0,0.105803,,0.753816,0.023504,0.744595,0.019284
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,2.0,0.000163,*,0.753816,0.023504,0.644303,0.074209
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,2.0,0.000163,*,0.744595,0.019284,0.644303,0.074209
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,35.0,0.136428,,0.679525,0.03277,0.69186,0.035767
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,3.0,0.000219,*,0.679525,0.03277,0.568333,0.047721
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,3.0,0.00022,*,0.69186,0.035767,0.568333,0.047721
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,42.0,0.285303,,0.422126,0.04204,0.430322,0.041693
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,37.0,0.172261,,0.422126,0.04204,0.388241,0.072451
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,32.0,0.092938,,0.430322,0.041693,0.388241,0.072451
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,48.0,0.454793,,0.334959,0.037124,0.331144,0.032027


decision_tree


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,16.0,0.005649,*,0.758134,0.026779,0.70656,0.042949
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,2.0,0.000164,*,0.758134,0.026779,0.647138,0.054351
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,19.0,0.010567,*,0.70656,0.042949,0.647138,0.054351
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,37.0,0.172352,,0.674456,0.026167,0.663711,0.037956
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,1.0,0.000123,*,0.674456,0.026167,0.555145,0.047485
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,4.0,0.000291,*,0.663711,0.037956,0.555145,0.047485
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,30.0,0.070233,,0.418289,0.035169,0.392151,0.042282
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,34.0,0.120661,,0.418289,0.035169,0.323131,0.139748
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,40.0,0.236338,,0.392151,0.042282,0.323131,0.139748
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,18.0,0.008629,*,0.338097,0.035371,0.294005,0.03881


random_forest


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,19.5,0.011464,*,0.764598,0.026874,0.784025,0.012888
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,1.0,0.000122,*,0.764598,0.026874,0.63247,0.048821
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9e-05,*,0.784025,0.012888,0.63247,0.048821
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,32.0,0.092856,,0.679814,0.030038,0.692355,0.025058
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,2.0,0.000165,*,0.679814,0.030038,0.558234,0.046823
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,1.0,0.000122,*,0.692355,0.025058,0.558234,0.046823
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,27.0,0.044426,*,0.427154,0.03995,0.44774,0.032054
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,30.0,0.070233,,0.427154,0.03995,0.373204,0.092206
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,23.0,0.022537,*,0.44774,0.032054,0.373204,0.092206
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,23.0,0.022537,*,0.347112,0.038331,0.374162,0.025658


naive_bayes


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,23.0,0.022174,*,0.821692,0.015158,0.810315,0.012108
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9e-05,*,0.821692,0.015158,0.640961,0.041126
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.810315,0.012108,0.640961,0.041126
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,28.0,0.052055,,0.605329,0.024046,0.621981,0.024943
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,23.0,0.022577,*,0.605329,0.024046,0.572899,0.039524
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,14.0,0.003642,*,0.621981,0.024943,0.572899,0.039524
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,29.0,0.060612,,0.338319,0.047407,0.364418,0.043964
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,16.0,0.005665,*,0.338319,0.047407,0.405056,0.061156
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,30.0,0.070233,,0.364418,0.043964,0.405056,0.061156
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,45.0,0.366865,,0.40701,0.064068,0.384585,0.044971


XGB


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,38.5,0.202397,,0.762049,0.023483,0.756368,0.018505
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.762049,0.023483,0.59823,0.053558
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.756368,0.018505,0.59823,0.053558
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,38.0,0.192337,,0.685425,0.027553,0.694774,0.026659
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.685425,0.027553,0.518401,0.024516
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.694774,0.026659,0.518401,0.024516
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,41.0,0.260261,,0.431666,0.036297,0.438022,0.032546
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,3.0,0.00022,*,0.431666,0.036297,0.308654,0.057124
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,1.0,0.000123,*,0.438022,0.032546,0.308654,0.057124
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,46.0,0.395668,,0.346502,0.034729,0.343942,0.027602


LGB


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,49.0,0.484885,,0.76323,0.018565,0.759502,0.019469
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.76323,0.018565,0.592462,0.043041
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.759502,0.019469,0.592462,0.043041
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,30.5,0.075387,,0.685108,0.029538,0.700193,0.033299
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.685108,0.029538,0.524167,0.031057
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.700193,0.033299,0.524167,0.031057
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,31.5,0.086728,,0.431094,0.03764,0.444569,0.038924
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,7.0,0.000657,*,0.431094,0.03764,0.340066,0.058465
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,5.0,0.000384,*,0.444569,0.038924,0.340066,0.058465
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,40.5,0.248065,,0.346476,0.031928,0.349173,0.031575


SVM


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,9.0,0.001084,*,0.766961,0.017075,0.788936,0.00867
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9e-05,*,0.766961,0.017075,0.593214,0.065463
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9e-05,*,0.788936,0.00867,0.593214,0.065463
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,10.0,0.001414,*,0.67613,0.02183,0.590989,0.043594
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,3.0,0.00022,*,0.67613,0.02183,0.566942,0.056289
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,38.0,0.192337,,0.590989,0.043594,0.566942,0.056289
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,9.0,0.001101,*,0.423126,0.029754,0.304282,0.066326
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,40.0,0.236338,,0.423126,0.029754,0.443553,0.073854
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,9.0,0.001101,*,0.304282,0.066326,0.443553,0.073854
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,23.0,0.022577,*,0.346722,0.02874,0.312955,0.040788


ANN


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,20.0,0.012818,*,0.801884,0.023341,0.824442,0.018093
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9e-05,*,0.801884,0.023341,0.659169,0.02869
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.824442,0.018093,0.659169,0.02869
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,48.0,0.454861,,0.582392,0.030051,0.580001,0.014451
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,12.0,0.002293,*,0.582392,0.030051,0.54677,0.016185
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,6.0,0.000504,*,0.580001,0.014451,0.54677,0.016185
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,45.0,0.366865,,0.29077,0.064918,0.28659,0.029749
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,42.5,0.298212,,0.29077,0.064918,0.283277,0.060468
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,44.0,0.338735,,0.28659,0.029749,0.283277,0.060468
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,26.0,0.037831,*,0.338001,0.08133,0.408129,0.079171


LCS


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,46.0,0.395206,,0.790699,0.011011,0.784021,0.021788
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.790699,0.011011,0.62416,0.031341
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,8.9e-05,*,0.784021,0.021788,0.62416,0.031341
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,47.5,0.439892,,0.66681,0.019096,0.668444,0.031829
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.66681,0.019096,0.553602,0.034158
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,1.0,0.000123,*,0.668444,0.031829,0.553602,0.034158
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,49.5,0.5,,0.421452,0.027015,0.421128,0.044115
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,25.0,0.032011,*,0.421452,0.027015,0.371305,0.065255
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,27.0,0.044487,*,0.421128,0.044115,0.371305,0.065255
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,49.5,0.5,,0.372555,0.023945,0.364819,0.041392


LCS_QRF


Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,mean_dataset1,std_dataset1,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,33.0,0.105285,,0.844648,0.008045,0.849548,0.007751
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,0.844648,0.008045,0.663783,0.018093
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9e-05,*,0.849548,0.007751,0.663783,0.018093
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,28.0,0.052055,,0.557903,0.016373,0.574542,0.020339
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,9.0,0.001101,*,0.557903,0.016373,0.518093,0.021543
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,2.0,0.000165,*,0.574542,0.020339,0.518093,0.021543
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,28.0,0.052055,,0.218767,0.048615,0.264224,0.051735
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,17.0,0.00701,*,0.218767,0.048615,0.131222,0.065495
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,5.0,0.000384,*,0.264224,0.051735,0.131222,0.065495
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,35.5,0.144414,,0.529261,0.090845,0.572236,0.080163


# Best Kruskal Wallis results comparison
- For each metric, identify the best model and compare that performance between datasets

In [5]:
label = ['statistic','pvalue','sig']
for dataset in datasets:
    label.append('best_alg_'+dataset)
    label.append('mean_'+dataset)
    label.append('std_'+dataset)
    
kruskal_summary = pd.DataFrame(index=metrics,columns=label)
global_data = []

for metric in metrics:
    best_list = []
    best_data = []
    i = 0
    for dataset in datasets:
        alg_ave = []
        alg_st = []
        alg_data = []
        for algorithm in algorithmsToRun:
            #load results for given dataset/algorithm
            filename = wd_path+baseFolderName+folderSubNames[i]+'/'+output_folder+'/'+algorithm+'_Metrics_'+dataset+'.csv'
            td = pd.read_csv(filename)
            alg_ave.append(td[metric].mean())
            alg_st.append(td[metric].std())
            alg_data.append(td[metric])
            
        #Find best algorithm for given metric based on average
        best_ave = max(alg_ave)
        best_index = alg_ave.index(best_ave)
        best_sd = alg_st[best_index]
        best_alg = algorithmsToRun[best_index]
        best_data.append(alg_data[best_index])
        best_list.append([best_alg,best_ave,best_sd])
        i += 1
    
    global_data.append([best_data, best_list])
    result = stats.kruskal(*best_data)
    kruskal_summary.at[metric, 'statistic'] = str(round(result[0],6))
    kruskal_summary.at[metric, 'pvalue'] = str(round(result[1],6))

    if result[1] < sig_cutoff:
        kruskal_summary.at[metric, 'sig'] = str('*')
    else:
        kruskal_summary.at[metric, 'sig'] = str('')
            
    for j in range(len(best_list)):
        kruskal_summary.at[metric, 'best_alg_'+datasets[j]] = str(best_list[j][0])
        kruskal_summary.at[metric, 'mean_'+datasets[j]] = str(round(best_list[j][1],6))
        kruskal_summary.at[metric, 'std_'+datasets[j]] = str(round(best_list[j][2],6))

kruskal_summary
kruskal_summary.to_csv(wd_path+'BestCompare_KruskalWallis.csv')  

Unnamed: 0,statistic,pvalue,sig,best_alg_EpiOnly_20180710_clean_scale_imp_CV_S_FS,mean_EpiOnly_20180710_clean_scale_imp_CV_S_FS,std_EpiOnly_20180710_clean_scale_imp_CV_S_FS,best_alg_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,mean_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,std_Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,best_alg_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,mean_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS,std_Epi_DietAdj_Matched_20180710_clean_scale_imp_CV_M_FS
Accuracy,20.145463,4.2e-05,*,LCS_QRF,0.844648,0.008045,LCS_QRF,0.849548,0.007751,LCS_QRF,0.663783,0.018093
Balanced Accuracy,20.460681,3.6e-05,*,XGB,0.685425,0.027553,LGB,0.700193,0.033299,naive_bayes,0.572899,0.039524
F1_Score,1.308678,0.519785,,XGB,0.431666,0.036297,random_forest,0.44774,0.032054,SVM,0.443553,0.073854
Precision,3.179082,0.204019,,LCS_QRF,0.529261,0.090845,LCS_QRF,0.572236,0.080163,ANN,0.507618,0.103342
Recall,9.625931,0.008124,*,XGB,0.57375,0.039286,logistic_regression,0.615,0.063955,SVM,0.490685,0.115589
Specificity,8.456503,0.014578,*,LCS_QRF,0.975807,0.010289,LCS_QRF,0.975335,0.008007,LCS_QRF,0.9586,0.020886
TN,19.425469,6.1e-05,*,LCS_QRF,419.4,4.247875,LCS_QRF,419.2,3.614784,LCS_QRF,62.5,3.62859
TP,21.503591,2.1e-05,*,XGB,45.9,3.142893,logistic_regression,49.2,5.116422,SVM,16.2,4.417138
FN,20.45902,3.6e-05,*,LCS_QRF,68.8,2.859681,LCS_QRF,66.1,3.414023,LCS_QRF,30.3,3.164034
FP,22.229451,1.5e-05,*,logistic_regression,91.2,8.599742,decision_tree,117.7,24.376902,SVM,23.3,7.528465


# Best Mann Whitney (Pairwise comparisons)

In [6]:
label = ['metric','dataset1', 'dataset2','statistic','pvalue','sig']

for i in range(1,len(datasets)):
    label.append('best_alg'+str(i))
    label.append('mean_dataset'+str(i))
    label.append('std_dataset'+str(i))
    
#mann_whit_summary = pd.DataFrame(columns=label)
master_list = []
j = 0
for metric in metrics:
    #Cycle through unique combinations of datasets
    for x in range(0,len(datasets)-1):
        for y in range(x+1,len(datasets)):
            tempList = []
            set1 = global_data[j][0][x] 
            ave1 = global_data[j][1][x][1]  
            sd1 = global_data[j][1][x][2] 

            set2 = global_data[j][0][y] 
            ave2 = global_data[j][1][y][1]  
            sd2 = global_data[j][1][y][2] 

            result = (stats.mannwhitneyu(set1,set2))

            tempList.append(str(metric))
            tempList.append(str(datasets[x]))
            tempList.append(str(datasets[y]))
            tempList.append(str(round(result[0],6)))
            tempList.append(str(round(result[1],6)))

            if result[1] < sig_cutoff:
                tempList.append(str('*'))
            else:
                tempList.append(str(''))

            tempList.append(global_data[j][1][x][0])
            tempList.append(str(round(ave1,6)))
            tempList.append(str(round(sd1,6)))
                            
            tempList.append(global_data[j][1][y][0])          
            tempList.append(str(round(ave2,6)))
            tempList.append(str(round(sd2,6)))

            master_list.append(tempList)
    j += 1
# Create the pandas DataFrame  
df = pd.DataFrame(master_list) 
df.columns = label
df
df.to_csv(wd_path+'BestCompare_MannWhitney.csv')  

Unnamed: 0,metric,dataset1,dataset2,statistic,pvalue,sig,best_alg1,mean_dataset1,std_dataset1,best_alg2,mean_dataset2,std_dataset2
0,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,33.0,0.105285,,LCS_QRF,0.844648,0.008045,LCS_QRF,0.849548,0.007751
1,Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,LCS_QRF,0.844648,0.008045,LCS_QRF,0.663783,0.018093
2,Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9e-05,*,LCS_QRF,0.849548,0.007751,LCS_QRF,0.663783,0.018093
3,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,26.5,0.040992,*,XGB,0.685425,0.027553,LGB,0.700193,0.033299
4,Balanced Accuracy,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,1.0,0.000123,*,XGB,0.685425,0.027553,naive_bayes,0.572899,0.039524
5,Balanced Accuracy,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,0.0,9.1e-05,*,LGB,0.700193,0.033299,naive_bayes,0.572899,0.039524
6,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,32.0,0.092856,,XGB,0.431666,0.036297,random_forest,0.44774,0.032054
7,F1_Score,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,46.0,0.395668,,XGB,0.431666,0.036297,SVM,0.443553,0.073854
8,F1_Score,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_Matched_20180710_clean_scale_imp_C...,45.0,0.366817,,random_forest,0.44774,0.032054,SVM,0.443553,0.073854
9,Precision,EpiOnly_20180710_clean_scale_imp_CV_S_FS,Epi_DietAdj_20180710_clean_scale_imp_CV_S_FS,35.5,0.144414,,LCS_QRF,0.529261,0.090845,LCS_QRF,0.572236,0.080163
