This notebook 1) aggregates and averages the five-fold results and 2) implements the model selection procedure based on the repeated cross-validation results. Final outputs are saved as "select_perf.csv."

In [1]:
#Import packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import average_precision_score, log_loss, roc_auc_score
import pdb
import os

#embed fonts
import matplotlib
matplotlib.rc('pdf', fonttype=42)
import scipy.stats
import glob
import json
from tqdm import tqdm

## Name datasets

In [2]:
dnames = [
    'HtnHeuri','HtnHypoKHeuri','ResHtnHeuri',
    'Htndx','HtnHypoKdx','ResHtndx',
]
dnames_nice = [
               'HTN Heuristic','HTN-Hypokalemia Heuristic',"Resistant HTN Heuristic",
               'HTN Diagnosis',"HTN-Hypokalemia Diagnosis","Resistant HTN Diagnosis",
]
dnames_to_nice = {k:v for k,v in zip(dnames, dnames_nice)}

## Name models

In [3]:
# make nice labels
models = ['RandomForest',
          'DecisionTree',
          'Feat_boolean',
          'GaussianNaiveBayes',
          'LogisticRegression_L2',
          'LogisticRegression_L1'
         ]
model_nice = ['RF',
          'DT',
          'FEAT',
          'GNB',
          'LR L2',
          'LR L1'
             ]
nice_model_labels = {k:v for k,v in zip(models,model_nice)}

## load all results

In [4]:
rdir = '/project/ehr_feat/validation_202205/resultsValidation_103'
frames = []
feat_frames = []

#Read benchmark model results
json_filepaths = tqdm(glob.glob(rdir + '/*/*/*.json'))
                    
for file in json_filepaths:
    with open(file,'r') as of:
        results = json.load(of)
    if 'Feat' in file:
        feat_frames.append(results)
    else:
        frames.append(results)

100%|██████████| 90/90 [00:00<00:00, 1097.43it/s]


In [5]:
df_results = pd.DataFrame.from_records(frames)
feat_df_results = pd.DataFrame.from_records(feat_frames)

for df in [df_results, feat_df_results]:
    print('results columns:',df.columns)    
    print('models:',df.model.unique())
    print('targets:',df.target.unique())

results columns: Index(['accuracy_score_train', 'precision_score_train',
       'average_precision_score_train', 'roc_auc_score_train',
       'accuracy_score_test', 'precision_score_test',
       'average_precision_score_test', 'roc_auc_score_test', 'model', 'target',
       'fold', 'RunID', 'random_state', 'representation', 'size', 'pred',
       'pred_proba'],
      dtype='object')
models: ['LogisticRegression_L1' 'DecisionTree' 'RandomForest'
 'GaussianNaiveBayes' 'LogisticRegression_L2']
targets: ['HtnHypoKHeuri' 'Htndx' 'ResHtnHeuri' 'HtnHypoKdx' 'ResHtndx' 'HtnHeuri']
results columns: Index(['accuracy_score_train', 'precision_score_train',
       'average_precision_score_train', 'roc_auc_score_train',
       'accuracy_score_test', 'precision_score_test',
       'average_precision_score_test', 'roc_auc_score_test', 'model', 'target',
       'fold', 'RunID', 'random_state', 'representation', 'size', 'pred',
       'pred_proba', 'version'],
      dtype='object')
models: ['Feat_bool

## manually add heuristic results

In [6]:
dx_to_heu = {k:v for k,v in zip(dnames[3:],dnames[:3])}
targets = {
            'htn_dx_ia':'Htndx',
            'res_htn_dx_ia':'ResHtndx', 
            'htn_hypok_dx_ia':'HtnHypoKdx', 
            'HTN_heuristic':'HtnHeuri', 
            'res_HTN_heuristic':'ResHtnHeuri',
            'hypoK_heuristic_v4':'HtnHypoKHeuri'
            }
rev_targets = {v:k for k,v in targets.items()}
drop_cols = ['UNI_ID'] + list(targets.keys())

y_trues = {} 
print(dx_to_heu)
frames = []
for target in df_results.target.unique():
     
    target_raw = rev_targets[target]
            
    df_train = pd.read_csv('/project/ehr_feat/validation_202205/Dataset' + str(103) + '/' + target + '/' + target + 'ATrain.csv')
    y_train = df_train[target_raw].values
    df_X_train = df_train.drop(drop_cols,axis=1)  
    df_test = pd.read_csv( '/project/ehr_feat/validation_202205/Dataset' + str(103) + '/' + target + '/' + target + 'ATest.csv')
    df_X_test = df_test.drop(drop_cols,axis=1)  

    y_test = df_test[target_raw].values
    y_trues[target] = {'train':y_train,'test':y_test}
        
    print(
        'phenotype cases:',np.sum(y_test==1)+np.sum(y_train==1),
        'phenotype controls:',np.sum(y_test==0)+np.sum(y_train==0),
          'out of',len(y_test)+len(y_train))

    if target in dx_to_heu.keys():
        heuristic = dx_to_heu[target]
        y_heu = df_test[rev_targets[heuristic]]
        
        frames.append({'model':'Heuristic',
                       'target':target,
                       'RunID':103,
                       'average_precision_score_test': average_precision_score(y_test, y_heu),
                       'precision': np.sum((y_heu==1) & (y_test == 1))/ np.sum(y_heu==1),
                       'recall': np.sum((y_heu==1) & (y_test == 1))/ np.sum(y_test==1),
                       'specificity': np.sum((y_heu==0) & (y_test == 0))/ np.sum(y_test==0),
                       'roc_auc_score_test': roc_auc_score(y_test, y_heu),
                        'pred': y_heu
                      })
        
df_heu = pd.DataFrame.from_records(frames)

{'Htndx': 'HtnHeuri', 'HtnHypoKdx': 'HtnHypoKHeuri', 'ResHtndx': 'ResHtnHeuri'}
phenotype cases: 218 phenotype controls: 981 out of 1199
phenotype cases: 608 phenotype controls: 591 out of 1199
phenotype cases: 200 phenotype controls: 999 out of 1199
phenotype cases: 172 phenotype controls: 1027 out of 1199
phenotype cases: 176 phenotype controls: 1023 out of 1199
phenotype cases: 575 phenotype controls: 624 out of 1199


In [7]:
df_heu

Unnamed: 0,model,target,RunID,average_precision_score_test,precision,recall,specificity,roc_auc_score_test,pred
0,Heuristic,Htndx,103,0.960448,0.979021,0.958904,0.977099,0.968002,0 1 1 1 2 0 3 0 4 0  ...
1,Heuristic,HtnHypoKdx,103,0.703859,0.741379,0.934783,0.935065,0.934924,0 0 1 0 2 0 3 0 4 0  ...
2,Heuristic,ResHtndx,103,0.836133,0.862069,0.961538,0.964444,0.962991,0 0 1 0 2 0 3 0 4 0  ...


In [8]:
y_trues.keys()

dict_keys(['HtnHypoKHeuri', 'Htndx', 'ResHtnHeuri', 'HtnHypoKdx', 'ResHtndx', 'HtnHeuri'])

In [9]:
# df_results = df_results.append(df_heu)

In [10]:
df_results.iloc[0:10,0:10]

Unnamed: 0,accuracy_score_train,precision_score_train,average_precision_score_train,roc_auc_score_train,accuracy_score_test,precision_score_test,average_precision_score_test,roc_auc_score_test,model,target
0,1.0,1.0,1.0,1.0,0.98917,0.95082,0.992356,0.998189,LogisticRegression_L1,HtnHypoKHeuri
1,1.0,1.0,1.0,1.0,0.981949,1.0,0.931844,0.956897,DecisionTree,HtnHypoKHeuri
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,RandomForest,HtnHypoKHeuri
3,0.881779,0.647399,0.629078,0.908625,0.805054,0.527778,0.496155,0.832861,GaussianNaiveBayes,HtnHypoKHeuri
4,1.0,1.0,1.0,1.0,0.935018,0.9,0.945146,0.985514,LogisticRegression_L2,HtnHypoKHeuri
5,0.955531,0.981693,0.991204,0.991314,0.931408,0.944056,0.979222,0.982589,LogisticRegression_L1,Htndx
6,0.965293,0.955508,0.995763,0.996633,0.920578,0.907895,0.944926,0.961388,DecisionTree,Htndx
7,1.0,1.0,1.0,1.0,0.956679,0.952703,0.990639,0.988759,RandomForest,Htndx
8,0.885033,0.963542,0.952807,0.966704,0.902527,0.921986,0.916917,0.940448,GaussianNaiveBayes,Htndx
9,0.978308,0.993304,0.998452,0.998315,0.913357,0.906667,0.97726,0.976838,LogisticRegression_L2,Htndx


In [11]:
feat_df_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 18 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   accuracy_score_train           60 non-null     float64
 1   precision_score_train          60 non-null     float64
 2   average_precision_score_train  60 non-null     float64
 3   roc_auc_score_train            60 non-null     float64
 4   accuracy_score_test            60 non-null     float64
 5   precision_score_test           60 non-null     float64
 6   average_precision_score_test   60 non-null     float64
 7   roc_auc_score_test             60 non-null     float64
 8   model                          60 non-null     object 
 9   target                         60 non-null     object 
 10  fold                           60 non-null     object 
 11  RunID                          60 non-null     int64  
 12  random_state                   60 non-null     int64

In [12]:
for target, dfg in feat_df_results.groupby('target'):
    print('===============')
    print('target:',target)
    print('===============')
    for r in dfg['representation'].values:
        print(r)



target: HtnHeuri
Feature		Weight
offset		-0.52987
float((ICD_hyp_sum<1.500000))		-6.527927
(sum_I10<1.500000)		-2.934863
float((ICD_hyp_sum_norm<0.003045))		-1.850222

Feature		Weight
offset		-0.12038
(ICD_hyp_sum<1.500000)		-7.889209
float((float((float((sum_I10<1.500000))<0.500000))<0.500000))		-3.263864

Feature		Weight
offset		-0.12038
(float((ICD_hyp_sum<1.500000))<0.500000)		7.889209
(float((sum_Essential_HTN<1.500000))<0.500000)		3.263864

Feature		Weight
offset		-0.12038
(ICD_hyp_sum<1.500000)		-7.889209
(sum_I10<1.500000)		-3.263864

Feature		Weight
offset		-0.678006
(ICD_hyp_sum<1.500000)		-10.761395

Feature		Weight
offset		-0.12038
float((float((ICD_hyp_sum<1.500000))<0.500000))		7.889209
(sum_Essential_HTN<1.500000)		-3.263864

Feature		Weight
offset		-0.12038
(ICD_hyp_sum<1.500000)		-7.889209
(sum_Essential_HTN<1.500000)		-3.263864

Feature		Weight
offset		-0.12038
(ICD_hyp_sum<1.500000)		-7.889209
(sum_Essential_HTN<1.500000)		-3.263864

Feature		Weight
offset		-0.12038


## Check counts

In [13]:
df_results.groupby(['model']).size()

model
DecisionTree             6
GaussianNaiveBayes       6
LogisticRegression_L1    6
LogisticRegression_L2    6
RandomForest             6
dtype: int64

In [14]:
feat_df_results.groupby(['model', 'target', 'fold']).size()

model         target         fold
Feat_boolean  HtnHeuri       A       10
              HtnHypoKHeuri  A       10
              HtnHypoKdx     A       10
              Htndx          A       10
              ResHtnHeuri    A       10
              ResHtndx       A       10
dtype: int64

## down-select FEAT models from runs using heuristic procedure

In [15]:
from model_selection import select_feat_models, smallest_of_best_three_quartiles
import pandas as pd

feat_df_results_reduced = select_feat_models(feat_df_results, method= smallest_of_best_three_quartiles)


taking models with APS >= lowest_quartile: 1.0 ...
taking models with APS >= lowest_quartile: 0.9890787637286959 ...
taking models with APS >= lowest_quartile: 0.9266706993887174 ...
taking models with APS >= lowest_quartile: 0.990100921630965 ...
taking models with APS >= lowest_quartile: 0.9661411630301593 ...
taking models with APS >= lowest_quartile: 0.8146692141916847 ...


In [16]:
print(feat_df_results_reduced)
# feat_df_results.target.unique()

   accuracy_score_train  precision_score_train  average_precision_score_train  \
4              1.000000               1.000000                       1.000000   
7              0.989154               0.974684                       0.996516   
4              0.971800               0.890625                       0.943827   
7              0.956616               0.966814                       0.990700   
1              0.978308               0.896104                       0.973782   
2              0.936009               0.773109                       0.821907   

   roc_auc_score_train  accuracy_score_test  precision_score_test  \
4             1.000000             0.996390              0.993056   
7             0.999418             0.978339              0.964286   
4             0.991655             0.945848              0.860465   
7             0.989944             0.942238              0.927632   
1             0.996199             0.960289              0.873016   
2             0.96

In [17]:
# combine dataframes
df_results = pd.concat([df_results, feat_df_results_reduced], axis=0)

In [18]:
df_results.isna().any()
df_results.model.unique()
df_results.groupby(['model', 'target', 'fold']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy_score_train,precision_score_train,average_precision_score_train,roc_auc_score_train,accuracy_score_test,precision_score_test,average_precision_score_test,roc_auc_score_test,RunID,random_state,representation,size,pred,pred_proba,version
model,target,fold,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
DecisionTree,HtnHeuri,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
DecisionTree,HtnHypoKHeuri,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
DecisionTree,HtnHypoKdx,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
DecisionTree,Htndx,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
DecisionTree,ResHtnHeuri,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
DecisionTree,ResHtndx,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
Feat_boolean,HtnHeuri,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Feat_boolean,HtnHypoKHeuri,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Feat_boolean,HtnHypoKdx,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Feat_boolean,Htndx,A,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [19]:
df_results.isna().any()

accuracy_score_train             False
precision_score_train            False
average_precision_score_train    False
roc_auc_score_train              False
accuracy_score_test              False
precision_score_test             False
average_precision_score_test     False
roc_auc_score_test               False
model                            False
target                           False
fold                             False
RunID                            False
random_state                     False
representation                   False
size                             False
pred                             False
pred_proba                       False
version                           True
dtype: bool

In [20]:
df_results[df_results.model=='Feat_boolean']

Unnamed: 0,accuracy_score_train,precision_score_train,average_precision_score_train,roc_auc_score_train,accuracy_score_test,precision_score_test,average_precision_score_test,roc_auc_score_test,model,target,fold,RunID,random_state,representation,size,pred,pred_proba,version
4,1.0,1.0,1.0,1.0,0.99639,0.993056,0.993056,0.996269,Feat_boolean,HtnHeuri,A,103,24284,Feature\t\tWeight\noffset\t\t-0.678006\n(ICD_h...,2,"[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.9999791383743286, 0.9999791383743286, 2.087...",0.4.1.post3
7,0.989154,0.974684,0.996516,0.999418,0.978339,0.964286,0.982189,0.996497,Feat_boolean,HtnHypoKHeuri,A,103,16695,Feature\t\tWeight\noffset\t\t-3.19416\n(low_K_...,22,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.002805903786793351, 0.00942965131253004, 0....",0.4.1.post3
4,0.9718,0.890625,0.943827,0.991655,0.945848,0.860465,0.916985,0.982119,Feat_boolean,HtnHypoKdx,A,103,6832,Feature\t\tWeight\noffset\t\t-2.47924\nMed_Pot...,17,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.04530918970704079, 0.06277342140674591, 0.0...",0.4.1.post3
7,0.956616,0.966814,0.9907,0.989944,0.942238,0.927632,0.989627,0.990144,Feat_boolean,Htndx,A,103,8233,Feature\t\tWeight\noffset\t\t0.658105\n(ICD_hy...,15,"[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[0.856257438659668, 0.9938872456550598, 0.1407...",0.4.1.post3
1,0.978308,0.896104,0.973782,0.996199,0.960289,0.873016,0.959465,0.99008,Feat_boolean,ResHtnHeuri,A,103,14724,Feature\t\tWeight\noffset\t\t-4.04038\n((high_...,17,"[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.002870905213057995, 0.908036470413208, 0.00...",0.4.1.post3
2,0.936009,0.773109,0.821907,0.965392,0.895307,0.755556,0.763901,0.946368,Feat_boolean,ResHtndx,A,103,16695,Feature\t\tWeight\noffset\t\t-2.45128\nfloat((...,12,"[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.1994340717792511, 0.9477220177650452, 0.025...",0.4.1.post3


In [21]:
df_results['model_nice'] = df_results['model'].apply(lambda x: nice_model_labels[x])

### export df_results

In [22]:
#Save output to select_perf.csv
if not os.path.exists(rdir):
    os.mkdir(rdir)
df_results.to_csv(rdir + '/select_perf_final.csv')

In [23]:
#Average values by run (average all folds)
df_results['size'] = df_results['size'].astype(float)
df_results_ave = df_results.groupby(['model_nice','target','RunID'], as_index=False).mean()
# df_results_ave = df_results.groupby(['model_nice','target','RunID','selection'], as_index=False).mean()
df_results_ave.head()

Unnamed: 0,model_nice,target,RunID,accuracy_score_train,precision_score_train,average_precision_score_train,roc_auc_score_train,accuracy_score_test,precision_score_test,average_precision_score_test,roc_auc_score_test,random_state,size
0,DT,HtnHeuri,103,1.0,1.0,1.0,1.0,0.99639,0.993056,0.993056,0.996269,14724.0,3.0
1,DT,HtnHypoKHeuri,103,1.0,1.0,1.0,1.0,0.981949,1.0,0.931844,0.956897,14724.0,21.0
2,DT,HtnHypoKdx,103,1.0,1.0,1.0,1.0,0.924188,0.765957,0.635546,0.867495,14724.0,49.0
3,DT,Htndx,103,0.965293,0.955508,0.995763,0.996633,0.920578,0.907895,0.944926,0.961388,14724.0,67.0
4,DT,ResHtnHeuri,103,1.0,1.0,1.0,1.0,0.927798,0.851852,0.718928,0.878287,14724.0,37.0


In [24]:
df_results_ave.model_nice.unique()
df_results_ave[df_results_ave.model_nice=='FEAT'].target.unique()
df_results_ave.head()

Unnamed: 0,model_nice,target,RunID,accuracy_score_train,precision_score_train,average_precision_score_train,roc_auc_score_train,accuracy_score_test,precision_score_test,average_precision_score_test,roc_auc_score_test,random_state,size
0,DT,HtnHeuri,103,1.0,1.0,1.0,1.0,0.99639,0.993056,0.993056,0.996269,14724.0,3.0
1,DT,HtnHypoKHeuri,103,1.0,1.0,1.0,1.0,0.981949,1.0,0.931844,0.956897,14724.0,21.0
2,DT,HtnHypoKdx,103,1.0,1.0,1.0,1.0,0.924188,0.765957,0.635546,0.867495,14724.0,49.0
3,DT,Htndx,103,0.965293,0.955508,0.995763,0.996633,0.920578,0.907895,0.944926,0.961388,14724.0,67.0
4,DT,ResHtnHeuri,103,1.0,1.0,1.0,1.0,0.927798,0.851852,0.718928,0.878287,14724.0,37.0


## Get Bootstrap confidence intervals

In [25]:
np.random.seed(42)

def bootstrap(scorer, y_true, y_pred, ci = 0.95, sample_weight = []):

    n_straps = 1000
    n_samples = len(y_true)
    scores = np.empty(n_straps) 
    
    for i in range(n_straps):
        sample = np.random.randint(0, len(y_true) - 1, size = n_samples)
        if len(sample_weight)>0:
            scores[i] = scorer(y_true[sample], y_pred[sample], sample_weight=sample_weight[sample])
        else:
            scores[i] = scorer(y_true[sample], y_pred[sample])
            
    m = round(np.mean(scores), 2)
    sd = round(np.std(scores), 2)
    interval = (1 - ci) / 2

    print('CI:{} to {}'.format(interval, 1 - interval))
    ci_upper  = round(np.quantile(scores, 1 - interval), 2)
    ci_lower  = round(np.quantile(scores, interval), 2)
    
    return str(m) + ' ({}-{})'.format(ci_lower,ci_upper)
        
    
    

In [26]:
for x in y_trues:
    print(x)

HtnHypoKHeuri
Htndx
ResHtnHeuri
HtnHypoKdx
ResHtndx
HtnHeuri


In [27]:
frames = []

# get boostrap estimates for each model
for (alg, target), dfg in df_results.groupby(['model_nice','target']):
    print(alg,target)
    assert len(dfg) == 1
    if alg == 'Heuristic':
        y_pred = np.array(dfg['pred'].values[0])
    else:
        y_pred = np.array(dfg['pred_proba'].values[0])
        
    y_true = y_trues[target]['test']
    
    if len(y_pred) != len(y_true):
        print('skipping',alg,target)

    print('y_pred:',y_pred.shape,'y_true:',y_true.shape)
    bs_dict = {
        'model_nice': alg,
        'target': target,
        'RunID': 103
    }
    for scorer in [average_precision_score, log_loss, roc_auc_score]: 

        if scorer.__name__ == 'log_loss':
            weights = np.array([1/len(y_true[y_true==i]) for i in y_true])                 
            weights /= np.sum(weights)
            kwargs = {'sample_weight': weights}
        else:
            kwargs={}
#         score_mean, score_std = bootstrap(scorer, y_true, y_pred, **kwargs)

        bs_score = bootstrap(scorer, y_true, y_pred, **kwargs)
        print('\t',scorer.__name__,bs_score)
        if scorer.__name__+'_test' in dfg.columns:
            print('\t',scorer.__name__,round(dfg[scorer.__name__+'_test'].values[0],2))
            
        bs_dict[scorer.__name__ + '_bs'] = bs_score
    print(bs_dict)
    frames.append(bs_dict)
    
df_bs = pd.DataFrame.from_records(frames, columns=['model_nice','target','RunID',
                                                  'average_precision_score_bs', 
                                                   'log_loss_bs', 
                                                   'roc_auc_score_bs'])


DT HtnHeuri
y_pred: (277,) y_true: (277,)
CI:0.025000000000000022 to 0.975
	 average_precision_score 0.99 (0.98-1.0)
	 average_precision_score 0.99
CI:0.025000000000000022 to 0.975
	 log_loss 0.13 (0.0-0.39)
CI:0.025000000000000022 to 0.975
	 roc_auc_score 1.0 (0.99-1.0)
	 roc_auc_score 1.0
{'model_nice': 'DT', 'target': 'HtnHeuri', 'RunID': 103, 'average_precision_score_bs': '0.99 (0.98-1.0)', 'log_loss_bs': '0.13 (0.0-0.39)', 'roc_auc_score_bs': '1.0 (0.99-1.0)'}
DT HtnHypoKHeuri
y_pred: (277,) y_true: (277,)
CI:0.025000000000000022 to 0.975
	 average_precision_score 0.93 (0.87-0.98)
	 average_precision_score 0.93
CI:0.025000000000000022 to 0.975
	 log_loss 1.5 (0.31-2.92)
CI:0.025000000000000022 to 0.975
	 roc_auc_score 0.96 (0.92-0.99)
	 roc_auc_score 0.96
{'model_nice': 'DT', 'target': 'HtnHypoKHeuri', 'RunID': 103, 'average_precision_score_bs': '0.93 (0.87-0.98)', 'log_loss_bs': '1.5 (0.31-2.92)', 'roc_auc_score_bs': '0.96 (0.92-0.99)'}
DT HtnHypoKdx
y_pred: (277,) y_true: (277,)

In [28]:
df_bs.sort_values(by=['target','model_nice'])

Unnamed: 0,model_nice,target,RunID,average_precision_score_bs,log_loss_bs,roc_auc_score_bs
0,DT,HtnHeuri,103,0.99 (0.98-1.0),0.13 (0.0-0.39),1.0 (0.99-1.0)
6,FEAT,HtnHeuri,103,0.99 (0.98-1.0),0.04 (0.0-0.12),1.0 (0.99-1.0)
12,GNB,HtnHeuri,103,0.93 (0.88-0.97),2.73 (1.77-3.83),0.96 (0.93-0.98)
18,LR L1,HtnHeuri,103,1.0 (1.0-1.0),0.08 (0.05-0.1),1.0 (1.0-1.0)
24,LR L2,HtnHeuri,103,1.0 (1.0-1.0),0.08 (0.05-0.11),1.0 (1.0-1.0)
30,RF,HtnHeuri,103,1.0 (1.0-1.0),0.03 (0.02-0.04),1.0 (1.0-1.0)
1,DT,HtnHypoKHeuri,103,0.93 (0.87-0.98),1.5 (0.31-2.92),0.96 (0.92-0.99)
7,FEAT,HtnHypoKHeuri,103,0.98 (0.95-1.0),0.07 (0.03-0.11),1.0 (0.99-1.0)
13,GNB,HtnHypoKHeuri,103,0.5 (0.39-0.61),7.88 (5.61-10.05),0.83 (0.78-0.88)
19,LR L1,HtnHypoKHeuri,103,0.99 (0.97-1.0),0.04 (0.02-0.09),1.0 (0.99-1.0)


In [29]:
df_results_ave = df_results_ave.merge(df_bs, 
                                      on=['model_nice','target','RunID'],
                                      how='outer')

In [30]:
df_results_ave

Unnamed: 0,model_nice,target,RunID,accuracy_score_train,precision_score_train,average_precision_score_train,roc_auc_score_train,accuracy_score_test,precision_score_test,average_precision_score_test,roc_auc_score_test,random_state,size,average_precision_score_bs,log_loss_bs,roc_auc_score_bs
0,DT,HtnHeuri,103,1.0,1.0,1.0,1.0,0.99639,0.993056,0.993056,0.996269,14724.0,3.0,0.99 (0.98-1.0),0.13 (0.0-0.39),1.0 (0.99-1.0)
1,DT,HtnHypoKHeuri,103,1.0,1.0,1.0,1.0,0.981949,1.0,0.931844,0.956897,14724.0,21.0,0.93 (0.87-0.98),1.5 (0.31-2.92),0.96 (0.92-0.99)
2,DT,HtnHypoKdx,103,1.0,1.0,1.0,1.0,0.924188,0.765957,0.635546,0.867495,14724.0,49.0,0.63 (0.48-0.76),4.57 (2.47-7.01),0.86 (0.8-0.92)
3,DT,Htndx,103,0.965293,0.955508,0.995763,0.996633,0.920578,0.907895,0.944926,0.961388,14724.0,67.0,0.95 (0.91-0.98),1.12 (0.48-1.89),0.96 (0.94-0.98)
4,DT,ResHtnHeuri,103,1.0,1.0,1.0,1.0,0.927798,0.851852,0.718928,0.878287,14724.0,37.0,0.73 (0.62-0.83),4.07 (2.3-6.09),0.88 (0.83-0.93)
5,DT,ResHtndx,103,0.966377,0.934579,0.949715,0.992077,0.841155,0.642857,0.505531,0.78953,14724.0,77.0,0.51 (0.36-0.64),6.14 (3.86-8.44),0.79 (0.72-0.86)
6,FEAT,HtnHeuri,103,1.0,1.0,1.0,1.0,0.99639,0.993056,0.993056,0.996269,24284.0,2.0,0.99 (0.98-1.0),0.04 (0.0-0.12),1.0 (0.99-1.0)
7,FEAT,HtnHypoKHeuri,103,0.989154,0.974684,0.996516,0.999418,0.978339,0.964286,0.982189,0.996497,16695.0,22.0,0.98 (0.95-1.0),0.07 (0.03-0.11),1.0 (0.99-1.0)
8,FEAT,HtnHypoKdx,103,0.9718,0.890625,0.943827,0.991655,0.945848,0.860465,0.916985,0.982119,6832.0,17.0,0.92 (0.85-0.96),0.26 (0.18-0.36),0.98 (0.97-0.99)
9,FEAT,Htndx,103,0.956616,0.966814,0.9907,0.989944,0.942238,0.927632,0.989627,0.990144,8233.0,15.0,0.99 (0.98-1.0),0.14 (0.09-0.21),0.99 (0.98-1.0)


## make docx table

In [32]:
from docx import Document
from docx.shared import Inches, Pt

document = Document()

table = document.add_table(rows=1, cols=6)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Phenotype'
hdr_cells[1].text = 'Method'
hdr_cells[2].text = 'Test Bal. Log Loss'
hdr_cells[3].text = 'Test AUPRC'
hdr_cells[4].text = 'Test AUROC'
hdr_cells[5].text = 'Size'

# for target, dft in df_results_ave.groupby('target'):
i = 0
for target in dnames:
    print(target)
    dft = df_results_ave.loc[df_results_ave.target == target]
    j = 0
    models = ['GNB','DT','LR L1','LR L2','RF','FEAT']
    for model in models:
        dftm = dft.loc[dft.model_nice == model]
        i += 1
        j += 1
        table.add_row()
        cells = table.rows[i].cells
        if j == 1:
            cells[0].text = dnames_to_nice[target]
#         assert (len(dftm) == 1), dftm
        cells[1].text = model
        if(len(dftm) == 1):
            cells[2].text = '{}'.format( dftm['log_loss_bs'].values[0])
            cells[3].text = '{}'.format( dftm['average_precision_score_bs'].values[0])
            cells[4].text = '{}'.format( dftm['roc_auc_score_bs'].values[0])
            cells[5].text = '{}'.format( int(dftm['size'].values[0]))
        else:
            cells[2].text = '?'
            cells[3].text = '?'
            cells[4].text = '?'
            cells[5].text = '?'
        print('-','\t\t'.join([c.text for c in cells[1:]]))
    print(50*'=') 
# for qty, id, desc in records:
#     row_cells = table.add_row().cells
#     row_cells[0].text = str(qty)
#     row_cells[1].text = id
#     row_cells[2].text = desc

document.add_page_break()

if not os.path.exists(rdir + '/tables'):
    os.mkdir(rdir + '/tables')
document.save(rdir + '/tables/Table_Internal_External_Models.docx')

HtnHeuri
- GNB		2.73 (1.77-3.83)		0.93 (0.88-0.97)		0.96 (0.93-0.98)		331
- DT		0.13 (0.0-0.39)		0.99 (0.98-1.0)		1.0 (0.99-1.0)		3
- LR L1		0.08 (0.05-0.1)		1.0 (1.0-1.0)		1.0 (1.0-1.0)		16
- LR L2		0.08 (0.05-0.11)		1.0 (1.0-1.0)		1.0 (1.0-1.0)		331
- RF		0.03 (0.02-0.04)		1.0 (1.0-1.0)		1.0 (1.0-1.0)		25166
- FEAT		0.04 (0.0-0.12)		0.99 (0.98-1.0)		1.0 (0.99-1.0)		2
HtnHypoKHeuri
- GNB		7.88 (5.61-10.05)		0.5 (0.39-0.61)		0.83 (0.78-0.88)		331
- DT		1.5 (0.31-2.92)		0.93 (0.87-0.98)		0.96 (0.92-0.99)		21
- LR L1		0.04 (0.02-0.09)		0.99 (0.97-1.0)		1.0 (0.99-1.0)		80
- LR L2		0.27 (0.15-0.41)		0.94 (0.89-0.98)		0.99 (0.97-0.99)		331
- RF		0.03 (0.02-0.04)		1.0 (1.0-1.0)		1.0 (1.0-1.0)		1364
- FEAT		0.07 (0.03-0.11)		0.98 (0.95-1.0)		1.0 (0.99-1.0)		22
ResHtnHeuri
- GNB		4.29 (2.78-6.03)		0.66 (0.55-0.78)		0.92 (0.89-0.95)		331
- DT		4.07 (2.3-6.09)		0.73 (0.62-0.83)		0.88 (0.83-0.93)		37
- LR L1		0.75 (0.33-1.25)		0.9 (0.83-0.96)		0.96 (0.92-0.99)		140
- LR L2		0.38 (0.2-0.62)		0.91 