In [1]:
import warnings
warnings.filterwarnings('ignore')
import matplotlib
import os
from IPython.display import display
matplotlib.use('Agg')
%matplotlib inline
import pandas as pd
# Display configuration for the wide ranking tables shown below.
# Use the fully qualified 'display.precision' key: the bare 'precision' pattern
# matches more than one option in current pandas and raises OptionError.
pd.set_option('display.precision', 4)
# 'display.height' is no longer a pandas option (setting it raises
# OptionError); the number of rows shown is governed by 'display.max_rows'.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

from gather_metrics import *
def get_ranked_by_means_df(gather_df, metric_list):
    """Rank models, per metric, by their 'Folds Mean' value on the test set.

    Parameters
    ----------
    gather_df : pandas.DataFrame
        Metrics table whose row MultiIndex contains levels named 'set' and
        'fold'. Only rows with set == 'test_metrics' and fold == 'Folds Mean'
        are used; the remaining index level(s) label each row in the output.
    metric_list : list of str
        Names of the columns of ``gather_df`` to rank. May be any subset of
        the available columns.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``metric_list`` (same order) on a default
        integer index. Row ``i`` of a column holds the string
        ``"<row label>, <value rounded to 4 decimals>"`` for the row with the
        i-th largest value of that metric.
    """
    # The key must be a tuple: list keys for xs are rejected by pandas 2.x.
    means_df = gather_df.xs(('test_metrics', 'Folds Mean'), level=['set', 'fold'])

    # Build each column independently instead of pre-allocating a zeros array
    # shaped like means_df, so metric_list need not cover every column and the
    # function does not depend on numpy being in scope.
    ranked_columns = {}
    for metric in metric_list:
        sorted_means = means_df[metric].sort_values(ascending=False)
        ranked_columns[metric] = [
            str(model) + ", " + str(round(score, 4))
            for model, score in zip(sorted_means.index.tolist(), sorted_means.tolist())
        ]
    # Passing columns= pins the column order to metric_list.
    return pd.DataFrame(ranked_columns, columns=list(metric_list))

In [2]:
# Load the stage-1 cross-validation metrics of every model class.
model_directory = '../../../job_results_small/'
class_dirs = [model_directory+'/random_forest/stage_1/',
              model_directory+'/irv/stage_1/',
              model_directory+'/neural_networks/stage_1/',
              model_directory+'/docking/stage_1/']
k = 5
# Second argument passed to gather_dir_metrics for each entry of class_dirs.
# NOTE(review): neural_networks is gathered with 20 while every other class
# uses k -- confirm this is intentional.
fold_counts = [k, k, 20, k]

# DataFrame.append was removed in pandas 2.0; a single concat stacks the
# per-class frames in the same order as the original chained appends.
gather_df_2 = pd.concat([gather_dir_metrics(class_dir, n_folds)
                         for class_dir, n_folds in zip(class_dirs, fold_counts)])

# Positional column ranges to consider, then narrowed to per-fold columns
# (no 'Mean'/'Median' in the name) that belong to the RMI-FANCM1 target.
# NOTE(review): the ranges are magic numbers tied to the column layout that
# gather_dir_metrics produces -- verify if that layout changes.
col_indices = list(range(10)) + list(range(15, 20)) + list(range(25, 105)) + list(range(145, 149)) + list(range(150, 183))
col_indices = [i for i in col_indices if 'Mean' not in gather_df_2.columns[i] and 'Median' not in gather_df_2.columns[i]]
col_indices = [i for i in col_indices if 'RMI-FANCM1' in gather_df_2.columns[i]]
metric_names = list(gather_df_2.columns.values[col_indices])

# Keep only the selected metric columns and drop rows with any missing value.
gather_df_2 = gather_df_2[metric_names]
gather_df_2 = gather_df_2.dropna(axis=0)

gather_df_cv = gather_df_2

# Set up the comparison matrix and overlaps.
# After the column selection above the frame has exactly len(col_indices)
# columns, so the helpers are given every column position.
dtk_dict = dtk_multi_metrics(gather_df_cv, list(range(len(col_indices))))
dtk_analysis_dict = analyze_dtk_dict(dtk_dict)
mm_comp_dicts = get_mean_median_comps(gather_df_cv, list(range(len(col_indices))))
# presumably the *_w arguments weight the mean / median / Tukey comparisons;
# median_w=0 would then leave the median out -- confirm against gather_metrics.
agg_comp_dict = get_agg_comp(mm_comp_dicts, dtk_analysis_dict, mean_w=1, median_w=0, tukey_w=1)

---
## Compute Best Models for Each Metric Based on Mean VS DTK+Mean

In [3]:
# Two per-metric model orderings: one from the fold means alone, one from the
# aggregated comparison dictionary built in the previous cell.
simple_ranked_cv = get_ranked_by_means_df(gather_df_cv, metric_names)
complex_ranked_cv = get_model_ordering(agg_comp_dict, metric_names)

# Number of leading rows to keep per metric, and the metrics reported below.
top_n = 1
req_metrics = [
    'ROC AUC RMI-FANCM1',
    'BEDROC AUC RMI-FANCM1',
    'PR auc.integral RMI-FANCM1',
    'NEF AUC RMI-FANCM1',
    'NEF_1 % RMI-FANCM1',
    'EF_1 % RMI-FANCM1',
]

# First top_n rows of each ranking, restricted to the requested metrics.
simple_top_n = simple_ranked_cv[req_metrics].head(top_n)
complex_top_n = complex_ranked_cv[req_metrics].head(top_n)

### Ranking by Folds Mean Only

In [4]:
# Caption first, then the mean-based ranking limited to the requested metrics.
display('Ranking by Means. Shows model-mean pairs.')
display(simple_ranked_cv[req_metrics])

'Ranking by Means. Shows model-mean pairs.'

Unnamed: 0,ROC AUC RMI-FANCM1,BEDROC AUC RMI-FANCM1,PR auc.integral RMI-FANCM1,NEF AUC RMI-FANCM1,NEF_1 % RMI-FANCM1,EF_1 % RMI-FANCM1
0,"RandomForest_h, 0.8346","RandomForest_h, 0.5114","RandomForest_h, 0.1237","RandomForest_h, 0.6071","RandomForest_g, 0.3127","RandomForest_g, 31.2692"
1,"RandomForest_g, 0.8306","RandomForest_g, 0.5045","RandomForest_g, 0.1225","RandomForest_g, 0.5991","RandomForest_h, 0.3082","RandomForest_h, 30.8248"
2,"RandomForest_e, 0.8256","RandomForest_e, 0.5001","RandomForest_e, 0.1224","RandomForest_e, 0.596","RandomForest_d, 0.2997","RandomForest_d, 29.9737"
3,"RandomForest_d, 0.8225","RandomForest_d, 0.4967","RandomForest_b, 0.1219","RandomForest_d, 0.5919","RandomForest_e, 0.2996","RandomForest_e, 29.9644"
4,"RandomForest_f, 0.8207","RandomForest_b, 0.4908","RandomForest_a, 0.1218","RandomForest_a, 0.5722","RandomForest_a, 0.2953","RandomForest_a, 29.5297"
5,"RandomForest_c, 0.8161","RandomForest_a, 0.4898","RandomForest_d, 0.1213","RandomForest_b, 0.572","RandomForest_c, 0.291","RandomForest_c, 29.1041"
6,"RandomForest_b, 0.8159","RandomForest_c, 0.4897","RandomForest_c, 0.1208","RandomForest_c, 0.5712","RandomForest_b, 0.291","RandomForest_b, 29.1041"
7,"RandomForest_a, 0.8157","RandomForest_f, 0.4637","IRV_d, 0.0841","RandomForest_f, 0.5546","RandomForest_f, 0.2868","RandomForest_f, 28.6782"
8,"MultiClassification_b, 0.7532","IRV_e, 0.4356","SingleClassification_b, 0.0824","IRV_e, 0.4994","IRV_d, 0.2867","IRV_d, 28.6693"
9,"IRV_e, 0.7463","IRV_d, 0.4324","IRV_e, 0.0752","IRV_d, 0.4646","IRV_e, 0.2822","IRV_e, 28.2249"


### Ranking by DTK+Means

In [5]:
# Caption first, then the DTK+mean ranking limited to the requested metrics.
display('Ranking by DTK+Means. Shows model-rank pairs.')
display(complex_ranked_cv[req_metrics])

'Ranking by DTK+Means. Shows model-rank pairs.'

Unnamed: 0,ROC AUC RMI-FANCM1,BEDROC AUC RMI-FANCM1,PR auc.integral RMI-FANCM1,NEF AUC RMI-FANCM1,NEF_1 % RMI-FANCM1,EF_1 % RMI-FANCM1
0,"RandomForest_h, 1.0","RandomForest_h, 1.0","SingleClassification_b, 1.0","RandomForest_h, 1.0","RandomForest_h, 1.0","RandomForest_h, 1.0"
1,"RandomForest_g, 2.0","RandomForest_g, 2.0","SingleClassification_a, 2.0","RandomForest_g, 2.0","RandomForest_g, 1.0","RandomForest_g, 1.0"
2,"RandomForest_e, 3.0","RandomForest_e, 3.0","RandomForest_h, 2.0","RandomForest_e, 3.0","RandomForest_e, 3.0","RandomForest_e, 3.0"
3,"RandomForest_d, 4.0","RandomForest_d, 4.0","RandomForest_g, 4.0","RandomForest_d, 3.0","RandomForest_d, 3.0","RandomForest_d, 3.0"
4,"RandomForest_c, 5.0","RandomForest_b, 5.0","RandomForest_e, 5.0","RandomForest_a, 5.0","RandomForest_a, 5.0","RandomForest_a, 5.0"
5,"RandomForest_f, 6.0","RandomForest_c, 6.0","RandomForest_b, 6.0","RandomForest_b, 6.0","RandomForest_f, 6.0","RandomForest_f, 6.0"
6,"RandomForest_b, 6.0","RandomForest_a, 6.0","RandomForest_a, 7.0","RandomForest_c, 7.0","RandomForest_c, 6.0","RandomForest_c, 6.0"
7,"RandomForest_a, 8.0","RandomForest_f, 8.0","RandomForest_d, 8.0","RandomForest_f, 8.0","RandomForest_b, 6.0","RandomForest_b, 6.0"
8,"MultiClassification_b, 9.0","IRV_d, 9.0","RandomForest_c, 9.0","MultiClassification_b, 9.0","SingleClassification_b, 9.0","IRV_d, 9.0"
9,"IRV_e, 10.0","IRV_e, 10.0","MultiClassification_b, 9.0","IRV_e, 10.0","IRV_d, 10.0","SingleClassification_b, 9.0"


---
## Regret of best model

In [6]:
# Reduce the first row of each requested metric from "<model>, <value>" to the
# bare model name.
# Work on explicit copies and assign through .iloc on the frame itself: the
# chained form `frame[m].iloc[0] = ...` writes to a temporary Series and is a
# silent no-op when pandas copy-on-write is active.
simple_top_n = simple_top_n.copy()
complex_top_n = complex_top_n.copy()
for m in req_metrics:
    # split(',', 1)[0] keeps everything before the first comma and leaves an
    # already-stripped name unchanged, so re-running this cell does not raise.
    simple_col = simple_top_n.columns.get_loc(m)
    simple_model = simple_top_n.iloc[0, simple_col].split(',', 1)[0]
    simple_top_n.iloc[0, simple_col] = simple_model

    complex_col = complex_top_n.columns.get_loc(m)
    complex_model = complex_top_n.iloc[0, complex_col].split(',', 1)[0]
    complex_top_n.iloc[0, complex_col] = complex_model

In [7]:
# Side-by-side table of the top model per metric under each ranking scheme.
# Transposing puts the metrics on the index; the index-on-index merge then
# lines the two rankings up per metric (yields two columns when top_n == 1).
final_df = simple_top_n.T
final_df = pd.merge(final_df, complex_top_n.T, left_index=True, right_index=True)
final_df.columns = ['Best by Mean','Best by DTK+Mean']
final_df.columns.name = 'Metric'
# Drop the target suffix from the metric labels. The previous pattern
# ' PriA-SSB FP' never occurs in this notebook's metric names (they all end in
# ' RMI-FANCM1'), so the clean-up had no effect.
final_df.index = [s.replace(' RMI-FANCM1', '') for s in final_df.index]

display(final_df)

Metric,Best by Mean,Best by DTK+Mean
ROC AUC RMI-FANCM1,RandomForest_h,RandomForest_h
BEDROC AUC RMI-FANCM1,RandomForest_h,RandomForest_h
PR auc.integral RMI-FANCM1,RandomForest_h,SingleClassification_b
NEF AUC RMI-FANCM1,RandomForest_h,RandomForest_h
NEF_1 % RMI-FANCM1,RandomForest_g,RandomForest_h
EF_1 % RMI-FANCM1,RandomForest_g,RandomForest_h
