In [1]:
import pandas as pd
import seaborn as sns
import json

In [2]:
base_result_dir = '../benchmark_results'
TO_EXCLUDE_MODEL = ['lr']

In [3]:
def get_result_df(result_dir, target_metric):
    # print(result_dir)
    with open(f'{result_dir}/results_{target_metric}.json', 'r') as f:
        result = json.load(f)
        
    refined_result = {k: result[k]['cv_avg_scores'] for k in result.keys() if k not in TO_EXCLUDE_MODEL}
    return pd.DataFrame.from_dict(refined_result).T

In [4]:
from itertools import product

_token = 'words'
result_lst = []

_chained_lst = [None, 'chained', 'unchained']
_rule_lst = [False, True]
_feature_select_lst = [None, 'mutual_info_classif', 'chi2']
_sample_method_lst = [None, 'random_over', 'random_under', 'smote']

# get the combination of chained, rule, feature_select

for _sample_method, _chained, _rule, _feature_select in product(_sample_method_lst, _chained_lst, _rule_lst, _feature_select_lst):
    _rule_txt = '_rule_added' if _rule else ''
    
    if _chained is None:
        logging_nm = f'{_sample_method}/no_imputation_{_feature_select}'
        
    else:
        logging_nm = f'{_sample_method}/imputation_{_chained}_{_feature_select}{_rule_txt}'
        
    result_lst.append(logging_nm)
    print(logging_nm)

None/no_imputation_None
None/no_imputation_mutual_info_classif
None/no_imputation_chi2
None/no_imputation_None
None/no_imputation_mutual_info_classif
None/no_imputation_chi2
None/imputation_chained_None
None/imputation_chained_mutual_info_classif
None/imputation_chained_chi2
None/imputation_chained_None_rule_added
None/imputation_chained_mutual_info_classif_rule_added
None/imputation_chained_chi2_rule_added
None/imputation_unchained_None
None/imputation_unchained_mutual_info_classif
None/imputation_unchained_chi2
None/imputation_unchained_None_rule_added
None/imputation_unchained_mutual_info_classif_rule_added
None/imputation_unchained_chi2_rule_added
random_over/no_imputation_None
random_over/no_imputation_mutual_info_classif
random_over/no_imputation_chi2
random_over/no_imputation_None
random_over/no_imputation_mutual_info_classif
random_over/no_imputation_chi2
random_over/imputation_chained_None
random_over/imputation_chained_mutual_info_classif
random_over/imputation_chained_chi2
r

In [5]:
target_metric = 'AUC'
result_all = {}

for dir in result_lst:
    result_dir = f'{base_result_dir}/{dir}'
    result_df = get_result_df(result_dir, target_metric)
    best_ = result_df.sort_values(f"test_auc", ascending=False).reset_index().iloc[0, :]
    key_str = "-".join(dir.split('/')) + "-" + best_['index']
    # print("-".join(dir.split('/')))
    result_all[key_str] = {k: best_[k] for k in best_.keys() if 'test' in k}

In [15]:
result_df_original = pd.DataFrame.from_dict(result_all).T.sort_values('test_auc', ascending=False).reset_index()

In [74]:
# index name order: sample_method, imputation, feature_select, rule, model
# split the index name to get the information and create a new column for each information

def post_process_df(result_df_original):
    index_nm_order = ['sample_method', 'imputation', 'feature_select', 'rule', 'model']
    df = result_df_original.copy(deep=True)

    # create new column for each information
    for nm in index_nm_order:
        df[nm] = None

    final_result_df_col = ["test_auc", "test_precision", "test_recall", "test_f1", "sample_method", "imputation", "featuer_select", "rule", "model"]
    final_result_df = {}

    for idx_num, row in df.iterrows():
        temp_dict = {}
        
        index_nm = row['index']
        # print(index_nm.split('-'))
        
        lst = index_nm.split('-')
        
        sampling = lst[0]
        
        l2_split = []
        
        l2 = lst[1].split("_")
        # print(l2)
        imputation = "_".join(l2[:2])
        l2_split.append(imputation)
        
        l2 = l2[2:]
        
        if l2[-1] == 'added':
            rule_added = "rule_added"
            l2 = l2[:-2]
            
        else:
            rule_added = "no_rule_added"
        
        l2_split.append(rule_added)    
        
        feat_sel = "_".join(l2)
        
        l2_split.append(feat_sel)   

        model = lst[2]
        
        for col in final_result_df_col:
            if col in row.keys():
                temp_dict[col] = row[col]
                
        temp_dict['sample_method'] = sampling
        temp_dict['imputation'] = imputation
        temp_dict['featuer_select'] = feat_sel
        temp_dict['rule'] = rule_added
        temp_dict['model'] = model
        
        final_result_df[idx_num] = temp_dict
            
    final_result_df = pd.DataFrame.from_dict(final_result_df).T
    return final_result_df

final_result_df = post_process_df(result_df_original)
final_result_df.to_csv(f'{base_result_dir}/final_result.csv', index=False)

In [75]:
final_result_df.head()

Unnamed: 0,test_auc,test_precision,test_recall,test_f1,sample_method,imputation,rule,model,featuer_select
0,0.97447,0.801016,0.970022,0.970022,random_over,imputation_chained,rule_added,et,mutual_info_classif
1,0.974363,0.801744,0.97047,0.97047,smote,imputation_chained,rule_added,et,mutual_info_classif
2,0.974326,0.801598,0.970022,0.970022,random_under,imputation_chained,rule_added,et,chi2
3,0.974326,0.801598,0.970022,0.970022,random_under,imputation_chained,rule_added,et,
4,0.974326,0.801598,0.970022,0.970022,,imputation_chained,rule_added,et,


In [76]:
final_result_df.groupby('sample_method').mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
sample_method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
smote,0.968226,0.778934,0.967174,0.967174
,0.968192,0.779745,0.967487,0.967487
random_under,0.968123,0.778509,0.967233,0.967233
random_over,0.967687,0.775984,0.967248,0.967248


In [77]:
final_result_df.groupby('imputation').mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
imputation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
imputation_chained,0.970981,0.785731,0.968111,0.968111
imputation_unchained,0.966818,0.783899,0.966937,0.966937
no_imputation,0.964689,0.752206,0.966331,0.966331


In [78]:
final_result_df.groupby('rule').mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
rule,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rule_added,0.972137,0.793098,0.968661,0.968661
no_rule_added,0.965337,0.768423,0.966368,0.966368


In [84]:
final_result_df.groupby('model').mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rf,0.970013,0.77841,0.966376,0.966376
et,0.967666,0.77827,0.967468,0.967468


In [85]:
final_result_df.groupby('featuer_select').mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
featuer_select,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
chi2,0.968149,0.780262,0.96745,0.96745
mutual_info_classif,0.968082,0.777534,0.967327,0.967327
,0.96794,0.777083,0.967081,0.967081


# 전체에서 확인

In [88]:
target_metric = 'AUC'
result_all = {}

# concat all the result
for dir in result_lst:
    result_dir = f'{base_result_dir}/{dir}'
    result_df = get_result_df(result_dir, target_metric)
    
    key_str = ["-".join(dir.split('/')) + "-" + result_df.index[i] for i in range(len(result_df))]
    # print(key_str)
    for i in range(len(key_str)):
        result_all[key_str[i]] = result_df.iloc[i, :].to_dict()
    
whole_df = pd.DataFrame.from_dict(result_all).T.sort_values('test_auc', ascending=False).reset_index()
whole_df

Unnamed: 0,index,test_auc,test_precision,test_recall,test_f1
0,random_over-imputation_chained_mutual_info_cla...,0.974470,0.801016,0.970022,0.970022
1,smote-imputation_chained_mutual_info_classif_r...,0.974363,0.801744,0.970470,0.970470
2,None-imputation_chained_mutual_info_classif_ru...,0.974326,0.801598,0.970022,0.970022
3,None-imputation_chained_chi2_rule_added-et,0.974326,0.801598,0.970022,0.970022
4,None-imputation_chained_None_rule_added-et,0.974326,0.801598,0.970022,0.970022
...,...,...,...,...,...
175,random_over-imputation_unchained_chi2-xgb,0.903797,0.557128,0.955705,0.955705
176,None-imputation_unchained_chi2_rule_added-xgb,0.872277,0.358784,0.951678,0.951678
177,smote-no_imputation_chi2-xgb,0.858969,0.341002,0.951678,0.951678
178,None-no_imputation_None-xgb,0.795731,0.167717,0.951678,0.951678


In [89]:
whole_result_processed = post_process_df(whole_df)
whole_result_processed

Unnamed: 0,test_auc,test_precision,test_recall,test_f1,sample_method,imputation,rule,model,featuer_select
0,0.97447,0.801016,0.970022,0.970022,random_over,imputation_chained,rule_added,et,mutual_info_classif
1,0.974363,0.801744,0.97047,0.97047,smote,imputation_chained,rule_added,et,mutual_info_classif
2,0.974326,0.801598,0.970022,0.970022,,imputation_chained,rule_added,et,mutual_info_classif
3,0.974326,0.801598,0.970022,0.970022,,imputation_chained,rule_added,et,chi2
4,0.974326,0.801598,0.970022,0.970022,,imputation_chained,rule_added,et,
...,...,...,...,...,...,...,...,...,...
175,0.903797,0.557128,0.955705,0.955705,random_over,imputation_unchained,no_rule_added,xgb,chi2
176,0.872277,0.358784,0.951678,0.951678,,imputation_unchained,rule_added,xgb,chi2
177,0.858969,0.341002,0.951678,0.951678,smote,no_imputation,no_rule_added,xgb,chi2
178,0.795731,0.167717,0.951678,0.951678,,no_imputation,no_rule_added,xgb,


In [90]:
whole_result_processed.groupby('sample_method').mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
sample_method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
random_over,0.949418,0.69784,0.962073,0.962073
random_under,0.948906,0.692782,0.961571,0.961571
smote,0.948457,0.694328,0.962048,0.962048
,0.941859,0.675098,0.961844,0.961844


In [91]:
whole_result_processed.groupby('imputation').mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
imputation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
imputation_chained,0.952997,0.706805,0.96214,0.96214
imputation_unchained,0.944426,0.691249,0.961972,0.961972
no_imputation,0.940955,0.653952,0.961198,0.961198


In [92]:
whole_result_processed.groupby("rule").mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
rule,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rule_added,0.953397,0.711614,0.963184,0.963184
no_rule_added,0.943002,0.67561,0.961018,0.961018


In [93]:
whole_result_processed.groupby("model").mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
et,0.967538,0.775772,0.967151,0.967151
rf,0.965703,0.759814,0.964202,0.964202
xgb,0.908239,0.53445,0.954299,0.954299


In [94]:
whole_result_processed.groupby("featuer_select").mean().sort_values('test_auc', ascending=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0_level_0,test_auc,test_precision,test_recall,test_f1
featuer_select,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
mutual_info_classif,0.94931,0.69448,0.961629,0.961629
,0.947413,0.692389,0.962185,0.962185
chi2,0.944757,0.683167,0.961838,0.961838
