In [2]:
# RNN sweep 
# 2021-02-05

# Settings 

# Import master csv file
input_dir = '~/Downloads/'
csv_name = 'New_RNN_sweep_noon.csv'
output_dir = '~/Downloads/'

import pandas as pd
import numpy as np
import os

# Basic data cleaning
rundata = pd.read_csv(os.path.join(input_dir,csv_name))
rundata = rundata[rundata['State']=='finished']


print(len(rundata[rundata['bert_induced']==True]),len(rundata[rundata['bert_induced']==False]))


rundata.columns

5094 744


Index(['Name', 'State', 'Notes', 'User', 'Tags', 'Created', 'Runtime', 'Sweep',
       'batch_size', 'bert_freeze', 'bert_induced', 'bert_model', 'cls_freeze',
       'concat', 'debug', 'device_number', 'dropout', 'embedding_dim',
       'hidden_dim', 'input_path', 'item', 'lr', 'max_length', 'n_epochs',
       'only_BCE', 'path', 'rnn_bidirection', 'rnn_model_type', 'seed',
       'source_file', 'target', 'time_window', 'transformer',
       'transformer_attn_heads', 'transformer_hidden_dim',
       'transformer_layers', 'wandb_project_name', 'eicu_eval_auprc',
       'eicu_eval_auroc', 'eicu_eval_loss', 'eicu_test_auprc',
       'eicu_test_auroc', 'eicu_test_loss', 'eval_auprc', 'eval_auroc',
       'eval_loss', 'mimic_eval_auprc', 'mimic_eval_auroc', 'mimic_eval_loss',
       'mimic_test_auprc', 'mimic_test_auroc', 'mimic_test_loss', 'test_auprc',
       'test_auroc', 'test_loss', 'train_auprc', 'train_auroc', 'train_loss'],
      dtype='object')

In [12]:
def generate_table(result_file:pd.DataFrame, target, seed_count=False, count_flag=True, std=False, rounding=True):
    
    # Conditions: {5 bert models} x {SingleRNN, CLS_finetune} x {4 items} x {5 targets}

    
    # Possible combinations for our models
    conditionlist = [(result_file['bert_induced'] == False),
                     (result_file['bert_induced'] == True) & (result_file['bert_freeze'] == True) & 
                             (result_file['cls_freeze'] == True),
                     (result_file['bert_induced'] == True) & (result_file['bert_freeze'] == True) & 
                             (result_file['cls_freeze'] == False),
                     (result_file['bert_induced'] == True) & (result_file['bert_freeze'] == False)]
    choicelist = ['singleRNN', 'CLSfixed', 'CLSfinetune','BERTfinetune']
    result_file['model_name'] = np.select(conditionlist, choicelist)
    
    
    
    result_file = result_file.loc[:, ['model_name','bert_model','bert_induced','seed', 'source_file', 'target','item',
                                      'eicu_test_auprc', 'mimic_test_auprc', 'test_auprc']]
    
    #print(result_file.loc['bert_model', result_file['model_name']=='singleRNN'].unique())
    result_filter = result_file.loc[result_file['target']==target]
     
    result_mean = pd.pivot_table(result_filter,
                               columns = ['source_file', 'model_name'],
                               index = ['item', 'bert_model'],
                               values = ['test_auprc', 'mimic_test_auprc', 'eicu_test_auprc'],
                               aggfunc=['mean'])
    
    result_std = pd.pivot_table(result_filter,
                               columns = ['source_file', 'model_name'],
                               index = ['item', 'bert_model'],
                               values = ['test_auprc','mimic_test_auprc', 'eicu_test_auprc'],
                               aggfunc=['std'])
    
    result_count = pd.pivot_table(result_filter,
                               columns = ['source_file', 'model_name'],
                               index = ['item', 'bert_model'],
                               values = ['test_auprc','mimic_test_auprc', 'eicu_test_auprc'],
                               aggfunc=['count'])
    
    if count_flag == True:
        if rounding==True:
            RM_drop = result_mean.droplevel([0], axis=1).round(3)
            RS_drop = result_std.droplevel([0], axis=1).round(3)
        elif rounding == False:
            RM_drop = result_mean.droplevel([0], axis=1)
            RS_drop = result_std.droplevel([0], axis=1)
        RC_drop = result_count.droplevel([0], axis=1)
        
        RC_drop = RC_drop[RM_drop.columns]
        
        for col in range(len(RM_drop.columns)): 
            RM_drop.iloc[:,col][RC_drop.iloc[:,col] > 10] = 'over'
            RM_drop.iloc[:,col][RC_drop.iloc[:,col] < 10] = 'under'   
            
            RS_drop.iloc[:,col][RC_drop.iloc[:,col] > 10] = 'over'
            RS_drop.iloc[:,col][RC_drop.iloc[:,col] < 10] = 'under'  
    
    
    if seed_count==False:
        if count_flag == True:
            if std==True:
                return RS_drop
            elif std==False:
                return RM_drop
        elif count_flag == False:
            if std==True:
                return result_std
            elif std==False:
                return result_mean
    
    elif seed_count==True:
        return RC_drop


In [24]:
target_list = ['readmission', 'mortality', 'los>3day', 'los>7day', 'dx_depth1_unique']

keep_seed = False
count_seeds = False
flag = True

for target in target_list:
    if keep_seed:
        generate_table(rundata, target).to_csv(os.path.join(output_dir,'RNNsweep_{}_withseed.csv'.format(target)))
    else:
        if count_seeds==False:
            if flag==True:
                generate_table(rundata, target).to_csv(os.path.join(output_dir,'RNNsweep_{}_flagged.csv'.format(target)))
            elif flag==False:
                generate_table(rundata, target).to_csv(os.path.join(output_dir,'RNNsweep_{}.csv'.format(target)))
        if count_seeds==True:
            generate_table(rundata, target).to_csv(os.path.join(output_dir,'RNNsweep_{}_seedcount.csv'.format(target)))
    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RM_drop.iloc[:,col][RC_drop.iloc[:,col] > 10] = 'over'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RM_drop.iloc[:,col][RC_drop.iloc[:,col] < 10] = 'under'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RS_drop.iloc[:,col][RC_drop.iloc[:,col] > 10] = 'over'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RS

In [13]:
generate_table(rundata, 'mortality').iloc[:, ::-1]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RM_drop.iloc[:,col][RC_drop.iloc[:,col] > 10] = 'over'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RM_drop.iloc[:,col][RC_drop.iloc[:,col] < 10] = 'under'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RS_drop.iloc[:,col][RC_drop.iloc[:,col] > 10] = 'over'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RS

Unnamed: 0_level_0,Unnamed: 1_level_0,test_auprc,test_auprc,test_auprc,test_auprc,test_auprc,test_auprc,mimic_test_auprc,mimic_test_auprc,mimic_test_auprc,eicu_test_auprc,eicu_test_auprc,eicu_test_auprc
Unnamed: 0_level_1,source_file,mimic,mimic,mimic,eicu,eicu,eicu,both,both,both,both,both,both
Unnamed: 0_level_2,model_name,singleRNN,CLSfixed,CLSfinetune,singleRNN,CLSfixed,CLSfinetune,singleRNN,CLSfixed,CLSfinetune,singleRNN,CLSfixed,CLSfinetune
item,bert_model,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
all,bert,,under,0.439,,0.139,0.2,,under,0.428,,under,0.199
all,bio_bert,,under,0.436,,0.171,0.205,,under,0.43,,under,0.203
all,bio_clinical_bert,,under,0.443,,0.174,0.206,,under,0.433,,under,0.205
all,blue_bert,,0.396,0.432,,0.181,0.204,,under,0.423,,under,0.21
all,pubmed_bert,0.402,0.433,0.436,0.187,0.167,0.203,0.39,under,0.43,0.188,under,0.201
inf,bert,,,0.277,,,0.265,,,0.279,,,0.26
inf,bio_bert,,,0.278,,,0.264,,,0.277,,,0.256
inf,bio_clinical_bert,,,0.278,,,0.267,,,0.279,,,0.267
inf,blue_bert,,,0.279,,,0.256,,,0.279,,,0.26
inf,pubmed_bert,0.256,,0.282,0.228,,0.266,0.255,,0.28,0.22,,0.245


In [15]:
generate_table(rundata, 'mortality').iloc[:, ::-1].to_latex(multirow=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RM_drop.iloc[:,col][RC_drop.iloc[:,col] > 10] = 'over'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RM_drop.iloc[:,col][RC_drop.iloc[:,col] < 10] = 'under'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RS_drop.iloc[:,col][RC_drop.iloc[:,col] > 10] = 'over'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RS

'\\begin{tabular}{llrlrrlrrllrll}\n\\toprule\n    & {} & \\multicolumn{6}{l}{test\\_auprc} & \\multicolumn{3}{l}{mimic\\_test\\_auprc} & \\multicolumn{3}{l}{eicu\\_test\\_auprc} \\\\\n    & source\\_file & \\multicolumn{3}{l}{mimic} & \\multicolumn{3}{l}{eicu} & \\multicolumn{3}{l}{both} & \\multicolumn{3}{l}{both} \\\\\n    & model\\_name &  singleRNN & CLSfixed & CLSfinetune & singleRNN & CLSfixed & CLSfinetune &        singleRNN & CLSfixed & CLSfinetune &       singleRNN & CLSfixed & CLSfinetune \\\\\nitem & bert\\_model &            &          &             &           &          &             &                  &          &             &                 &          &             \\\\\n\\midrule\n\\multirow{5}{*}{all} & bert &        NaN &    under &       0.439 &       NaN &    0.139 &       0.200 &              NaN &    under &       0.428 &             NaN &    under &       0.199 \\\\\n    & bio\\_bert &        NaN &    under &       0.436 &       NaN &    0.171 &       0.205 & 