# Tagging training results

In order to keep track of the training results, I will tag each training with the following information:
![](assets/Overview_training_conditions.png)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from project_code.evaluate.logs import tag_experiment

In [2]:
df = pd.read_excel('outputs/experiments.xlsx')

In [3]:
df.head()

Unnamed: 0,run,done,src_data,task,value_mode,embed_model,model,load_pretrained_weights,init_bert_params,init_bert_params_with_freeze,transfer,epoch,loss,auroc,auprc,tag
0,outputs/2024-04-13/09-54-57,True,mimiciii,readmission,NV,descemb_rnn,ehr_model,False,False,False,False,13.0,1.361,0.505,0.043,unknown
1,outputs/2024-04-13/10-06-26,True,mimiciii,readmission,VA,descemb_rnn,ehr_model,False,False,False,False,14.0,1.353,0.505,0.043,unknown
2,outputs/2024-04-13/10-27-57,True,mimiciii,readmission,VC,descemb_rnn,ehr_model,False,False,False,False,10.0,1.363,0.504,0.043,unknown
3,outputs/2024-04-13/10-38-53,True,mimiciii,mortality,NV,descemb_rnn,ehr_model,False,False,False,False,13.0,1.359,0.503,0.089,unknown
4,outputs/2024-04-13/10-49-36,True,mimiciii,mortality,VA,descemb_rnn,ehr_model,False,False,False,False,9.0,1.385,0.504,0.089,unknown


In [4]:
# Add new column 'tag' to df
df['tag'] = len(df) * ['unknown']
df.head()

Unnamed: 0,run,done,src_data,task,value_mode,embed_model,model,load_pretrained_weights,init_bert_params,init_bert_params_with_freeze,transfer,epoch,loss,auroc,auprc,tag
0,outputs/2024-04-13/09-54-57,True,mimiciii,readmission,NV,descemb_rnn,ehr_model,False,False,False,False,13.0,1.361,0.505,0.043,unknown
1,outputs/2024-04-13/10-06-26,True,mimiciii,readmission,VA,descemb_rnn,ehr_model,False,False,False,False,14.0,1.353,0.505,0.043,unknown
2,outputs/2024-04-13/10-27-57,True,mimiciii,readmission,VC,descemb_rnn,ehr_model,False,False,False,False,10.0,1.363,0.504,0.043,unknown
3,outputs/2024-04-13/10-38-53,True,mimiciii,mortality,NV,descemb_rnn,ehr_model,False,False,False,False,13.0,1.359,0.503,0.089,unknown
4,outputs/2024-04-13/10-49-36,True,mimiciii,mortality,VA,descemb_rnn,ehr_model,False,False,False,False,9.0,1.385,0.504,0.089,unknown


In [5]:
# CodeEmb RD & W2V
df.loc[(df['embed_model'] == 'codeemb') & (df['load_pretrained_weights'] == False), "tag"] = "CodeEmb_RD"
df.loc[(df['embed_model'] == 'codeemb') & (df['load_pretrained_weights'] == True), "tag"] = "CodeEmb_W2V"
# All transfer experiments
df.loc[(df['embed_model'] == 'codeemb') & (df['transfer'] == True), "tag"] = "CodeEmb_Transfer"
df.loc[(df['embed_model'] == 'descemb_bert') & (df['transfer'] == True), "tag"] = "DescEmb-BERT_Transfer"
df.loc[(df['embed_model'] == 'descemb_rnn') & (df['transfer'] == True), "tag"] = "DescEmb-RNN_Transfer"
# All pooled experiments
df.loc[(df['embed_model'] == 'codeemb') & (df['src_data'] == 'pooled'), "tag"] = "CodeEmb-Pooled"
df.loc[(df['embed_model'] == 'descemb_bert') & (df['src_data'] == 'pooled'), "tag"] = "DescEmb-BERT_Pooled"
df.loc[(df['embed_model'] == 'descemb_rnn') & (df['src_data'] == 'pooled'), "tag"] = "DescEmb-RNN_Pooled"
# All MLM pretraining experiments
df.loc[(df['task'] == 'w2v') & (df['model'] == 'codeemb'), "tag"] = "CodeEmb_Pretrain-W2V"
df.loc[(df['task'] == 'mlm') & (df['model'] == 'descemb_bert') & (df['init_bert_params'] == True), "tag"] = "DescEmb-BERT_Pretrain-MLM"
df.loc[(df['task'] == 'mlm') & (df['model'] == 'descemb_rnn') & (df['init_bert_params'] == True), "tag"] = "DescEmb-RNN_Pretrain-MLM"
# CLS-FT
df.loc[(df['model'] == 'ehr_model') & (df['embed_model'] == 'descemb_bert') & (df['init_bert_params_with_freeze'] == True), "tag"] = "DescEmb-BERT_CLS-FT"
# FT
df.loc[(df['model'] == 'ehr_model') & (df['embed_model'] == 'descemb_bert') & (df['init_bert_params'] == True), "tag"] = "DescEmb-BERT_FT"
# FT-MLM
df.loc[(df['model'] == 'ehr_model') & (df['embed_model'] == 'descemb_bert') & (df['load_pretrained_weights'] == True), "tag"] = "DescEmb-BERT_FT-MLM"
# RNN Scr & Scr-MLM
df.loc[(df['model'] == 'ehr_model') & (df['embed_model'] == 'descemb_rnn') & (df['load_pretrained_weights'] == False), "tag"] = "DescEmb-RNN_Scr"
df.loc[(df['model'] == 'ehr_model') & (df['embed_model'] == 'descemb_rnn') & (df['load_pretrained_weights'] == True), "tag"] = "DescEmb-RNN_Scr"

In [6]:
df['tag'].value_counts()

DescEmb-RNN_Scr              101
DescEmb-BERT_FT-MLM           71
DescEmb-BERT_FT               53
DescEmb-BERT_CLS-FT           46
CodeEmb_RD                    15
unknown                        2
DescEmb-BERT_Pretrain-MLM      2
Name: tag, dtype: int64

In [7]:
df.to_excel('outputs/experiments.xlsx', index=False)

In [8]:
df2 = pd.read_excel('outputs/experiments.xlsx')

In [9]:
df2 = tag_experiment(df2)

In [10]:
len(df)*len(df.columns)

4640

In [11]:
(df2 == df).apply(lambda x: x.all())    

run                              True
done                             True
src_data                         True
task                             True
value_mode                       True
embed_model                      True
model                            True
load_pretrained_weights          True
init_bert_params                 True
init_bert_params_with_freeze     True
transfer                         True
epoch                           False
loss                            False
auroc                           False
auprc                           False
tag                              True
dtype: bool

In [12]:
df2.head()

Unnamed: 0,run,done,src_data,task,value_mode,embed_model,model,load_pretrained_weights,init_bert_params,init_bert_params_with_freeze,transfer,epoch,loss,auroc,auprc,tag
0,outputs/2024-04-13/09-54-57,True,mimiciii,readmission,NV,descemb_rnn,ehr_model,False,False,False,False,13.0,1.361,0.505,0.043,DescEmb-RNN_Scr
1,outputs/2024-04-13/10-06-26,True,mimiciii,readmission,VA,descemb_rnn,ehr_model,False,False,False,False,14.0,1.353,0.505,0.043,DescEmb-RNN_Scr
2,outputs/2024-04-13/10-27-57,True,mimiciii,readmission,VC,descemb_rnn,ehr_model,False,False,False,False,10.0,1.363,0.504,0.043,DescEmb-RNN_Scr
3,outputs/2024-04-13/10-38-53,True,mimiciii,mortality,NV,descemb_rnn,ehr_model,False,False,False,False,13.0,1.359,0.503,0.089,DescEmb-RNN_Scr
4,outputs/2024-04-13/10-49-36,True,mimiciii,mortality,VA,descemb_rnn,ehr_model,False,False,False,False,9.0,1.385,0.504,0.089,DescEmb-RNN_Scr
