# Model evaluation

In this notebook, the transformer-based models analyzed in this work are evaluated according to their clinical coding performance achieved in the CodiEsp-P task.

In [1]:
# Auxiliary components
import sys
sys.path.append("../../")
from nlp_utils import *

In [3]:
def compute_metrics_avg(thr, doc_preds_arr):
    """
    Custom function that returns a pd.Series containing the average P, R, F1, MAP values obtained for 
    a set of different predictions using a single threshold.
    """
    
    p_arr, r_arr, f1_arr, m_arr = [], [], [], []
    for doc_preds in doc_preds_arr:
        df_test_pred = thr_codiesp_prediction_format(y_pred=doc_preds, label_encoder_classes=mlb_encoder.classes_, 
                                                     doc_list=test_doc_list, thr=thr)
        df_test_pred[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)
        p, r, f1 = compute_p_r_f1(gs_path, test_pred_file_path, valid_path)
        m = round(compute_map(valid_codes=valid_d_codes, pred=df_test_pred, gs_out_path=gs_out_path), 3)
        p_arr.append(p)
        r_arr.append(r)
        f1_arr.append(f1)
        m_arr.append(m)

    return pd.Series({'P': np.mean(p_arr), 'P_std': np.std(p_arr), 'P_max': np.max(p_arr), 
                      'R': np.mean(r_arr), 'R_std': np.std(r_arr), 'R_max': np.max(r_arr), 
                      'F1': np.mean(f1_arr), 'F1_std': np.std(f1_arr), 'F1_max': np.max(f1_arr), 
                      'Thr': thr}).apply(lambda x: round(x, 3))

In [2]:
# Both arrays were saved in CodiEsp-P_XLM-R_Fine-Tuning.ipynb
test_doc_list = np.load("test_docs.npy")
codes = np.load("classes.npy")

## Load models predictions

We load the coding predictions made by the models at sentence-level, and we further convert them to document-level predictions.

In [3]:
preds_dir = "predictions/"
rnd_seeds = list(range(5))

### XLM-R

In [5]:
model_name = "xlm_r"
xlmr_test_frags = np.load(model_name + "_test_frags.npy")
xlmr_preds = [max_fragment(y_frag_pred=np.load(preds_dir + model_name + "_seed_" + str(i) + "_test_preds.npy"), 
                            n_fragments=xlmr_test_frags) for i in rnd_seeds]

### XLM-R-Galén

In [7]:
model_name = "xlm_r_galen"
xlmr_galen_preds = [max_fragment(y_frag_pred=np.load(preds_dir + model_name + "_seed_" + str(i) + "_test_preds.npy"), 
                            n_fragments=xlmr_test_frags) for i in rnd_seeds]

## MAP

Firstly, we evaluate the models according to the main evaluation metric of the task, the Mean Average Precision (MAP) score.

In [None]:
# Load valid codes
codes_path = "../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv"
valid_d_codes = set(pd.read_csv(codes_path, sep='\t', header=None, 
                              usecols=[0])[0].tolist())
valid_d_codes = set([x.lower() for x in valid_d_codes])

In [None]:
def compute_map_avg(doc_preds_arr, codes_labels, docs, valid_codes, gs_out_path):
    """
    Computes the average, std and max MAP scores for a given array of document-level predictions. 
    """
    map_arr = []
    for doc_preds in doc_preds_arr:
        df_pred = prob_codiesp_prediction_format(y_pred=doc_preds, label_encoder_classes=codes_labels, 
                                                 doc_list=docs)
        map_arr.append(round(compute_map(valid_codes=valid_codes, pred=df_pred, gs_out_path=gs_out_path), 3))
    return pd.Series({'MAP': np.mean(map_arr), 'MAP_std': np.std(map_arr), 
                      'MAP_max': np.max(map_arr)}).apply(lambda x: round(x, 3))

In [None]:
# here guille now

In [97]:
test_pred_file_path = "proc_test_frag_preds.tsv"

We load the sentence-predictions from the different models, and generate the predictions at document level, using Max criterion:

In [98]:
%%time
multi_test_preds = np.load('Paper_Executions/p_multi_ss_test_preds_map_475.npy')

CPU times: user 2.95 ms, sys: 608 µs, total: 3.56 ms
Wall time: 3.29 ms


In [99]:
multi_test_preds.shape

(3955, 727)

In [100]:
multi_doc_preds = max_fragment(y_frag_pred=multi_test_preds, n_fragments=test_frag)

In [101]:
multi_doc_preds.shape

(250, 727)

In [102]:
%%time
multi_1_test_preds = np.load('Paper_Executions/p_multi_ss_test_preds_map_469.npy')

CPU times: user 0 ns, sys: 3.97 ms, total: 3.97 ms
Wall time: 3.64 ms


In [103]:
multi_1_test_preds.shape

(3955, 727)

In [104]:
multi_1_doc_preds = max_fragment(y_frag_pred=multi_1_test_preds, n_fragments=test_frag)

In [105]:
multi_1_doc_preds.shape

(250, 727)

In [106]:
%%time
multi_2_test_preds = np.load('Paper_Executions/p_multi_ss_test_preds_map_468.npy')

CPU times: user 0 ns, sys: 3.88 ms, total: 3.88 ms
Wall time: 3.54 ms


In [107]:
multi_2_test_preds.shape

(3955, 727)

In [108]:
multi_2_doc_preds = max_fragment(y_frag_pred=multi_2_test_preds, n_fragments=test_frag)

In [109]:
multi_2_doc_preds.shape

(250, 727)

In [110]:
%%time
multi_3_test_preds = np.load('Paper_Executions/p_multi_ss_test_preds_map_467.npy')

CPU times: user 0 ns, sys: 3.49 ms, total: 3.49 ms
Wall time: 3.18 ms


In [111]:
multi_3_test_preds.shape

(3955, 727)

In [112]:
multi_3_doc_preds = max_fragment(y_frag_pred=multi_3_test_preds, n_fragments=test_frag)

In [113]:
multi_3_doc_preds.shape

(250, 727)

In [114]:
%%time
multi_4_test_preds = np.load('Paper_Executions/p_multi_ss_test_preds_map_466.npy')

CPU times: user 3.16 ms, sys: 657 µs, total: 3.82 ms
Wall time: 3.48 ms


In [115]:
multi_4_test_preds.shape

(3955, 727)

In [116]:
multi_4_doc_preds = max_fragment(y_frag_pred=multi_4_test_preds, n_fragments=test_frag)

In [117]:
multi_4_doc_preds.shape

(250, 727)

### RESULTS FOR PAPER

In [118]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=multi_doc_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 236 ms, sys: 10.6 ms, total: 247 ms
Wall time: 246 ms


In [119]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.475

CPU times: user 19.3 ms, sys: 8.8 ms, total: 28.1 ms
Wall time: 1.44 s


In [120]:
res = [0.475, 0.469, 0.468, 0.467, 0.466]
print(np.mean(res), np.std(res))
print(round(np.mean(res), 3), round(np.std(res), 3))

0.46900000000000003 0.003162277660168361
0.469 0.003


In [121]:
%%time
multi_galen_test_preds = np.load('Paper_Executions/p_multi_galen_ss_test_preds_map_508.npy')

CPU times: user 3.21 ms, sys: 16.3 ms, total: 19.5 ms
Wall time: 15.8 ms


In [122]:
multi_galen_test_preds.shape

(3955, 727)

In [123]:
multi_galen_doc_preds = max_fragment(y_frag_pred=multi_galen_test_preds, n_fragments=test_frag)

In [124]:
multi_galen_doc_preds.shape

(250, 727)

In [125]:
%%time
multi_galen_1_test_preds = np.load('Paper_Executions/p_multi_galen_ss_test_preds_map_494.npy')

CPU times: user 402 µs, sys: 4.2 ms, total: 4.61 ms
Wall time: 4.26 ms


In [126]:
multi_galen_1_test_preds.shape

(3955, 727)

In [127]:
multi_galen_1_doc_preds = max_fragment(y_frag_pred=multi_galen_1_test_preds, n_fragments=test_frag)

In [128]:
multi_galen_1_doc_preds.shape

(250, 727)

In [129]:
%%time
multi_galen_2_test_preds = np.load('Paper_Executions/p_multi_galen_ss_test_preds_map_493.npy')

CPU times: user 0 ns, sys: 3.94 ms, total: 3.94 ms
Wall time: 3.61 ms


In [130]:
multi_galen_2_test_preds.shape

(3955, 727)

In [131]:
multi_galen_2_doc_preds = max_fragment(y_frag_pred=multi_galen_2_test_preds, n_fragments=test_frag)

In [132]:
multi_galen_2_doc_preds.shape

(250, 727)

In [133]:
%%time
multi_galen_3_test_preds = np.load('Paper_Executions/p_multi_galen_ss_test_preds_map_49_2.npy')

CPU times: user 0 ns, sys: 3.8 ms, total: 3.8 ms
Wall time: 3.49 ms


In [134]:
multi_galen_3_test_preds.shape

(3955, 727)

In [135]:
multi_galen_3_doc_preds = max_fragment(y_frag_pred=multi_galen_3_test_preds, n_fragments=test_frag)

In [136]:
multi_galen_3_doc_preds.shape

(250, 727)

In [137]:
%%time
multi_galen_4_test_preds = np.load('Paper_Executions/p_multi_galen_ss_test_preds_map_49.npy')

CPU times: user 0 ns, sys: 3.94 ms, total: 3.94 ms
Wall time: 3.63 ms


In [138]:
multi_galen_4_test_preds.shape

(3955, 727)

In [139]:
multi_galen_4_doc_preds = max_fragment(y_frag_pred=multi_galen_4_test_preds, n_fragments=test_frag)

In [140]:
multi_galen_4_doc_preds.shape

(250, 727)

### RESULTS FOR PAPER

In [141]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=multi_galen_doc_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 240 ms, sys: 6.74 ms, total: 247 ms
Wall time: 246 ms


In [142]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.508

CPU times: user 19.2 ms, sys: 8.72 ms, total: 27.9 ms
Wall time: 1.47 s


In [143]:
res = [0.508, 0.494, 0.493, 0.49, 0.49]
print(np.mean(res), np.std(res))
print(round(np.mean(res), 3), round(np.std(res), 3))

0.495 0.00669328021227261
0.495 0.007


In [144]:
%%time
multi_mimic_test_preds = np.load('Paper_Executions/p_multi_mimic_ss_test_preds_map_468.npy')

CPU times: user 654 µs, sys: 8.38 ms, total: 9.03 ms
Wall time: 8.25 ms


In [145]:
multi_mimic_test_preds.shape

(3955, 727)

In [146]:
multi_mimic_doc_preds = max_fragment(y_frag_pred=multi_mimic_test_preds, n_fragments=test_frag)

In [147]:
multi_mimic_doc_preds.shape

(250, 727)

In [148]:
%%time
multi_mimic_1_test_preds = np.load('Paper_Executions/p_multi_mimic_ss_test_preds_map_465.npy')

CPU times: user 1.1 ms, sys: 4.35 ms, total: 5.45 ms
Wall time: 4.91 ms


In [149]:
multi_mimic_1_test_preds.shape

(3955, 727)

In [150]:
multi_mimic_1_doc_preds = max_fragment(y_frag_pred=multi_mimic_1_test_preds, n_fragments=test_frag)

In [151]:
multi_mimic_1_doc_preds.shape

(250, 727)

In [152]:
%%time
multi_mimic_2_test_preds = np.load('Paper_Executions/p_multi_mimic_ss_test_preds_map_463.npy')

CPU times: user 637 µs, sys: 3.24 ms, total: 3.88 ms
Wall time: 3.55 ms


In [153]:
multi_mimic_2_test_preds.shape

(3955, 727)

In [154]:
multi_mimic_2_doc_preds = max_fragment(y_frag_pred=multi_mimic_2_test_preds, n_fragments=test_frag)

In [155]:
multi_mimic_2_doc_preds.shape

(250, 727)

In [156]:
%%time
multi_mimic_3_test_preds = np.load('Paper_Executions/p_multi_mimic_ss_test_preds_map_462.npy')

CPU times: user 3.21 ms, sys: 630 µs, total: 3.84 ms
Wall time: 3.56 ms


In [157]:
multi_mimic_3_test_preds.shape

(3955, 727)

In [158]:
multi_mimic_3_doc_preds = max_fragment(y_frag_pred=multi_mimic_3_test_preds, n_fragments=test_frag)

In [159]:
multi_mimic_3_doc_preds.shape

(250, 727)

In [160]:
%%time
multi_mimic_4_test_preds = np.load('Paper_Executions/p_multi_mimic_ss_test_preds_map_46.npy')

CPU times: user 0 ns, sys: 3.88 ms, total: 3.88 ms
Wall time: 3.58 ms


In [161]:
multi_mimic_4_test_preds.shape

(3955, 727)

In [162]:
multi_mimic_4_doc_preds = max_fragment(y_frag_pred=multi_mimic_4_test_preds, n_fragments=test_frag)

In [163]:
multi_mimic_4_doc_preds.shape

(250, 727)

### RESULTS FOR PAPER

In [164]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=multi_mimic_4_doc_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 227 ms, sys: 19.9 ms, total: 247 ms
Wall time: 246 ms


In [165]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.46

CPU times: user 16.9 ms, sys: 8.33 ms, total: 25.2 ms
Wall time: 1.46 s


In [166]:
res = [0.468, 0.465, 0.463, 0.462, 0.46]
print(np.mean(res), np.std(res))
print(round(np.mean(res), 3), round(np.std(res), 3))

0.4636 0.0027276363393971734
0.464 0.003


In [167]:
%%time
multi_mimic_galen_test_preds = np.load('Paper_Executions/p_multi_mimic_galen_ss_test_preds_map_508.npy')

CPU times: user 5.39 ms, sys: 12.7 ms, total: 18.1 ms
Wall time: 14.6 ms


In [168]:
multi_mimic_galen_test_preds.shape

(3955, 727)

In [169]:
multi_mimic_galen_doc_preds = max_fragment(y_frag_pred=multi_mimic_galen_test_preds, n_fragments=test_frag)

In [170]:
multi_mimic_galen_doc_preds.shape

(250, 727)

In [171]:
%%time
multi_mimic_galen_1_test_preds = np.load('Paper_Executions/p_multi_mimic_galen_ss_test_preds_map_502.npy')

CPU times: user 133 µs, sys: 4.15 ms, total: 4.29 ms
Wall time: 3.93 ms


In [172]:
multi_mimic_galen_1_test_preds.shape

(3955, 727)

In [173]:
multi_mimic_galen_1_doc_preds = max_fragment(y_frag_pred=multi_mimic_galen_1_test_preds, n_fragments=test_frag)

In [174]:
multi_mimic_galen_1_doc_preds.shape

(250, 727)

In [175]:
%%time
multi_mimic_galen_2_test_preds = np.load('Paper_Executions/p_multi_mimic_galen_ss_test_preds_map_501.npy')

CPU times: user 0 ns, sys: 3.84 ms, total: 3.84 ms
Wall time: 3.55 ms


In [176]:
multi_mimic_galen_2_test_preds.shape

(3955, 727)

In [177]:
multi_mimic_galen_2_doc_preds = max_fragment(y_frag_pred=multi_mimic_galen_2_test_preds, n_fragments=test_frag)

In [178]:
multi_mimic_galen_2_doc_preds.shape

(250, 727)

In [179]:
%%time
multi_mimic_galen_3_test_preds = np.load('Paper_Executions/p_multi_mimic_galen_ss_test_preds_map_498.npy')

CPU times: user 3.36 ms, sys: 3.98 ms, total: 7.34 ms
Wall time: 6.15 ms


In [180]:
multi_mimic_galen_3_test_preds.shape

(3955, 727)

In [181]:
multi_mimic_galen_3_doc_preds = max_fragment(y_frag_pred=multi_mimic_galen_3_test_preds, n_fragments=test_frag)

In [182]:
multi_mimic_galen_3_doc_preds.shape

(250, 727)

In [183]:
%%time
multi_mimic_galen_4_test_preds = np.load('Paper_Executions/p_multi_mimic_galen_ss_test_preds_map_496.npy')

CPU times: user 0 ns, sys: 3.83 ms, total: 3.83 ms
Wall time: 3.51 ms


In [184]:
multi_mimic_galen_4_test_preds.shape

(3955, 727)

In [185]:
multi_mimic_galen_4_doc_preds = max_fragment(y_frag_pred=multi_mimic_galen_4_test_preds, n_fragments=test_frag)

In [186]:
multi_mimic_galen_4_doc_preds.shape

(250, 727)

### RESULTS FOR PAPER

In [187]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=multi_mimic_galen_4_doc_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 234 ms, sys: 11.4 ms, total: 246 ms
Wall time: 245 ms


In [188]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.496

CPU times: user 13.3 ms, sys: 16.6 ms, total: 29.9 ms
Wall time: 1.46 s


In [189]:
res = [0.508, 0.502, 0.501, 0.498, 0.496]
print(np.mean(res), np.std(res))
print(round(np.mean(res), 3), round(np.std(res), 3))

0.5010000000000001 0.0040987803063838426
0.501 0.004


## BETO model

In [190]:
bert_path = "../../../../NLP/BERT_models/BETO_cased/"

In [191]:
vocab_file = "vocab.txt"

In [192]:
bert_token_dict = load_vocabulary(bert_path + vocab_file)

In [193]:
len(bert_token_dict)

31002

In [194]:
bert_tokenizer = Tokenizer(token_dict=bert_token_dict, pad_index=1, cased=True)

In [195]:
# Hyper-parameters (only tune bs, epochs, lr and (optionally) wd)
text_col = "raw_text"
SEQ_LEN = 128 # Lower than max_position_embeddings field in config file (GPU exhausted)

In [196]:
%%time
test_ind, test_seg, test_y, test_frag, test_start_end_frag = ss_brute_force_create_frag_input_data(df_text=df_text_test, 
                                                  text_col=text_col, 
                                                  df_ann=df_codes_d_train, doc_list=test_doc_list, ss_dict=ss_dict_test,
                                                  tokenizer=bert_tokenizer, lab_encoder=mlb_encoder, seq_len=SEQ_LEN,
                                                  greedy=False)

100%|██████████| 250/250 [00:01<00:00, 179.99it/s]

CPU times: user 1.44 s, sys: 34.3 ms, total: 1.47 s
Wall time: 1.44 s





In [197]:
test_ind.shape

(3948, 128)

In [198]:
test_seg.shape

(3948, 128)

In [199]:
test_y.shape

(3948, 727)

In [200]:
test_frag.shape

(250,)

Sanity check:

In [201]:
# Check fragments have n_doc shape
assert len(test_frag) == len(test_doc_list)

In [202]:
# Check indices have n_doc*n_frag x SEQ_LEN shape
assert test_ind.shape == (sum(test_frag), SEQ_LEN)

In [203]:
# Check n_frag distribution across train texts
pd.Series(test_frag).describe()

count    250.000000
mean      15.792000
std        8.114439
min        4.000000
25%       10.000000
50%       14.000000
75%       19.000000
max       59.000000
dtype: float64

In [204]:
# Inspect a randomly selected text and its encoded version
check_id = np.random.randint(low=0, high=len(test_doc_list), size=1)[0]

In [205]:
check_id

6

In [206]:
df_text_test[df_text_test["doc_id"] == test_doc_list[check_id]][text_col].values[0]

'Presentamos una paciente de 63 años de edad, con antecedentes de hipertensión arterial y angor pectoris, remitida a nuestro servicio desde un hospital de un medio rural. El motivo de la consulta era el hallazgo casual en una ecografía de una masa renal izquierda, no refiriendo la paciente sintomatología alguna. En la analítica de sangre sólo aparecía una discreta eosinofilia. Como estudios complementarios se realizaron urografías intravenosas en las que se observó un efecto masa a nivel de flanco izquierdo, de borde liso y bien delimitado, dependiente del polo superior del riñón izquierdo, con compresión extrínseca del sistema pielocalicial, de 16 x 10 cm.. La TAC reveló una masa de 17 x 12 x 19 cm en el polo superior del riñón izquierdo, con zonas quísticas, compatible con nefroma quístico multilocular, sin existir adenopatías. La gammagrafía DMSA mostraba una función renal diferencial del 63% para el riñón derecho y del 37% para el izquierdo.\n\nSe realizó una nueva ecografía en la 

In [207]:
check_id_frag = sum(test_frag[:check_id])
for frag in test_ind[check_id_frag:check_id_frag + test_frag[check_id]]:
    print(' '.join([bert_tokenizer._token_dict_inv[ind] for ind in frag]), "\n")

[CLS] Presenta ##mos una paciente de 63 años de edad , con antecedentes de hiper ##tensión arterial y an ##gor pec ##tori ##s , remi ##tida a nuestro servicio desde un hospital de un medio rural . [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] 

[CLS] El motivo de la consulta era el hallaz ##go cas ##ual en una eco ##grafía de una masa renal izquierda , no refi ##riendo la paciente sin ##toma ##tología alguna . [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [P

In [208]:
# Check segments are a zeros n_doc*n_frag x SEQ_LEN matrix
assert np.array_equal(np.zeros(test_ind.shape), test_seg)

In [209]:
# Check labels shape
assert test_y.shape == (test_ind.shape[0], num_labels)

In [210]:
# Check artificial labels are all zero
assert np.array_equal(np.zeros(test_y.shape), test_y)

In [211]:
# Fragment labels distribution
pd.Series(np.sum(test_y, axis=1)).describe()

count    3948.0
mean        0.0
std         0.0
min         0.0
25%         0.0
50%         0.0
75%         0.0
max         0.0
dtype: float64

In [212]:
test_pred_file_path = "proc_test_frag_preds.tsv"

We load the sentence-predictions from the different models, and generate the predictions at document level, using Max criterion:

In [213]:
%%time
beto_test_preds = np.load('Paper_Executions/p_beto_ss_test_preds_map_467.npy')

CPU times: user 0 ns, sys: 3.89 ms, total: 3.89 ms
Wall time: 3.49 ms


In [214]:
beto_test_preds.shape

(3948, 727)

In [215]:
beto_doc_preds = max_fragment(y_frag_pred=beto_test_preds, n_fragments=test_frag)

In [216]:
beto_doc_preds.shape

(250, 727)

In [217]:
%%time
beto_1_test_preds = np.load('Paper_Executions/p_beto_ss_test_preds_map_461.npy')

CPU times: user 658 µs, sys: 4.22 ms, total: 4.88 ms
Wall time: 4.4 ms


In [218]:
beto_1_test_preds.shape

(3948, 727)

In [219]:
beto_1_doc_preds = max_fragment(y_frag_pred=beto_1_test_preds, n_fragments=test_frag)

In [220]:
beto_1_doc_preds.shape

(250, 727)

In [221]:
%%time
beto_2_test_preds = np.load('Paper_Executions/p_beto_ss_test_preds_map_46.npy')

CPU times: user 0 ns, sys: 3.91 ms, total: 3.91 ms
Wall time: 3.58 ms


In [222]:
beto_2_test_preds.shape

(3948, 727)

In [223]:
beto_2_doc_preds = max_fragment(y_frag_pred=beto_2_test_preds, n_fragments=test_frag)

In [224]:
beto_2_doc_preds.shape

(250, 727)

In [225]:
%%time
beto_3_test_preds = np.load('Paper_Executions/p_beto_ss_test_preds_map_452.npy')

CPU times: user 0 ns, sys: 4 ms, total: 4 ms
Wall time: 3.74 ms


In [226]:
beto_3_test_preds.shape

(3948, 727)

In [227]:
beto_3_doc_preds = max_fragment(y_frag_pred=beto_3_test_preds, n_fragments=test_frag)

In [228]:
beto_3_doc_preds.shape

(250, 727)

In [229]:
%%time
beto_4_test_preds = np.load('Paper_Executions/p_beto_ss_test_preds_map_45.npy')

CPU times: user 4.07 ms, sys: 0 ns, total: 4.07 ms
Wall time: 3.72 ms


In [230]:
beto_4_test_preds.shape

(3948, 727)

In [231]:
beto_4_doc_preds = max_fragment(y_frag_pred=beto_4_test_preds, n_fragments=test_frag)

In [232]:
beto_4_doc_preds.shape

(250, 727)

### RESULTS FOR PAPER

In [233]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=beto_doc_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 252 ms, sys: 0 ns, total: 252 ms
Wall time: 251 ms


In [234]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.467

CPU times: user 20.9 ms, sys: 11.2 ms, total: 32.2 ms
Wall time: 1.46 s


In [235]:
res = [0.467, 0.461, 0.46, 0.452, 0.45]
print(np.mean(res), np.std(res))
print(round(np.mean(res), 3), round(np.std(res), 3))

0.458 0.0062289646009589805
0.458 0.006


In [236]:
%%time
beto_galen_test_preds = np.load('Paper_Executions/p_beto_galen_ss_test_preds_map_52.npy')

CPU times: user 4.12 ms, sys: 12.3 ms, total: 16.5 ms
Wall time: 12.4 ms


In [237]:
beto_galen_test_preds.shape

(3948, 727)

In [238]:
beto_galen_doc_preds = max_fragment(y_frag_pred=beto_galen_test_preds, n_fragments=test_frag)

In [239]:
beto_galen_doc_preds.shape

(250, 727)

In [240]:
%%time
beto_galen_1_test_preds = np.load('Paper_Executions/p_beto_galen_ss_test_preds_map_516.npy')

CPU times: user 4.83 ms, sys: 117 µs, total: 4.95 ms
Wall time: 4.23 ms


In [241]:
beto_galen_1_test_preds.shape

(3948, 727)

In [242]:
beto_galen_1_doc_preds = max_fragment(y_frag_pred=beto_galen_1_test_preds, n_fragments=test_frag)

In [243]:
beto_galen_1_doc_preds.shape

(250, 727)

In [244]:
%%time
beto_galen_2_test_preds = np.load('Paper_Executions/p_beto_galen_ss_test_preds_map_512_2.npy')

CPU times: user 0 ns, sys: 3.77 ms, total: 3.77 ms
Wall time: 3.46 ms


In [245]:
beto_galen_2_test_preds.shape

(3948, 727)

In [246]:
beto_galen_2_doc_preds = max_fragment(y_frag_pred=beto_galen_2_test_preds, n_fragments=test_frag)

In [247]:
beto_galen_2_doc_preds.shape

(250, 727)

In [248]:
%%time
beto_galen_3_test_preds = np.load('Paper_Executions/p_beto_galen_ss_test_preds_map_512.npy')

CPU times: user 0 ns, sys: 3.82 ms, total: 3.82 ms
Wall time: 3.5 ms


In [249]:
beto_galen_3_test_preds.shape

(3948, 727)

In [250]:
beto_galen_3_doc_preds = max_fragment(y_frag_pred=beto_galen_3_test_preds, n_fragments=test_frag)

In [251]:
beto_galen_3_doc_preds.shape

(250, 727)

In [252]:
%%time
beto_galen_4_test_preds = np.load('Paper_Executions/p_beto_galen_ss_test_preds_map_509.npy')

CPU times: user 0 ns, sys: 3.79 ms, total: 3.79 ms
Wall time: 3.48 ms


In [253]:
beto_galen_4_test_preds.shape

(3948, 727)

In [254]:
beto_galen_4_doc_preds = max_fragment(y_frag_pred=beto_galen_4_test_preds, n_fragments=test_frag)

In [255]:
beto_galen_4_doc_preds.shape

(250, 727)

### RESULTS FOR PAPER

In [256]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=beto_galen_doc_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 241 ms, sys: 7.33 ms, total: 249 ms
Wall time: 248 ms


In [257]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.52

CPU times: user 23.1 ms, sys: 5.77 ms, total: 28.9 ms
Wall time: 1.53 s


In [258]:
res = [0.52, 0.516, 0.512, 0.512, 0.509]
print(np.mean(res), np.std(res))
print(round(np.mean(res), 3), round(np.std(res), 3))

0.5138 0.003815756805667786
0.514 0.004


## XLM-R model

In [259]:
test_frag = np.load("keras-data/p_xlm_r_test_frag.npy")

In [260]:
test_frag.shape

(250,)

In [261]:
test_frag.sum()

3950

In [262]:
test_pred_file_path = "proc_test_frag_preds.tsv"

We load the sentence-predictions from the different models, and generate the predictions at document level, using Max criterion:

In [263]:
%%time
xlm_r_test_preds = np.load('Paper_Executions/p_xlm_r_seed_0_ss_test_preds_map_477.npy')

CPU times: user 2.49 ms, sys: 7.98 ms, total: 10.5 ms
Wall time: 8.51 ms


In [264]:
xlm_r_test_preds.shape

(3950, 727)

In [265]:
xlm_r_doc_preds = max_fragment(y_frag_pred=xlm_r_test_preds, n_fragments=test_frag)

In [266]:
xlm_r_doc_preds.shape

(250, 727)

In [267]:
%%time
xlm_r_1_test_preds = np.load('Paper_Executions/p_xlm_r_seed_2_ss_test_preds_map_472.npy')

CPU times: user 139 µs, sys: 4.14 ms, total: 4.28 ms
Wall time: 3.9 ms


In [268]:
xlm_r_1_test_preds.shape

(3950, 727)

In [269]:
xlm_r_1_doc_preds = max_fragment(y_frag_pred=xlm_r_1_test_preds, n_fragments=test_frag)

In [270]:
xlm_r_1_doc_preds.shape

(250, 727)

In [271]:
%%time
xlm_r_2_test_preds = np.load('Paper_Executions/p_xlm_r_seed_2_ss_test_preds_map_471.npy')

CPU times: user 0 ns, sys: 3.95 ms, total: 3.95 ms
Wall time: 3.61 ms


In [272]:
xlm_r_2_test_preds.shape

(3950, 727)

In [273]:
xlm_r_2_doc_preds = max_fragment(y_frag_pred=xlm_r_2_test_preds, n_fragments=test_frag)

In [274]:
xlm_r_2_doc_preds.shape

(250, 727)

In [275]:
%%time
xlm_r_3_test_preds = np.load('Paper_Executions/p_xlm_r_seed_1_ss_test_preds_map_463.npy')

CPU times: user 0 ns, sys: 3.86 ms, total: 3.86 ms
Wall time: 3.53 ms


In [276]:
xlm_r_3_test_preds.shape

(3950, 727)

In [277]:
xlm_r_3_doc_preds = max_fragment(y_frag_pred=xlm_r_3_test_preds, n_fragments=test_frag)

In [278]:
xlm_r_3_doc_preds.shape

(250, 727)

In [279]:
%%time
xlm_r_4_test_preds = np.load('Paper_Executions/p_xlm_r_seed_0_ss_test_preds_map_462.npy')

CPU times: user 0 ns, sys: 3.89 ms, total: 3.89 ms
Wall time: 3.57 ms


In [280]:
xlm_r_4_test_preds.shape

(3950, 727)

In [281]:
xlm_r_4_doc_preds = max_fragment(y_frag_pred=xlm_r_4_test_preds, n_fragments=test_frag)

In [282]:
xlm_r_4_doc_preds.shape

(250, 727)

### RESULTS FOR PAPER

In [283]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=xlm_r_4_doc_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 233 ms, sys: 11.4 ms, total: 245 ms
Wall time: 244 ms


In [284]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.462

CPU times: user 18.4 ms, sys: 11.9 ms, total: 30.4 ms
Wall time: 1.47 s


In [285]:
res = [0.477, 0.472, 0.471, 0.463, 0.462]
print(np.mean(res), np.std(res))
print(round(np.mean(res), 3), round(np.std(res), 3))

0.46900000000000003 0.0056920997883030625
0.469 0.006


In [286]:
%%time
xlm_r_galen_test_preds = np.load('Paper_Executions/p_xlm_r_galen_seed_2_ss_test_preds_map_498.npy')

CPU times: user 0 ns, sys: 7.47 ms, total: 7.47 ms
Wall time: 6.97 ms


In [287]:
xlm_r_galen_test_preds.shape

(3950, 727)

In [288]:
xlm_r_galen_doc_preds = max_fragment(y_frag_pred=xlm_r_galen_test_preds, n_fragments=test_frag)

In [289]:
xlm_r_galen_doc_preds.shape

(250, 727)

In [290]:
%%time
xlm_r_galen_1_test_preds = np.load('Paper_Executions/p_xlm_r_galen_seed_3_ss_test_preds_map_494.npy')

CPU times: user 973 µs, sys: 4.27 ms, total: 5.25 ms
Wall time: 4.56 ms


In [291]:
xlm_r_galen_1_test_preds.shape

(3950, 727)

In [292]:
xlm_r_galen_1_doc_preds = max_fragment(y_frag_pred=xlm_r_galen_1_test_preds, n_fragments=test_frag)

In [293]:
xlm_r_galen_1_doc_preds.shape

(250, 727)

In [294]:
%%time
xlm_r_galen_2_test_preds = np.load('Paper_Executions/p_xlm_r_galen_seed_0_ss_test_preds_map_493.npy')

CPU times: user 3.89 ms, sys: 0 ns, total: 3.89 ms
Wall time: 3.6 ms


In [295]:
xlm_r_galen_2_test_preds.shape

(3950, 727)

In [296]:
xlm_r_galen_2_doc_preds = max_fragment(y_frag_pred=xlm_r_galen_2_test_preds, n_fragments=test_frag)

In [297]:
xlm_r_galen_2_doc_preds.shape

(250, 727)

In [298]:
%%time
xlm_r_galen_3_test_preds = np.load('Paper_Executions/p_xlm_r_galen_seed_1_ss_test_preds_map_49.npy')

CPU times: user 0 ns, sys: 3.73 ms, total: 3.73 ms
Wall time: 3.47 ms


In [299]:
xlm_r_galen_3_test_preds.shape

(3950, 727)

In [300]:
xlm_r_galen_3_doc_preds = max_fragment(y_frag_pred=xlm_r_galen_3_test_preds, n_fragments=test_frag)

In [301]:
xlm_r_galen_3_doc_preds.shape

(250, 727)

In [302]:
%%time
xlm_r_galen_4_test_preds = np.load('Paper_Executions/p_xlm_r_galen_seed_0_ss_test_preds_map_488.npy')

CPU times: user 50 µs, sys: 4.12 ms, total: 4.17 ms
Wall time: 3.83 ms


In [303]:
xlm_r_galen_4_test_preds.shape

(3950, 727)

In [304]:
xlm_r_galen_4_doc_preds = max_fragment(y_frag_pred=xlm_r_galen_4_test_preds, n_fragments=test_frag)

In [305]:
xlm_r_galen_4_doc_preds.shape

(250, 727)

### RESULTS FOR PAPER

In [306]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=xlm_r_galen_4_doc_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 235 ms, sys: 7.24 ms, total: 242 ms
Wall time: 241 ms


In [307]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.488

CPU times: user 11.9 ms, sys: 21.1 ms, total: 32.9 ms
Wall time: 1.48 s


In [308]:
res = [0.498, 0.494, 0.493, 0.49, 0.488]
print(np.mean(res), np.std(res))
print(round(np.mean(res), 3), round(np.std(res), 3))

0.49260000000000004 0.0034409301068170535
0.493 0.003


In [309]:
# COMPUTE P, R, F1 METRICS

In [310]:
# Automatic

In [311]:
thr_arr = np.linspace(0.01, 0.60, 60)
gs_path = "../../datasets/final_dataset_v4_to_publish/test/testP.tsv"
valid_path = "../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv"

# MAP
gs_out_path = "./test_intermediate_gs_file.txt"
format_gs(filepath=gs_path, output_path=gs_out_path)
valid_d_codes = set(pd.read_csv(valid_path, sep='\t', header=None, 
                              usecols=[0])[0].tolist())
valid_d_codes = set([x.lower() for x in valid_d_codes])

In [312]:
# Multi

In [312]:
doc_preds = multi_doc_preds

In [314]:
%%time
df_res_metrics = compute_metrics(thr_arr=thr_arr, doc_preds=doc_preds)

CPU times: user 35.7 s, sys: 111 ms, total: 35.8 s
Wall time: 35.8 s


In [315]:
df_res_metrics.sort_values(by=["F1", "MAP"], ascending=False).head(20)

Unnamed: 0,P,R,F1,MAP,Thr
51,0.669,0.448,0.537,0.479,0.52
55,0.681,0.443,0.537,0.478,0.56
57,0.687,0.44,0.537,0.478,0.58
52,0.67,0.446,0.536,0.481,0.53
53,0.674,0.445,0.536,0.48,0.54
54,0.678,0.444,0.536,0.479,0.55
50,0.664,0.449,0.536,0.477,0.51
56,0.683,0.441,0.536,0.477,0.57
49,0.659,0.449,0.535,0.477,0.5
47,0.655,0.451,0.534,0.478,0.48


In [313]:
best_thr = 0.52

In [318]:
df_res_metrics[round(df_res_metrics['Thr'], 2) == best_thr]

Unnamed: 0,P,R,F1,MAP,Thr
51,0.669,0.448,0.537,0.479,0.52


In [None]:
# Sanity check: Manual

In [319]:
thr_codiesp_prediction_format(y_pred=doc_preds, label_encoder_classes=mlb_encoder.classes_, 
                              doc_list=test_doc_list, thr=best_thr)[["doc_id", "code"]] \
    .to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

In [320]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.479

CPU times: user 13.8 ms, sys: 8 ms, total: 21.8 ms
Wall time: 769 ms


In [None]:
## Avg

In [314]:
pd.DataFrame(compute_metrics_avg(thr=best_thr, doc_preds_arr=[multi_doc_preds, multi_1_doc_preds, multi_2_doc_preds, 
                                                 multi_3_doc_preds, multi_4_doc_preds])).transpose()

Unnamed: 0,P,P_std,P_max,R,R_std,R_max,F1,F1_std,F1_max,Thr
0,0.655,0.013,0.669,0.438,0.013,0.452,0.525,0.012,0.537,0.52


In [None]:
## Ensemble

In [315]:
sum_ens = (multi_doc_preds + multi_1_doc_preds + multi_2_doc_preds + multi_3_doc_preds + multi_4_doc_preds)/5

In [316]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.736,0.433,0.545,0.494,0.52


In [None]:
# Sanity check: Manual

In [324]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.494

CPU times: user 13.1 ms, sys: 8.11 ms, total: 21.2 ms
Wall time: 765 ms


In [262]:
# Multi-Galén

In [317]:
doc_preds = multi_galen_doc_preds

In [326]:
%%time
df_res_metrics = compute_metrics(thr_arr, doc_preds)

CPU times: user 35.8 s, sys: 146 ms, total: 35.9 s
Wall time: 35.9 s


In [327]:
df_res_metrics.sort_values(by=["F1", "MAP"], ascending=False).head(20)

Unnamed: 0,P,R,F1,MAP,Thr
11,0.603,0.51,0.552,0.509,0.12
12,0.607,0.505,0.551,0.505,0.13
13,0.608,0.502,0.55,0.503,0.14
10,0.593,0.511,0.549,0.508,0.11
14,0.61,0.499,0.549,0.502,0.15
9,0.586,0.514,0.548,0.508,0.1
8,0.577,0.52,0.547,0.51,0.09
16,0.616,0.493,0.547,0.502,0.17
15,0.612,0.494,0.547,0.499,0.16
6,0.562,0.528,0.545,0.504,0.07


In [318]:
best_thr = 0.12

In [329]:
df_res_metrics[round(df_res_metrics['Thr'], 2) == best_thr]

Unnamed: 0,P,R,F1,MAP,Thr
11,0.603,0.51,0.552,0.509,0.12


In [None]:
# Sanity check: Manual

In [330]:
thr_codiesp_prediction_format(y_pred=doc_preds, label_encoder_classes=mlb_encoder.classes_, 
                              doc_list=test_doc_list, thr=best_thr)[["doc_id", "code"]] \
    .to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

In [331]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.509

CPU times: user 16.2 ms, sys: 12.2 ms, total: 28.4 ms
Wall time: 779 ms


In [None]:
## Avg

In [319]:
pd.DataFrame(compute_metrics_avg(thr=best_thr, doc_preds_arr=[multi_galen_doc_preds, multi_galen_1_doc_preds, multi_galen_2_doc_preds, 
                                                              multi_galen_3_doc_preds, multi_galen_4_doc_preds])).transpose()

Unnamed: 0,P,P_std,P_max,R,R_std,R_max,F1,F1_std,F1_max,Thr
0,0.582,0.014,0.603,0.501,0.009,0.51,0.538,0.009,0.552,0.12


In [None]:
## Ensemble

In [320]:
sum_ens = (multi_galen_doc_preds + multi_galen_1_doc_preds + multi_galen_2_doc_preds + multi_galen_3_doc_preds + multi_galen_4_doc_preds)/5 

In [321]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.568,0.573,0.571,0.539,0.12


In [None]:
# Sanity check: Manual

In [335]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.539

CPU times: user 16.5 ms, sys: 32.1 ms, total: 48.6 ms
Wall time: 783 ms


In [262]:
# BETO

In [322]:
doc_preds = beto_doc_preds

In [337]:
%%time
df_res_metrics = compute_metrics(thr_arr, doc_preds)

CPU times: user 35.8 s, sys: 135 ms, total: 35.9 s
Wall time: 35.9 s


In [338]:
df_res_metrics.sort_values(by=["F1", "MAP"], ascending=False).head(20)

Unnamed: 0,P,R,F1,MAP,Thr
52,0.643,0.44,0.523,0.467,0.53
51,0.64,0.44,0.522,0.467,0.52
53,0.644,0.439,0.522,0.467,0.54
54,0.645,0.438,0.522,0.467,0.55
55,0.647,0.438,0.522,0.467,0.56
20,0.579,0.473,0.521,0.472,0.21
50,0.635,0.441,0.521,0.465,0.51
56,0.647,0.436,0.521,0.465,0.57
18,0.572,0.477,0.52,0.476,0.19
45,0.629,0.444,0.52,0.466,0.46


In [323]:
best_thr = 0.53

In [340]:
df_res_metrics[round(df_res_metrics['Thr'], 2) == best_thr]

Unnamed: 0,P,R,F1,MAP,Thr
52,0.643,0.44,0.523,0.467,0.53


In [None]:
# Sanity check: Manual

In [345]:
thr_codiesp_prediction_format(y_pred=doc_preds, label_encoder_classes=mlb_encoder.classes_, 
                              doc_list=test_doc_list, thr=best_thr)[["doc_id", "code"]] \
    .to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

In [346]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.467

CPU times: user 11.3 ms, sys: 8.04 ms, total: 19.4 ms
Wall time: 757 ms


In [None]:
## Avg

In [324]:
pd.DataFrame(compute_metrics_avg(thr=best_thr, doc_preds_arr=[beto_doc_preds, beto_1_doc_preds, beto_2_doc_preds, 
                                                              beto_3_doc_preds, beto_4_doc_preds])).transpose()

Unnamed: 0,P,P_std,P_max,R,R_std,R_max,F1,F1_std,F1_max,Thr
0,0.661,0.013,0.675,0.407,0.019,0.44,0.503,0.013,0.523,0.53


In [None]:
## Ensemble

In [325]:
sum_ens = (beto_doc_preds + beto_1_doc_preds + beto_2_doc_preds + beto_3_doc_preds + beto_4_doc_preds)/5

In [326]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.755,0.404,0.526,0.474,0.53


In [None]:
# Sanity check: Manual

In [294]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.474

CPU times: user 7.63 ms, sys: 7.95 ms, total: 15.6 ms
Wall time: 763 ms


In [262]:
# BETO-Galén

In [327]:
doc_preds = beto_galen_doc_preds

In [344]:
%%time
df_res_metrics = compute_metrics(thr_arr, doc_preds)

CPU times: user 35.8 s, sys: 108 ms, total: 35.9 s
Wall time: 35.9 s


In [345]:
df_res_metrics.sort_values(by=["F1", "MAP"], ascending=False).head(20)

Unnamed: 0,P,R,F1,MAP,Thr
30,0.642,0.488,0.555,0.509,0.31
40,0.674,0.472,0.555,0.508,0.41
44,0.683,0.468,0.555,0.506,0.45
42,0.68,0.469,0.555,0.504,0.43
31,0.645,0.485,0.554,0.507,0.32
38,0.667,0.473,0.554,0.507,0.39
45,0.683,0.466,0.554,0.506,0.46
37,0.665,0.474,0.554,0.505,0.38
41,0.676,0.47,0.554,0.505,0.42
46,0.685,0.465,0.554,0.505,0.47


In [329]:
best_thr = 0.31

In [347]:
df_res_metrics[round(df_res_metrics['Thr'], 2) == best_thr]

Unnamed: 0,P,R,F1,MAP,Thr
30,0.642,0.488,0.555,0.509,0.31


In [None]:
# Sanity check: Manual

In [354]:
thr_codiesp_prediction_format(y_pred=doc_preds, label_encoder_classes=mlb_encoder.classes_, 
                              doc_list=test_doc_list, thr=best_thr)[["doc_id", "code"]] \
    .to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

In [355]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.509

CPU times: user 13.1 ms, sys: 8.06 ms, total: 21.1 ms
Wall time: 763 ms


In [None]:
## Avg

In [330]:
pd.DataFrame(compute_metrics_avg(thr=best_thr, doc_preds_arr=[beto_galen_doc_preds, beto_galen_1_doc_preds, beto_galen_2_doc_preds, 
                                                              beto_galen_3_doc_preds, beto_galen_4_doc_preds])).transpose()

Unnamed: 0,P,P_std,P_max,R,R_std,R_max,F1,F1_std,F1_max,Thr
0,0.649,0.009,0.663,0.482,0.008,0.493,0.553,0.005,0.56,0.31


In [None]:
## Ensemble

In [331]:
sum_ens = (beto_galen_doc_preds + beto_galen_1_doc_preds + beto_galen_2_doc_preds + beto_galen_3_doc_preds + beto_galen_4_doc_preds)/5 

In [332]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.664,0.502,0.572,0.532,0.31


In [None]:
# Sanity check: Manual

In [304]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.532

CPU times: user 14.6 ms, sys: 3.98 ms, total: 18.6 ms
Wall time: 766 ms


In [262]:
# XLM-R

In [333]:
doc_preds = xlm_r_doc_preds

In [334]:
%%time
df_res_metrics = compute_metrics(thr_arr, doc_preds)

CPU times: user 37.3 s, sys: 505 ms, total: 37.8 s
Wall time: 37.7 s


In [335]:
df_res_metrics.sort_values(by=["F1", "MAP"], ascending=False).head(20)

Unnamed: 0,P,R,F1,MAP,Thr
53,0.611,0.473,0.533,0.474,0.54
50,0.599,0.48,0.533,0.472,0.51
54,0.612,0.47,0.532,0.474,0.55
48,0.595,0.481,0.532,0.473,0.49
55,0.617,0.468,0.532,0.473,0.56
52,0.606,0.473,0.532,0.47,0.53
59,0.626,0.461,0.531,0.474,0.6
47,0.592,0.481,0.531,0.473,0.48
45,0.588,0.485,0.531,0.472,0.46
49,0.594,0.48,0.531,0.472,0.5


In [350]:
best_thr = 0.43

In [351]:
df_res_metrics[round(df_res_metrics['Thr'], 2) == best_thr]

Unnamed: 0,P,R,F1,MAP,Thr
42,0.574,0.487,0.527,0.469,0.43


In [None]:
# Sanity check: Manual

In [363]:
thr_codiesp_prediction_format(y_pred=doc_preds, label_encoder_classes=mlb_encoder.classes_, 
                              doc_list=test_doc_list, thr=best_thr)[["doc_id", "code"]] \
    .to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

In [364]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.474

CPU times: user 14 ms, sys: 8.05 ms, total: 22 ms
Wall time: 750 ms


In [None]:
## Avg

In [352]:
pd.DataFrame(compute_metrics_avg(thr=best_thr, doc_preds_arr=[xlm_r_doc_preds, xlm_r_1_doc_preds, xlm_r_2_doc_preds, 
                                                              xlm_r_3_doc_preds, xlm_r_4_doc_preds])).transpose()

Unnamed: 0,P,P_std,P_max,R,R_std,R_max,F1,F1_std,F1_max,Thr
0,0.608,0.024,0.638,0.461,0.018,0.487,0.524,0.003,0.527,0.43


In [None]:
## Ensemble

In [353]:
sum_ens = (xlm_r_doc_preds + xlm_r_1_doc_preds + xlm_r_2_doc_preds + xlm_r_3_doc_preds + xlm_r_4_doc_preds)/5 

In [354]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.699,0.468,0.56,0.494,0.43


In [None]:
# Sanity check: Manual

In [372]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.481

CPU times: user 10.1 ms, sys: 12 ms, total: 22.2 ms
Wall time: 751 ms


In [262]:
# XLM-R-Galén

In [355]:
doc_preds = xlm_r_galen_doc_preds

In [359]:
%%time
df_res_metrics = compute_metrics(thr_arr, doc_preds)

CPU times: user 35.9 s, sys: 148 ms, total: 36 s
Wall time: 36 s


In [360]:
df_res_metrics.sort_values(by=["F1", "MAP"], ascending=False).head(20)

Unnamed: 0,P,R,F1,MAP,Thr
42,0.632,0.478,0.544,0.482,0.43
32,0.612,0.488,0.543,0.487,0.33
41,0.628,0.478,0.543,0.482,0.42
43,0.631,0.476,0.543,0.481,0.44
44,0.633,0.476,0.543,0.481,0.45
49,0.646,0.468,0.543,0.481,0.5
45,0.636,0.474,0.543,0.48,0.46
33,0.614,0.486,0.542,0.488,0.34
30,0.605,0.49,0.542,0.487,0.31
31,0.609,0.488,0.542,0.487,0.32


In [368]:
best_thr = 0.33

In [362]:
df_res_metrics[round(df_res_metrics['Thr'], 2) == best_thr]

Unnamed: 0,P,R,F1,MAP,Thr
32,0.612,0.488,0.543,0.487,0.33


In [None]:
# Sanity check: Manual

In [382]:
thr_codiesp_prediction_format(y_pred=doc_preds, label_encoder_classes=mlb_encoder.classes_, 
                              doc_list=test_doc_list, thr=best_thr)[["doc_id", "code"]] \
    .to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

In [383]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.487

CPU times: user 5.29 ms, sys: 16 ms, total: 21.3 ms
Wall time: 750 ms


In [None]:
## Avg

In [369]:
pd.DataFrame(compute_metrics_avg(thr=best_thr, doc_preds_arr=[xlm_r_galen_doc_preds, xlm_r_galen_1_doc_preds, xlm_r_galen_2_doc_preds, 
                                                              xlm_r_galen_3_doc_preds, xlm_r_galen_4_doc_preds])).transpose()

Unnamed: 0,P,P_std,P_max,R,R_std,R_max,F1,F1_std,F1_max,Thr
0,0.6,0.012,0.612,0.477,0.01,0.49,0.532,0.008,0.543,0.33


In [None]:
## Ensemble

In [372]:
sum_ens = (xlm_r_galen_doc_preds + xlm_r_galen_1_doc_preds + xlm_r_galen_2_doc_preds + xlm_r_galen_3_doc_preds + xlm_r_galen_4_doc_preds)/5 

In [373]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.646,0.503,0.566,0.512,0.33


In [None]:
# Sanity check: Manual

In [388]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.512

CPU times: user 14.7 ms, sys: 20 ms, total: 34.7 ms
Wall time: 774 ms


In [None]:
# Multi + Multi-Galén

In [374]:
best_thr = (0.52 + 0.12)/2

In [375]:
sum_ens = (multi_doc_preds + multi_1_doc_preds + multi_2_doc_preds + multi_3_doc_preds + multi_4_doc_preds + 
 multi_galen_doc_preds + multi_galen_1_doc_preds + multi_galen_2_doc_preds + multi_galen_3_doc_preds + multi_galen_4_doc_preds)/10 

In [376]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.692,0.482,0.568,0.527,0.32


In [None]:
# BETO + BETO-Galén

In [377]:
best_thr = (0.53 + 0.31)/2

In [378]:
sum_ens = (beto_doc_preds + beto_1_doc_preds + beto_2_doc_preds + beto_3_doc_preds + beto_4_doc_preds + 
 beto_galen_doc_preds + beto_galen_1_doc_preds + beto_galen_2_doc_preds + beto_galen_3_doc_preds + beto_galen_4_doc_preds)/10 

In [379]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.726,0.456,0.56,0.505,0.42


In [None]:
# XLM + XLM-R-Galén

In [380]:
best_thr = (0.43 + 0.33)/2

In [381]:
sum_ens = (xlm_r_doc_preds + xlm_r_1_doc_preds + xlm_r_2_doc_preds + xlm_r_3_doc_preds + xlm_r_4_doc_preds + 
 xlm_r_galen_doc_preds + xlm_r_galen_1_doc_preds + xlm_r_galen_2_doc_preds + xlm_r_galen_3_doc_preds + xlm_r_galen_4_doc_preds)/10 

In [382]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.681,0.484,0.566,0.502,0.38


In [None]:
# Multi + BETO + XLM-R

In [383]:
best_thr = (0.52 + 0.53 + 0.43)/3

In [384]:
sum_ens = (multi_doc_preds + multi_1_doc_preds + multi_2_doc_preds + multi_3_doc_preds + multi_4_doc_preds + 
 beto_doc_preds + beto_1_doc_preds + beto_2_doc_preds + beto_3_doc_preds + beto_4_doc_preds +
 xlm_r_doc_preds + xlm_r_1_doc_preds + xlm_r_2_doc_preds + xlm_r_3_doc_preds + xlm_r_4_doc_preds)/15 

In [385]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.75,0.431,0.548,0.497,0.493333


In [None]:
# Multi-Galén + BETO-Galén + XLM-R-Galén

In [386]:
best_thr = (0.12 + 0.31 + 0.33)/3

In [387]:
sum_ens = (multi_galen_doc_preds + multi_galen_1_doc_preds + multi_galen_2_doc_preds + multi_galen_3_doc_preds + multi_galen_4_doc_preds + 
    beto_galen_doc_preds + beto_galen_1_doc_preds + beto_galen_2_doc_preds + beto_galen_3_doc_preds + beto_galen_4_doc_preds +
    xlm_r_galen_doc_preds + xlm_r_galen_1_doc_preds + xlm_r_galen_2_doc_preds + xlm_r_galen_3_doc_preds + xlm_r_galen_4_doc_preds)/15 

In [388]:
compute_metrics([best_thr], sum_ens)

Unnamed: 0,P,R,F1,MAP,Thr
0,0.646,0.524,0.579,0.541,0.253333


In [None]:
# Sanity check: Manual

In [411]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.541

CPU times: user 13.2 ms, sys: 8.04 ms, total: 21.3 ms
Wall time: 761 ms


### Multi-Scratch + Multi-Galén

#### All

In [262]:
sum_test_preds = multi_doc_preds + multi_1_doc_preds + multi_2_doc_preds + multi_3_doc_preds + multi_4_doc_preds + multi_galen_doc_preds + multi_galen_1_doc_preds + multi_galen_2_doc_preds + multi_galen_3_doc_preds + multi_galen_4_doc_preds

In [263]:
sum_test_preds.shape

(250, 727)

In [264]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 248 ms, sys: 11.9 ms, total: 260 ms
Wall time: 259 ms


In [265]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.528

CPU times: user 11.3 ms, sys: 16.2 ms, total: 27.5 ms
Wall time: 1.59 s


### BETO-Scratch + BETO-Galén

#### All

In [270]:
sum_test_preds = beto_doc_preds + beto_1_doc_preds + beto_2_doc_preds + beto_3_doc_preds + beto_4_doc_preds + beto_galen_doc_preds + beto_galen_1_doc_preds + beto_galen_2_doc_preds + beto_galen_3_doc_preds + beto_galen_4_doc_preds 

In [271]:
sum_test_preds.shape

(250, 727)

In [272]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 249 ms, sys: 12.2 ms, total: 261 ms
Wall time: 261 ms


In [273]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.534

CPU times: user 15 ms, sys: 12.1 ms, total: 27.1 ms
Wall time: 1.61 s


### XLM-R-Scratch + XLM-R-Galén

#### All

In [347]:
sum_test_preds = xlm_r_doc_preds + xlm_r_1_doc_preds + xlm_r_2_doc_preds + xlm_r_3_doc_preds + xlm_r_4_doc_preds + xlm_r_galen_doc_preds + xlm_r_galen_1_doc_preds + xlm_r_galen_2_doc_preds + xlm_r_galen_3_doc_preds + xlm_r_galen_4_doc_preds

In [348]:
sum_test_preds.shape

(250, 727)

In [349]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 246 ms, sys: 7.84 ms, total: 253 ms
Wall time: 253 ms


In [350]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.524

CPU times: user 12.9 ms, sys: 16.3 ms, total: 29.2 ms
Wall time: 1.56 s


### Multi-Scratch + BETO-Scratch

#### All

In [278]:
sum_test_preds = multi_doc_preds + multi_1_doc_preds + multi_2_doc_preds + multi_3_doc_preds + multi_4_doc_preds + beto_doc_preds + beto_1_doc_preds + beto_2_doc_preds + beto_3_doc_preds + beto_4_doc_preds

In [279]:
sum_test_preds.shape

(250, 727)

In [280]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 250 ms, sys: 11.7 ms, total: 262 ms
Wall time: 261 ms


In [281]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.515

CPU times: user 25.2 ms, sys: 4.69 ms, total: 29.9 ms
Wall time: 1.53 s


### Multi-Scratch + BETO-Scratch + XLM-R-Scratch

#### All

In [359]:
sum_test_preds = multi_doc_preds + multi_1_doc_preds + multi_2_doc_preds + multi_3_doc_preds + multi_4_doc_preds + beto_doc_preds + beto_1_doc_preds + beto_2_doc_preds + beto_3_doc_preds + beto_4_doc_preds + xlm_r_doc_preds + xlm_r_1_doc_preds + xlm_r_2_doc_preds + xlm_r_3_doc_preds + xlm_r_4_doc_preds

In [360]:
sum_test_preds.shape

(250, 727)

In [361]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 250 ms, sys: 3.84 ms, total: 254 ms
Wall time: 253 ms


In [362]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.52

CPU times: user 22 ms, sys: 11.3 ms, total: 33.3 ms
Wall time: 1.57 s


### Multi-Galén + BETO-Galén

#### All

In [286]:
sum_test_preds = multi_galen_doc_preds + multi_galen_1_doc_preds + multi_galen_2_doc_preds + multi_galen_3_doc_preds + multi_galen_4_doc_preds + beto_galen_doc_preds + beto_galen_1_doc_preds + beto_galen_2_doc_preds + beto_galen_3_doc_preds + beto_galen_4_doc_preds 

In [287]:
sum_test_preds.shape

(250, 727)

In [288]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 264 ms, sys: 3.53 ms, total: 267 ms
Wall time: 267 ms


In [289]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.541

CPU times: user 19.9 ms, sys: 8.09 ms, total: 28 ms
Wall time: 1.6 s


### Multi-Galén + BETO-Galén + XLM-R-Galén

#### All

In [371]:
sum_test_preds = multi_galen_doc_preds + multi_galen_1_doc_preds + multi_galen_2_doc_preds + multi_galen_3_doc_preds + multi_galen_4_doc_preds + beto_galen_doc_preds + beto_galen_1_doc_preds + beto_galen_2_doc_preds + beto_galen_3_doc_preds + beto_galen_4_doc_preds + xlm_r_galen_doc_preds + xlm_r_galen_1_doc_preds + xlm_r_galen_2_doc_preds + xlm_r_galen_3_doc_preds + xlm_r_galen_4_doc_preds 

In [372]:
sum_test_preds.shape

(250, 727)

In [373]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 248 ms, sys: 7.44 ms, total: 255 ms
Wall time: 254 ms


In [374]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.544

CPU times: user 26.4 ms, sys: 4.55 ms, total: 31 ms
Wall time: 1.59 s


### Multi-Scratch

#### All

In [290]:
sum_test_preds = multi_doc_preds + multi_1_doc_preds + multi_2_doc_preds + multi_3_doc_preds + multi_4_doc_preds

In [291]:
sum_test_preds.shape

(250, 727)

In [292]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 266 ms, sys: 7.61 ms, total: 273 ms
Wall time: 273 ms


In [293]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.508

CPU times: user 25.9 ms, sys: 4.97 ms, total: 30.9 ms
Wall time: 1.61 s


### Multi-Galén

#### All

In [294]:
sum_test_preds = multi_galen_doc_preds + multi_galen_1_doc_preds + multi_galen_2_doc_preds + multi_galen_3_doc_preds + multi_galen_4_doc_preds

In [295]:
sum_test_preds.shape

(250, 727)

In [296]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 264 ms, sys: 3.73 ms, total: 268 ms
Wall time: 268 ms


In [297]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.521

CPU times: user 26.8 ms, sys: 4.78 ms, total: 31.6 ms
Wall time: 1.58 s


### Multi-MIMIC

#### All

In [298]:
sum_test_preds = multi_mimic_doc_preds + multi_mimic_1_doc_preds + multi_mimic_2_doc_preds + multi_mimic_3_doc_preds + multi_mimic_4_doc_preds

In [299]:
sum_test_preds.shape

(250, 727)

In [300]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 252 ms, sys: 7.82 ms, total: 260 ms
Wall time: 260 ms


In [301]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.493

CPU times: user 22.7 ms, sys: 8.42 ms, total: 31.1 ms
Wall time: 1.63 s


### Multi-Galén-MIMIC

#### All

In [304]:
sum_test_preds = multi_mimic_galen_doc_preds + multi_mimic_galen_1_doc_preds + multi_mimic_galen_2_doc_preds + multi_mimic_galen_3_doc_preds + multi_mimic_galen_4_doc_preds

In [305]:
sum_test_preds.shape

(250, 727)

In [306]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 255 ms, sys: 7.83 ms, total: 263 ms
Wall time: 261 ms


In [307]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.526

CPU times: user 22.6 ms, sys: 8.5 ms, total: 31.1 ms
Wall time: 1.65 s


### BETO-Scratch

#### All

In [252]:
sum_test_preds = beto_doc_preds + beto_1_doc_preds + beto_2_doc_preds + beto_3_doc_preds + beto_4_doc_preds

In [253]:
sum_test_preds.shape

(250, 727)

In [254]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 242 ms, sys: 11.7 ms, total: 254 ms
Wall time: 254 ms


In [255]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.496

CPU times: user 19.7 ms, sys: 11.5 ms, total: 31.2 ms
Wall time: 1.53 s


### BETO-Galén

#### All

In [256]:
sum_test_preds = beto_galen_doc_preds + beto_galen_1_doc_preds + beto_galen_2_doc_preds + beto_galen_3_doc_preds + beto_galen_4_doc_preds

In [257]:
sum_test_preds.shape

(250, 727)

In [258]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 260 ms, sys: 0 ns, total: 260 ms
Wall time: 259 ms


In [259]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.537

CPU times: user 14.4 ms, sys: 12.8 ms, total: 27.1 ms
Wall time: 1.53 s


### XLM-R-Scratch

#### All

In [341]:
sum_test_preds = xlm_r_doc_preds + xlm_r_1_doc_preds + xlm_r_2_doc_preds + xlm_r_3_doc_preds + xlm_r_4_doc_preds

In [342]:
sum_test_preds.shape

(250, 727)

In [343]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 252 ms, sys: 3.66 ms, total: 255 ms
Wall time: 255 ms


In [344]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.501

CPU times: user 18.2 ms, sys: 12.4 ms, total: 30.6 ms
Wall time: 1.6 s


### XLM-R-Galén

#### All

In [333]:
sum_test_preds = xlm_r_galen_doc_preds + xlm_r_galen_1_doc_preds + xlm_r_galen_2_doc_preds + xlm_r_galen_3_doc_preds + xlm_r_galen_4_doc_preds

In [334]:
sum_test_preds.shape

(250, 727)

In [335]:
%%time
test_pred_df = prob_codiesp_prediction_format(y_pred=sum_test_preds,
                                      label_encoder_classes=mlb_encoder.classes_, doc_list=test_doc_list)
test_pred_df[["doc_id", "code"]].to_csv(path_or_buf=test_pred_file_path, sep="\t", header=False, index=False)

CPU times: user 253 ms, sys: 0 ns, total: 253 ms
Wall time: 252 ms


In [336]:
%%time
# Manual insert (copy + paste) train pred file name
!python ../../resources/CodiEsp-Evaluation-Script/codiespD_P_evaluation.py -g ../../datasets/final_dataset_v4_to_publish/test/testP.tsv -p proc_test_frag_preds.tsv -c ../../resources/CodiEsp-Evaluation-Script/codiesp_codes/codiesp-P_codes.tsv  


MAP estimate: 0.526

CPU times: user 22.7 ms, sys: 12.1 ms, total: 34.9 ms
Wall time: 1.6 s
