# Order-free matching performance

In [1]:
# Generic imports

import glob
import os
import pandas as pd
from os import listdir
from os.path import isfile, join
import ast
import json


# Sklearn imports
from sklearn.metrics import f1_score, recall_score
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, auc, roc_curve

from tqdm import tqdm

## Read order-free candidates

In [2]:
picos = 'I' # ['P', 'I', 'O', 'S']
match_level = 'doc' # ['doc', 'sent', 'win_5', 'para']

In [3]:
order_free_dir = f'/mnt/nas2/results/Results/systematicReview/order_free_matching/EBM_PICO_training_matches/order_free/{match_level}/{picos}'
order_free_files = os.listdir(order_free_dir)
print('Files: ', order_free_files)

Files:  ['Biomedical_or_Dental_Material.json', '.nfs0000000011200a4200000003', 'Classification.json', 'Intellectual_Product.json', 'Biologically_Active_Substance.json', 'Diagnostic_Procedure.json', 'Gene_or_Genome.json', 'Finding.json', 'Functional_Concept.json', 'Medical_Device.json', 'Organic_Chemical.json', 'Laboratory__Procedure.json', 'Manufactured_Object.json', 'train_ebm_intervention.json', 'Professional_Society.json', 'Pharmacologic_Substance.json', 'Therapeutic_or_Preventive_Procedure.json', 'train_ebm_intervention_syn.json', 'Biomedical_Occupation_or_Discipline.json', 'Health_Care_Activity.json', 'Idea_or_Concept.json', 'Temporal_Concept.json']


In [4]:
order_free_files.remove('.nfs0000000011200a4200000003')
#order_free_files.remove('Finding.json')

In [5]:
orf_loaded_files = dict()

for i in tqdm(order_free_files):
    
    filpath = f'{order_free_dir}/{i}'
    print('Loading file...', i)
    with open( filpath, 'r' ) as rf:
        orf_i = json.load(rf)
        orf_loaded_files[i] = orf_i

  0%|          | 0/20 [00:00<?, ?it/s]

Loading file... Biomedical_or_Dental_Material.json


  5%|▌         | 1/20 [00:00<00:15,  1.24it/s]

Loading file... Classification.json


 10%|█         | 2/20 [00:01<00:11,  1.59it/s]

Loading file... Intellectual_Product.json


 15%|█▌        | 3/20 [00:15<01:59,  7.02s/it]

Loading file... Biologically_Active_Substance.json


 20%|██        | 4/20 [00:20<01:34,  5.89s/it]

Loading file... Diagnostic_Procedure.json


 25%|██▌       | 5/20 [00:44<03:09, 12.60s/it]

Loading file... Gene_or_Genome.json


 30%|███       | 6/20 [00:53<02:38, 11.30s/it]

Loading file... Functional_Concept.json


 35%|███▌      | 7/20 [00:54<01:41,  7.83s/it]

Loading file... Medical_Device.json


 40%|████      | 8/20 [00:57<01:18,  6.56s/it]

Loading file... Organic_Chemical.json


 45%|████▌     | 9/20 [01:08<01:26,  7.84s/it]

Loading file... Laboratory__Procedure.json


 50%|█████     | 10/20 [01:35<02:16, 13.63s/it]

Loading file... Manufactured_Object.json


 55%|█████▌    | 11/20 [01:35<01:26,  9.65s/it]

Loading file... train_ebm_intervention.json


 60%|██████    | 12/20 [02:08<02:13, 16.70s/it]

Loading file... Professional_Society.json
Loading file... Pharmacologic_Substance.json


 70%|███████   | 14/20 [02:13<01:00, 10.02s/it]

Loading file... Therapeutic_or_Preventive_Procedure.json


 75%|███████▌  | 15/20 [06:25<05:51, 70.29s/it]

Loading file... train_ebm_intervention_syn.json


 80%|████████  | 16/20 [06:27<03:29, 52.47s/it]

Loading file... Biomedical_Occupation_or_Discipline.json
Loading file... Health_Care_Activity.json


 90%|█████████ | 18/20 [07:32<01:27, 43.98s/it]

Loading file... Idea_or_Concept.json


 95%|█████████▌| 19/20 [07:33<00:33, 33.74s/it]

Loading file... Temporal_Concept.json


100%|██████████| 20/20 [07:33<00:00, 22.69s/it]


## difference between intervention and intervention syn

In [12]:
def get_orf(v, par):
    
    orfs = dict()
    
    for k_i, v_i in v.items():
        # print( k_i ) # example: name_15_3, name_9_9, name_12_8
        
        if 'Inters. (full)' in v_i and len(v_i['Inters. (full)']) > 0:
            
            full_inters = v_i['Inters. (full)']
            # full_inters.keys() = PMIDs
            # full_inters.values() = offsets, tokens

            for pmid, matches in full_inters.items():
                if len(matches['char offs.']) > 1:
                    if pmid not in orfs:
                        orfs[pmid] = [ ]
                        orfs[pmid].extend( matches['word offs.'] )
                    else:
                        orfs[pmid].extend( matches['word offs.'] )


        if 'Inters. (partial)' in v_i and len(v_i['Inters. (partial)']) > 0 and (par=='both' or par==True):
            
            par_inters = v_i['Inters. (partial)']
            
            for pmid, matches in par_inters.items():
                if len(matches['char offs.']) > 1:
                    if pmid not in orfs:
                        orfs[pmid] = [ ]
                        orfs[pmid].extend( matches['word offs.'] )
                    else:
                        orfs[pmid].extend( matches['word offs.'] )
    
    return orfs

In [32]:
orfs_dict = dict()

for k,v in orf_loaded_files.items():

    orf_fetched = get_orf(v, par = False)
    orfs_dict[k] = orf_fetched

In [33]:
order_offset_dict = dict()

for k,v in orfs_dict.items():
    
    total_offsets = []
    for k_i, v_i in v.items():
        total_offsets.extend( v_i )
    
    order_offset_dict[k] = len(total_offsets)
    
    print( f'Offsets in {k}: ', len(total_offsets) )

Offsets in Biomedical_or_Dental_Material.json:  3258
Offsets in Classification.json:  8369
Offsets in Intellectual_Product.json:  86335
Offsets in Biologically_Active_Substance.json:  15494
Offsets in Diagnostic_Procedure.json:  33795
Offsets in Gene_or_Genome.json:  7226
Offsets in Functional_Concept.json:  19792
Offsets in Medical_Device.json:  10628
Offsets in Organic_Chemical.json:  13818
Offsets in Laboratory__Procedure.json:  44098
Offsets in Manufactured_Object.json:  7624
Offsets in train_ebm_intervention.json:  2835
Offsets in Professional_Society.json:  8
Offsets in Pharmacologic_Substance.json:  37882
Offsets in Therapeutic_or_Preventive_Procedure.json:  145151
Offsets in train_ebm_intervention_syn.json:  25540
Offsets in Biomedical_Occupation_or_Discipline.json:  4425
Offsets in Health_Care_Activity.json:  60790
Offsets in Idea_or_Concept.json:  13321
Offsets in Temporal_Concept.json:  70258


In [34]:
# sort dictionary
order_offset_sorteddict = dict(sorted(order_offset_dict.items(), key=lambda x: x[1]))

In [35]:
order_offset_sorteddict

{'Professional_Society.json': 8,
 'train_ebm_intervention.json': 2835,
 'Biomedical_or_Dental_Material.json': 3258,
 'Biomedical_Occupation_or_Discipline.json': 4425,
 'Gene_or_Genome.json': 7226,
 'Manufactured_Object.json': 7624,
 'Classification.json': 8369,
 'Medical_Device.json': 10628,
 'Idea_or_Concept.json': 13321,
 'Organic_Chemical.json': 13818,
 'Biologically_Active_Substance.json': 15494,
 'Functional_Concept.json': 19792,
 'train_ebm_intervention_syn.json': 25540,
 'Diagnostic_Procedure.json': 33795,
 'Pharmacologic_Substance.json': 37882,
 'Laboratory__Procedure.json': 44098,
 'Health_Care_Activity.json': 60790,
 'Temporal_Concept.json': 70258,
 'Intellectual_Product.json': 86335,
 'Therapeutic_or_Preventive_Procedure.json': 145151}

## Load order-bound matches

In [39]:
def order_free_matches(x, orf_offsets):
    
    labs_modified = []
    
    for i, (identifier, offs, labs) in enumerate( zip(x.pmid, x.offsets, x.labels) ):
             
        lab_val = [v for k, v in ast.literal_eval(labs).items()] 
        off_val = ast.literal_eval(offs) 
        
        if str(identifier) in orf_offsets: 
            orf_matches =  orf_offsets[ str(identifier) ]
            match_indices = [ off_val.index(m) for m in orf_matches ]
            for i, l in enumerate(lab_val):
                if i in match_indices:
                    lab_val[i] = 1
                    
        labs_modified.append( lab_val )
        
        
    return labs_modified

In [40]:
ob_int = f'/mnt/nas2/results/Results/systematicReview/order_free_matching/EBM_PICO_training_matches/direct/{picos}/lf_ds_intervention_syn.tsv'
ob_int_syn = f'/mnt/nas2/results/Results/systematicReview/order_free_matching/EBM_PICO_training_matches/direct/{picos}/lf_ds_intervetion.tsv'

In [41]:
ob_int_df = pd.read_csv(ob_int, sep='\t', header=0)
ob_int_syn_df = pd.read_csv(ob_int_syn, sep='\t', header=0)
ob_merged_df = pd.concat([ob_int_df,ob_int_syn_df])

In [42]:
def process_gt(l):
    
    labels = l
    
    if isinstance(labels, str):
        labels = ast.literal_eval(labels)
        
    # convert non-1 fine labels labels to 1's
    labels = ['1' if (n != '1' and n != '0') else str(n) for i, n in enumerate(labels) ]
    
    return labels

ob_int_df['i'] = ob_int_df.i.apply(process_gt)
ob_int_syn_df['i'] = ob_int_syn_df.i.apply(process_gt)

ob_int_df['i_f'] = ob_int_df.i_f.apply(process_gt)
ob_int_syn_df['i_f'] = ob_int_syn_df.i_f.apply(process_gt)

In [43]:
# Fetch ground truth from the direct matching

coarse_int_gt = dict(zip(ob_int_df['pmid'], ob_int_df['i']))
fine_int_gt = dict(zip(ob_int_df['pmid'], ob_int_df['i_f']))

In [44]:
# Preprocess order-bound labels

def process_ob_labs(l):
    
    labels = l
    
    if isinstance( labels, str ):
        labels = ast.literal_eval(labels)

    labels = [ v for k, v in labels.items() ]
    labels = ['0' if n == -1 else str(n) for i, n in enumerate(labels) ]

    return labels

ob_int_df['labels'] = ob_int_df.labels.apply(process_ob_labs) # order bound matching labels for int source
ob_int_syn_df['labels'] = ob_int_syn_df.labels.apply(process_ob_labs) # order bound matching labels for int_syn source

In [45]:
# Fetch order-bound predictions for merged dataframes

ob_preds_merged = dict()

ob_int_dict = dict(zip(ob_int_df['pmid'], ob_int_df['labels']))
ob_int_syn_dict = dict(zip(ob_int_syn_df['pmid'], ob_int_syn_df['labels']))

for k,v in ob_int_dict.items():

    if k not in ob_preds_merged:
        ob_preds_merged[k] = []
        ob_preds_merged[k] = v

    else:
        old_pred = ob_preds_merged[k]
        new_pred = v
        
        # merge old and new predictions
        merged_predictions = [ max( o,n ) for o,n in zip( old_pred, new_pred ) ]
        assert len( old_pred ) == len( new_pred ) == len( merged_predictions )
        ob_preds_merged[k] = merged_predictions

In [46]:
len( list(ob_preds_merged.values()) )

4802

In [47]:
for k,v in ob_int_syn_dict.items():

    if k not in ob_preds_merged:
        ob_preds_merged[k] = []
        ob_preds_merged[k] = v

    else:
        old_pred = ob_preds_merged[k]
        new_pred = v

        # merge old and new predictions
        #print( 'merging the new predictions...' )
        merged_predictions = [ max( o,n ) for o,n in zip( old_pred, new_pred ) ]
        assert len( old_pred ) == len( new_pred ) == len( merged_predictions )
        ob_preds_merged[k] = merged_predictions

## merge and calculate evaluation metrics

In [48]:
def flatten(d):
    l = [ v for k,v in d.items() ]
    l = [item for sublist in l for item in sublist]
    l = list(map(int, l))
    return l

In [49]:
# ground truth : coarse_int_gt, fine_int_gt
# ob/direct matching preds :  ob_preds_merged

order_bound_preds = flatten( ob_preds_merged )
picos_coarse = flatten( coarse_int_gt )
picos_fine = flatten( fine_int_gt )

In [50]:
def merge_preds_ordered( ob_preds, orf_offsets ):
    
    merged_predictions = dict()
    
    for k, v in ob_preds.items(): # ob preds are the old preds
        
        k = str(k)

        if k in orf_offsets:
            matching_offsets = orf_offsets[k]
            old_preds = list( map( int, v ))
            new_preds = list( map( int, v ))
                             
            # add new offsets to the new_preds
            for indice in matching_offsets:
                new_preds[indice] = 1

            merged_predictions[ k ] = new_preds
        else:
            merged_predictions[ k ] = list( map( int, v ) )
    
    return merged_predictions

In [51]:
all_orf_ordered = list(order_offset_sorteddict.keys())
all_orf_ordered.insert( 0, 'order bound' )

In [31]:
base_preds = dict()
base_preds_flattened = []


for count, i in enumerate(all_orf_ordered):
    
    # get offsets and merge with the ob ones
    if count == 0:
        base_preds = ob_preds_merged
        base_preds_flattened =  flatten( ob_preds_merged )
    else:
        # Add more dicts to orderbound preds and modify base_preds_flattened
        updated_of_preds = merge_preds_ordered( base_preds, orfs_dict[i] )
        base_preds_flattened =  flatten( updated_of_preds )
        base_preds = updated_of_preds
    
    
    # Classification report
    cr_order_bound_coarse = classification_report( picos_coarse, base_preds_flattened, digits=4  )
    print(f'Confusion matrix for coarse-grained ground truth and {i} matches')
    print( cr_order_bound_coarse )

    cr_order_bound_fine = classification_report( picos_fine, base_preds_flattened, digits=4  )
    print(f'\n\nConfusion matrix for fine-grained ground truth and {i} matches')
    print( cr_order_bound_fine )
    
    print( '--------------------------------------------------------------------------' )

Confusion matrix for coarse-grained ground truth and order bound matches
              precision    recall  f1-score   support

           0     0.9295    0.7394    0.8236   1177209
           1     0.1635    0.4761    0.2434    125960

    accuracy                         0.7139   1303169
   macro avg     0.5465    0.6077    0.5335   1303169
weighted avg     0.8555    0.7139    0.7675   1303169



Confusion matrix for fine-grained ground truth and order bound matches
              precision    recall  f1-score   support

           0     0.9565    0.7385    0.8335   1212904
           1     0.1352    0.5492    0.2169     90265

    accuracy                         0.7254   1303169
   macro avg     0.5459    0.6438    0.5252   1303169
weighted avg     0.8997    0.7254    0.7908   1303169

--------------------------------------------------------------------------
Confusion matrix for coarse-grained ground truth and Professional_Society.json matches
              precision    recall  f1-



Confusion matrix for fine-grained ground truth and Functional_Concept.json matches
              precision    recall  f1-score   support

           0     0.9565    0.6962    0.8059   1212904
           1     0.1234    0.5744    0.2031     90265

    accuracy                         0.6878   1303169
   macro avg     0.5399    0.6353    0.5045   1303169
weighted avg     0.8988    0.6878    0.7641   1303169

--------------------------------------------------------------------------
Confusion matrix for coarse-grained ground truth and Temporal_Concept.json matches
              precision    recall  f1-score   support

           0     0.9284    0.6891    0.7911   1177209
           1     0.1476    0.5033    0.2283    125960

    accuracy                         0.6711   1303169
   macro avg     0.5380    0.5962    0.5097   1303169
weighted avg     0.8529    0.6711    0.7367   1303169



Confusion matrix for fine-grained ground truth and Temporal_Concept.json matches
              precis

Confusion matrix for coarse-grained ground truth and Health_Care_Activity.json matches
              precision    recall  f1-score   support

           0     0.9267    0.6176    0.7412   1177209
           1     0.1321    0.5437    0.2125    125960

    accuracy                         0.6105   1303169
   macro avg     0.5294    0.5807    0.4769   1303169
weighted avg     0.8499    0.6105    0.6901   1303169



Confusion matrix for fine-grained ground truth and Health_Care_Activity.json matches
              precision    recall  f1-score   support

           0     0.9562    0.6185    0.7511   1212904
           1     0.1077    0.6191    0.1835     90265

    accuracy                         0.6185   1303169
   macro avg     0.5320    0.6188    0.4673   1303169
weighted avg     0.8974    0.6185    0.7118   1303169

--------------------------------------------------------------------------
Confusion matrix for coarse-grained ground truth and Therapeutic_or_Preventive_Procedure.json mat

In [52]:
base_preds = dict()
base_preds_flattened = []


for count, i in enumerate(all_orf_ordered):
    
    # get offsets and merge with the ob ones
    if count == 0:
        base_preds = ob_preds_merged
        base_preds_flattened =  flatten( ob_preds_merged )
    else:
        # Add more dicts to orderbound preds and modify base_preds_flattened
        updated_of_preds = merge_preds_ordered( base_preds, orfs_dict[i] )
        base_preds_flattened =  flatten( updated_of_preds )
        base_preds = updated_of_preds
    
    
    # Classification report
    cr_order_bound_coarse = classification_report( picos_coarse, base_preds_flattened, digits=4  )
    print(f'Confusion matrix for coarse-grained ground truth and {i} matches')
    print( cr_order_bound_coarse )

    cr_order_bound_fine = classification_report( picos_fine, base_preds_flattened, digits=4  )
    print(f'\n\nConfusion matrix for fine-grained ground truth and {i} matches')
    print( cr_order_bound_fine )
    
    print( '--------------------------------------------------------------------------' )

Confusion matrix for coarse-grained ground truth and order bound matches
              precision    recall  f1-score   support

           0     0.9295    0.7394    0.8236   1177209
           1     0.1635    0.4761    0.2434    125960

    accuracy                         0.7139   1303169
   macro avg     0.5465    0.6077    0.5335   1303169
weighted avg     0.8555    0.7139    0.7675   1303169



Confusion matrix for fine-grained ground truth and order bound matches
              precision    recall  f1-score   support

           0     0.9565    0.7385    0.8335   1212904
           1     0.1352    0.5492    0.2169     90265

    accuracy                         0.7254   1303169
   macro avg     0.5459    0.6438    0.5252   1303169
weighted avg     0.8997    0.7254    0.7908   1303169

--------------------------------------------------------------------------
Confusion matrix for coarse-grained ground truth and Professional_Society.json matches
              precision    recall  f1-



Confusion matrix for fine-grained ground truth and Idea_or_Concept.json matches
              precision    recall  f1-score   support

           0     0.9568    0.7322    0.8296   1212904
           1     0.1338    0.5559    0.2157     90265

    accuracy                         0.7200   1303169
   macro avg     0.5453    0.6440    0.5226   1303169
weighted avg     0.8998    0.7200    0.7870   1303169

--------------------------------------------------------------------------
Confusion matrix for coarse-grained ground truth and Organic_Chemical.json matches
              precision    recall  f1-score   support

           0     0.9299    0.7323    0.8194   1177209
           1     0.1622    0.4841    0.2429    125960

    accuracy                         0.7084   1303169
   macro avg     0.5460    0.6082    0.5312   1303169
weighted avg     0.8557    0.7084    0.7637   1303169



Confusion matrix for fine-grained ground truth and Organic_Chemical.json matches
              precision

Confusion matrix for coarse-grained ground truth and Intellectual_Product.json matches
              precision    recall  f1-score   support

           0     0.9295    0.7106    0.8054   1177209
           1     0.1550    0.4962    0.2362    125960

    accuracy                         0.6899   1303169
   macro avg     0.5423    0.6034    0.5208   1303169
weighted avg     0.8546    0.6899    0.7504   1303169



Confusion matrix for fine-grained ground truth and Intellectual_Product.json matches
              precision    recall  f1-score   support

           0     0.9570    0.7101    0.8152   1212904
           1     0.1279    0.5712    0.2090     90265

    accuracy                         0.7005   1303169
   macro avg     0.5424    0.6406    0.5121   1303169
weighted avg     0.8996    0.7005    0.7733   1303169

--------------------------------------------------------------------------
Confusion matrix for coarse-grained ground truth and Therapeutic_or_Preventive_Procedure.json mat