In [355]:
import enum
import glob
import os
from hashlib import new
from pathlib import Path
import time
from itertools import product

import functools

import numpy as np
import pandas as pd
import scipy
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

#from snorkel.labeling.model import LabelModel
from snorkel.labeling.model import LabelModel as LMsnorkel
from snorkel.labeling import PandasLFApplier

from snorkel.labeling.model import MajorityLabelVoter

from sklearn.model_selection import train_test_split
import itertools
import ast

In [356]:
candgen_version = 'v4' # version = {v3, v4, ...}

In [357]:
import joblib
import json
import collections

In [358]:
from sklearn.exceptions import UndefinedMetricWarning

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

In [359]:
def list2Nested(l, nested_length):
    return [l[i:i+nested_length] for i in range(0, len(l), nested_length)]

In [360]:
# 1:1 positive to positive
# -1:0 negative cand_gen to negative in label model
# 0:-1 Abstain cand_gen to abstain in label model

# In study type, abstain is actually a negative instance 
#labelModel_mapper_LF = {1:1, 0:0, -1:-1}
labelModel_mapper_LF = {1:1, -1:0, 0:-1}

In [361]:
import LMutils

train_file = f'/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/{candgen_version}/gt/train_ebm_labels_tui_pio3.tsv'
training_data = pd.read_csv(train_file, sep='\t', header=0)

ebm_test_file = f'/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/{candgen_version}/gt/test_ebm_labels_tui_pio3.tsv'
test_ebm_data = pd.read_csv(ebm_test_file, sep='\t', header=0)
test_ebm_data.rename( columns={'Unnamed: 0':'series'}, inplace=True )

physio_test_file = f'/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/{candgen_version}/gt/test_physio_labels_tui_pio3.tsv'
test_physio_data = pd.read_csv(physio_test_file, sep='\t', header=0)
test_physio_data.rename( columns={'Unnamed: 0':'series'}, inplace=True )

ebm_test_corrected_file = f'/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/{candgen_version}/gt/test_ebm_correctedlabels_tui_pio3.tsv'
test_ebm_corrected_data = pd.read_csv(ebm_test_corrected_file, sep='\t', header=0)
test_ebm_corrected_data.rename( columns={'Unnamed: 0':'series'}, inplace=True )

In [362]:
def flatten_df(df):

    df_series = [ index for index, value in df.tokens.items() for word in ast.literal_eval(value) ]
    df_tokens = [ word for index, value in df.tokens.items() for word in ast.literal_eval(value) ]
    df_pos = [ word for index, value in df.pos.items() for word in ast.literal_eval(value) ]
    df_offsets = [ word for index, value in df.offsets.items() for word in ast.literal_eval(value) ]


    df_p = [ int(lab) for index, value in df.p.items() for lab in ast.literal_eval(value) ]
    df_p_fine = [ int(lab) for index, value in df.p_f.items() for lab in ast.literal_eval(value) ]
    df_i = [ int(lab) for index, value in df.i.items() for lab in ast.literal_eval(value) ]
    df_i_fine = [ int(lab) for index, value in df.i_f.items() for lab in ast.literal_eval(value) ]
    df_o = [ int(lab) for index, value in df.o.items() for lab in ast.literal_eval(value) ]
    df_o_fine = [ int(lab) for index, value in df.o_f.items() for lab in ast.literal_eval(value) ]
    df_s = [ int(lab) for index, value in df.s.items() for lab in ast.literal_eval(value) ]
    df_s_fine = [ int(lab) for index, value in df.s_f.items() for lab in ast.literal_eval(value) ]
    
    df_flattened = pd.DataFrame({ 'series': df_series,
                        'tokens' : df_tokens,
                        'offsets': df_offsets,
                        'pos': df_pos,
                        'p' : df_p,
                        'i' : df_i,
                        'o' : df_o,
                        's' : df_s,
                        'p_f' : df_p_fine,
                        'i_f' : df_i_fine,
                        'o_f' : df_o_fine,
                        's_f' : df_s_fine})
    
    return df_flattened

In [363]:
# Flatten the dataframes (currently only the training dataframe and test ebm dataframe with corrected labels can be flattened)
data_df = flatten_df(training_data)
test_ebm_data = flatten_df(test_ebm_data)
test_ebm_corr_df = flatten_df(test_ebm_corrected_data)

In [364]:
series = [
    data_df.series.to_numpy() ,
    test_ebm_data.series.to_numpy() ,
    test_physio_data.series.to_numpy(),   
    test_ebm_corr_df.series.to_numpy()
]


sents = [
    data_df.tokens.to_numpy() ,
    test_ebm_data.tokens.to_numpy() ,
    test_physio_data.tokens.to_numpy(),   
    test_ebm_corr_df.tokens.to_numpy()    
]


part_of_speech = [
    data_df.pos.to_numpy() ,
    test_ebm_data.pos.to_numpy() ,
    test_physio_data.pos.to_numpy(),   
    test_ebm_corr_df.pos.to_numpy()     
]


offsets = [
    data_df.offsets.to_numpy() ,
    test_ebm_data.offsets.to_numpy() ,
    test_physio_data.offsets.to_numpy(),   
    test_ebm_corr_df.offsets.to_numpy() 
]


Y_p = [
    data_df.p.to_numpy() , # 0 -7
    data_df.p_f.to_numpy() , # 1 -6
    test_ebm_data.p.to_numpy() , # 2 -5
    test_ebm_data.p_f.to_numpy() , # 3 -4
    test_physio_data.p.to_numpy(),  # 4 -3
    test_ebm_corr_df.p.to_numpy(),   # 5 -2
    test_ebm_corr_df.p_f.to_numpy() # 6 -1
]


Y_i = [
    data_df.i.to_numpy() , # 0 -7
    data_df.i_f.to_numpy() , # 1 -6
    test_ebm_data.i.to_numpy() , # 2 -5
    test_ebm_data.i_f.to_numpy() , # 3 -4
    test_physio_data.i.to_numpy(),  # 4 -3
    test_ebm_corr_df.i.to_numpy(),   # 5 -2
    test_ebm_corr_df.i_f.to_numpy() # 6 -1
]


Y_o = [
    data_df.o.to_numpy() , # 0 -7
    data_df.o_f.to_numpy() , # 1 -6
    test_ebm_data.o.to_numpy() , # 2 -5
    test_ebm_data.o_f.to_numpy() , # 3 -4
    test_physio_data.o.to_numpy(),  # 4 -3
    test_ebm_corr_df.o.to_numpy(),   # 5 -2
    test_ebm_corr_df.o_f.to_numpy() # 6 -1
]

Y_s = [
    data_df.s.to_numpy() , # 0 -7
    data_df.s_f.to_numpy() , # 1 -6
    test_ebm_data.s.to_numpy() , # 2 -5
    test_ebm_data.s_f.to_numpy() , # 3 -4
    test_physio_data.s.to_numpy(),  # 4 -3
    test_ebm_corr_df.s.to_numpy(),   # 5 -2
    test_ebm_corr_df.s_f.to_numpy() # 6 -1
]

In [365]:
# Write data for error analysis

error_analysis_ebm_p = pd.DataFrame({'tokens' : test_ebm_data.tokens,
                                'participant' : test_ebm_data.p,
                                'participant_fine' : test_ebm_data.p_f }, 
                                columns=['tokens','participant', 'participant_fine'])

#error_analysis_ebm_p.to_csv (r'/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/error_analysis/test_ebmgold_p', index = None, header=True) 

In [366]:
# Write data for error analysis

error_analysis_ebmcorr_p = pd.DataFrame({'tokens' : test_ebm_corr_df.tokens,
                                'participant' : test_ebm_corr_df.p,
                                'participant_fine' : test_ebm_corr_df.p_f }, 
                                columns=['tokens','participant', 'participant_fine'])

#error_analysis_ebmcorr_p.to_csv (r'/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/error_analysis/test_ebmgoldcorr_p', index = None, header=True) 

In [367]:
def df_to_list(data_column):
    return [ word for index, value in data_column.items() for word in ast.literal_eval(value) ]

In [368]:
def df_to_array(data_column):
    return np.array( [ word for index, value in data_column.items() for word in ast.literal_eval(value) ] )

In [369]:
def dict_to_array(label_column):
    return np.array( [ labelModel_mapper_LF[int(lab)] for index, value in label_column.items() for k, lab in ast.literal_eval(value).items() ] )

In [409]:
def get_lfs(indir):
    
    pathlist = Path(indir).glob('**/*.tsv')

    tokens = ''

    lfs = dict()
    lfs_lm = dict()

    for counter, file in enumerate(pathlist):
        
        if '/S/' in str(file):

            k = str( file ).split(f'/{candgen_version}/')[-1].replace('.tsv', '').replace('/', '_')
            mypath = Path(file)
            if mypath.stat().st_size != 0:
                data = pd.read_csv(file, sep='\t', header=0)

                data_tokens = data.tokens
                if len(tokens) < 5:
                    tokens = df_to_array(data_tokens)

                data_labels = data.labels
                #print(data_labels[1:2])
                labels = dict_to_array(data_labels)
                #print(labels)
                if len(labels) != len(tokens):
                    print(k, len(labels) , len(tokens) )
                #assert len(labels) == len(tokens)
                lfs[k] = labels


    print( 'Total number of tokens in validation set: ', len(tokens) )
    print( 'Total number of LFs in the dictionary', len(lfs) )
    
    return lfs

In [410]:
indir = f'/mnt/nas2/results/Results/systematicReview/distant_pico/training_ebm_candidate_generation/{candgen_version}'
train_ebm_lfs = get_lfs(indir)

Total number of tokens in validation set:  1303169
Total number of LFs in the dictionary 32


In [372]:
indir_test_ebm_corr = f'/mnt/nas2/results/Results/systematicReview/distant_pico/test_ebm_anjani_candidate_generation/{candgen_version}'
test_ebm_corr_lfs = get_lfs(indir_test_ebm_corr)

Total number of tokens in validation set:  52582
Total number of LFs in the dictionary 32


In [414]:
indir_test_ebm = f'/mnt/nas2/results/Results/systematicReview/distant_pico/test_ebm_candidate_generation/{candgen_version}'
test_ebm_lfs = get_lfs(indir_test_ebm)

Total number of tokens in validation set:  51784
Total number of LFs in the dictionary 32


In [415]:
# drop some lfs
def drop_nopositive(lfs_d):
    
    dropped_conditions = dict()

    for k, v in lfs_d.items():
        
        dropped_conditions[k] = v
        '''
        if '_cto' not in str(k) and '_s_heurpattern_labels_2' not in str(k):
            dropped_conditions[k] = v
        else:
            pass
        '''
            
    return dropped_conditions


In [416]:
train_ebm_lfs_dropped = drop_nopositive(train_ebm_lfs)
test_ebm_corr_lfs_dropped = drop_nopositive(test_ebm_corr_lfs)
test_ebm_lfs_dropped = drop_nopositive(test_ebm_lfs)

In [417]:
test_ebm_lfs_dropped.keys()

dict_keys(['dictionary_fuzzy_S_lf_dict_s_type', 'dictionary_fuzzy_S_lf_dict_s_comp_type', 'dictionary_fuzzy_S_lf_dict_s_type_negs', 'dictionary_fuzzy_S_lf_dict_s_comp_type_negs', 'dictionary_direct_S_lf_dict_s_type', 'dictionary_direct_S_lf_dict_s_comp_type', 'dictionary_direct_S_lf_dict_s_type_negs', 'dictionary_direct_S_lf_dict_s_comp_type_negs', 'nonUMLS_fuzzy_S_lf_s_cto', 'nonUMLS_fuzzy_S_lf_s_cto_syn', 'nonUMLS_direct_S_lf_s_cto', 'nonUMLS_direct_S_lf_s_cto_syn', 'heuristics_direct_S_lf_lf_lf_s_heurpattern_labels', 'heuristics_direct_S_lf_regex_stdtype', 'heuristics_direct_S_lf_regex_phase_negs', 'heuristics_direct_S_lf_regex_stdtype_negs', 'heuristics_direct_S_lf_dict_s_abb_negs', 'heuristics_direct_S_lf_regex_stdtype_types_negs', 'heuristics_direct_S_lf_regex_stdtype_basic_negs', 'heuristics_direct_S_lf_regex_stdtype_proc', 'heuristics_direct_S_lf_regex_phase', 'heuristics_direct_S_lf_regex_placebo_negs', 'heuristics_direct_S_lf_regex_stdtype_types', 'heuristics_direct_S_lf_rege

In [418]:
def lf_levels(umls_d:dict, pattern:str, picos:str):

    umls_level = dict()

    for key, value in umls_d.items():   # iter on both keys and values
        search_pattern = pattern + picos
        if key.startswith(search_pattern):
            k = str(key).split('_')[-1]
            umls_level[ k ] = value

    return umls_level


# Level 1: UMLS
umls_p = [
    lf_levels(train_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['UMLS_direct_', 'UMLS_fuzzy_'])
]

umls_p_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'P') 
    for i, name in enumerate(['UMLS_direct_', 'UMLS_fuzzy_'])
]

umls_p_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['UMLS_direct_', 'UMLS_fuzzy_'])
]

# ------------------------------------------------------------------------

umls_i = [
    lf_levels(train_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['UMLS_direct_', 'UMLS_fuzzy_'])
]

umls_i_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'I') 
    for i, name in enumerate(['UMLS_direct_', 'UMLS_fuzzy_'])
]

umls_i_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['UMLS_direct_', 'UMLS_fuzzy_'])
]

# ------------------------------------------------------------------------

umls_o = [
    lf_levels(train_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['UMLS_direct_', 'UMLS_fuzzy_'])
]

umls_o_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'O') 
    for i, name in enumerate(['UMLS_direct_', 'UMLS_fuzzy_'])
]

umls_o_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['UMLS_direct_', 'UMLS_fuzzy_'])
]

# ------------------------------------------------------------------------
# ------------------------------------------------------------------------


# Level 2: non UMLS
nonumls_p = [
    lf_levels(train_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

nonumls_p_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'P') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

nonumls_p_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

# ------------------------------------------------------------------------


nonumls_i = [
    lf_levels(train_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

nonumls_i_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'I') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

nonumls_i_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

# ------------------------------------------------------------------------

nonumls_o = [
    lf_levels(train_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

nonumls_o_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'O') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

nonumls_o_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

# ------------------------------------------------------------------------

nonumls_s = [
    lf_levels(train_ebm_lfs_dropped, name, 'S') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

nonumls_s_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'S') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

nonumls_s_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'S') 
    for i, name in enumerate(['nonUMLS_direct_', 'nonUMLS_fuzzy_'])
]

# ------------------------------------------------------------------------
# ------------------------------------------------------------------------

# Level 3: DS
ds_p = [
    lf_levels(train_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['ds_direct_', 'ds_fuzzy_'])
]

ds_p_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'P') 
    for i, name in enumerate(['ds_direct_', 'ds_fuzzy_'])
]

ds_p_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['ds_direct_', 'ds_fuzzy_'])
]


# ------------------------------------------------------------------------

ds_i = [
    lf_levels(train_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['ds_direct_', 'ds_fuzzy_'])
]

ds_i_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'I') 
    for i, name in enumerate(['ds_direct_', 'ds_fuzzy_'])
]

ds_i_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['ds_direct_', 'ds_fuzzy_'])
]

# ------------------------------------------------------------------------

ds_o = [
    lf_levels(train_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['ds_direct_', 'ds_fuzzy_'])
]

ds_o_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'O') 
    for i, name in enumerate(['ds_direct_', 'ds_fuzzy_'])
]

ds_o_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['ds_direct_', 'ds_fuzzy_'])
]

# ------------------------------------------------------------------------
# ------------------------------------------------------------------------


# Level 4: dictionary, rules, heuristics
heur_p = [
    lf_levels(train_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['heuristics_direct_'])
]

heur_p_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'P') 
    for i, name in enumerate(['heuristics_direct_'])
]

heur_p_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['heuristics_direct_'])
]

# ------------------------------------------------------------------------

heur_i = [
    lf_levels(train_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['heuristics_direct_'])
]

heur_i_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'I') 
    for i, name in enumerate(['heuristics_direct_'])
]

heur_i_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['heuristics_direct_'])
]

# ------------------------------------------------------------------------

heur_o = [
    lf_levels(train_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['heuristics_direct_'])
]

heur_o_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'O') 
    for i, name in enumerate(['heuristics_direct_'])
]


heur_o_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['heuristics_direct_'])
]

# ------------------------------------------------------------------------

heur_s = [
    lf_levels(train_ebm_lfs_dropped, name, 'S') 
    for i, name in enumerate(['heuristics_direct_'])
]

heur_s_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'S') 
    for i, name in enumerate(['heuristics_direct_'])
]


heur_s_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'S') 
    for i, name in enumerate(['heuristics_direct_'])
]

# ------------------------------------------------------------------------
# ------------------------------------------------------------------------


dict_p = [
    lf_levels(train_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

dict_p_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'P') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

dict_p_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'P') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

# ------------------------------------------------------------------------

dict_i = [
    lf_levels(train_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

dict_i_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'I') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

dict_i_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'I') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

# ------------------------------------------------------------------------

dict_o = [
    lf_levels(train_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

dict_o_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'O') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

dict_o_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'O') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

# ------------------------------------------------------------------------

dict_s = [
    lf_levels(train_ebm_lfs_dropped, name, 'S') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

dict_s_testcorrected = [
    lf_levels(test_ebm_corr_lfs_dropped, name, 'S') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

dict_s_testebm = [
    lf_levels(test_ebm_lfs_dropped, name, 'S') 
    for i, name in enumerate(['dictionary_direct_', 'dictionary_fuzzy_'])
]

In [419]:
# Study Type data

#train_s_candidates = [nonumls_s[1], dict_s[1], heur_s[0]]
#test_s_ebm_corr_candidates = [nonumls_s_testcorrected[1], dict_s_testcorrected[1], heur_s_testcorrected[0]]
#test_s_ebm_candidates = [nonumls_s_testebm[1], dict_s_testebm[1], heur_s_testebm[0]]


train_s_candidates = [dict_s[1], heur_s[0]]
test_s_ebm_corr_candidates = [dict_s_testcorrected[1], heur_s_testcorrected[0]]
test_s_ebm_candidates = [dict_s_testebm[1], heur_s_testebm[0]]

In [420]:
param_grid = {
    'lr': [0.001, 0.0001],
    'l2': [0.001, 0.0001],
    'n_epochs': [50, 100, 200, 600, 700, 1000, 2000, 5000],
    'prec_init': [0.6, 0.7, 0.8, 0.9],
    'optimizer': ["adamax", "adam", "sgd"],
    'lr_scheduler': ['constant'],
}

In [421]:
def sample_param_grid(param_grid, seed):
    """ Sample parameter grid
    :param param_grid:
    :param seed:
    :return:
    """
    rstate = np.random.get_state()
    np.random.seed(seed)
    params = list(product(*[param_grid[name] for name in param_grid]))
    np.random.shuffle(params)
    np.random.set_state(rstate)
    return params

In [422]:
def grid_search(model_class,
                model_class_init,
                param_grid,
                train=None,
                dev=None,
                other_train=None,
                n_model_search=5,
                val_metric='f1_macro',
                seed=1234,
                checkpoint_gt_mv=False,
                tag_fmt_ckpnt='IO'):
    
    
    """Simple grid search helper function
    Parameters
    ----------
    model_class
    model_class_init
    param_grid
    train
    dev
    n_model_search
    val_metric
    seed

    Returns
    -------
    """
    
    L_train, Y_train = train
    L_dev, Y_dev = dev

    # sample configs
    params = sample_param_grid(param_grid, seed)[:n_model_search]

    defaults = {'seed': seed}
    best_score, best_config = 0.0, None
    print(f"Grid search over {len(params)} configs")

    for i, config in enumerate(params):
        print(f'[{i}] Label Model')
        config = dict(zip(param_grid.keys(), config))
        config.update({param: value for param, value in defaults.items() if param not in config})

        model = model_class(**model_class_init)
        model.fit(L_train, Y_dev, **config)
        
        y_pred = model.predict(L_dev)
        
                
        if -1 in y_pred:
            print("Label model predicted -1 (TODO: this happens inconsistently)")
            continue
    
    
    return model, best_config, best_score

In [423]:
def predict_plus(cands, best_model, gt_labels, mode=None):
    
    combined_lf = []
    combined_lf.extend( list(cands[0].values()) ) # Combine with level 4
    combined_lf.extend( list(cands[1].values()) ) # combine with level 4


    L = np.array( combined_lf )
    L = np.transpose(L)
    
    if mode == 'only_pred':
    
        predictions_probablities = best_model.predict_proba(L)
        predictions = best_model.predict(L , tie_break_policy = "abstain")
    
        return predictions_probablities
        
    else:
        
        counter = 0
        
        predictions_probablities = best_model.predict_proba(L)
        predictions = best_model.predict(L , tie_break_policy = "abstain")
    
        groundtruth = np.array(gt_labels) 

        #groundtruth = [-1 if x == 0 else x for x in gt_labels] # XXX if "test_ebm_correct"
        groundtruth = np.array(groundtruth)

        groundtruth_ = []
        predictions_ = []
        for g, p in zip(groundtruth, np.array(predictions)):
            #print( g, p )
            if p == -1:
                counter = counter + 1
                pass
                #print( 'model predicts -1 inconsistently.' )
            else:
                groundtruth_.append(g)
                predictions_.append(p)

        
        print('Total number of tokens missed: ' , counter  )


        cr = classification_report( groundtruth_, predictions_, digits=4, output_dict=True )
        cr_ = classification_report( groundtruth_, predictions_, digits=4 )
        print( cr_ )
    
        return predictions_probablities, cr

In [424]:
# for entities that do not have UMLS partitions

def train_plus(train_cands, test_cands, test_corr_cands, Y_d, picos, paramgrid, mode):
   
    gold_labels = ''
    gold_labels_fine = ''
    
    
    model_class_init = {
        'cardinality': 2, 
        'verbose': True
    }

    num_hyperparams = functools.reduce(lambda x,y:x*y, [len(x) for x in param_grid.values()])
    print("Hyperparamater Search Space:", num_hyperparams)
    n_model_search = 50
    

    '''#########################################################################
    # Choosing the number of LF's from UMLS all
    #########################################################################'''


    best_f1_macro = 0.0
    best_overall_model = ''
    best_overall_config = ''

    combined_lf = []
    combined_lf.extend( list(train_cands[0].values()) ) 
    combined_lf.extend( list(train_cands[1].values()) )
    
    L = np.array( combined_lf )
    L = np.transpose(L)

    # sample configs
    params = sample_param_grid(param_grid, 0)[:n_model_search]
    defaults = {'seed': 0}
    print(f"Grid search over {len(params)} configs")

    for i, config in enumerate(params):
        print(f'[{i}] Label Model')
        config = dict(zip(param_grid.keys(), config))
        config.update({param: value for param, value in defaults.items() if param not in config})

        #label_model = LabelModel(cardinality=2, verbose=True)
        #label_model.fit(L_train=L, **config)
        
        label_model = LMsnorkel(**model_class_init)
        label_model.fit(L, **config)

        # Predict on the test ebm correct set
        preds, class_report = test_corr_probas = predict_plus(test_corr_cands, label_model, Y_d[-2]) # test ebm correct   
        
        if class_report['macro avg']['f1-score'] > best_f1_macro:
            best_f1_macro = class_report['macro avg']['f1-score']
            best_overall_model = label_model
            best_overall_config = config
            
    # Save the best label model
    print('Save the best overall model, configuration and partition for this experiment level')
    # Save your model or results
    save_dir = f'/mnt/nas2/results/Results/systematicReview/distant_pico/models/LabelModels/{picos}/'
    filename = 'stpartition_' + '_epoch_' + str(best_overall_config['n_epochs'])
    joblib.dump(best_overall_model, f'{save_dir}/{filename}.pkl') 
    joblib.dump(best_overall_config, f'{save_dir}/{filename}.json')
    
    
    #load your model for further usage
    loaded_best_model = joblib.load(f'{save_dir}/{filename}.pkl')
    
    # Predict on the training set
    train_probas = predict_plus(train_cands,loaded_best_model, Y_d[-6], mode= 'only_pred') # train 
    
    # Write training predictions to file
    # tokens	pos	offsets	labels	true_labels
    #print( train_probas.shape )
    train_probas = train_probas.tolist()
    #print( len(train_probas) )
    #train_probas = [list(tp) for tp in train_probas]
    #train_probas_series = pd.Series(list(train_probas))
    train_probas_series = pd.Series(train_probas)
    data_df['labels'] = train_probas_series.values

    # Write predictions on the training data to the file
    
    write_df = data_df.groupby(['series'])[['series', 'tokens', 'pos', 'offsets', 'labels', str(picos), str(picos)+'_f']].agg(list)
    write_file_path = f'/mnt/nas2/results/Results/systematicReview/distant_pico/predictions/LabelModels/{picos}/{filename}_bestmodel.tsv'
    write_df.to_csv (write_file_path, index = None, sep = '\t', header=True) 

In [425]:
# CandGen Version v4 all (with extra specified negatives - all UMLS, non-UMLS, dd, dicts, abbreviation NOT the ReGeX)

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 384
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9695    0.9923    0.9807     26258
           1     0.6600    0.3245    0.4351      1214

    accuracy                         0.9628     27472
   macro avg     0.8147    0.6584    0.7079     27472
weighted avg     0.9558    0.9628    0.9566     27472

[1] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9726    0.9914    0.9819     26258
           1     0.6813    0.3962    0.5010      1214

    accuracy                         0.9651     27472
   macro avg     0.8270    0.6938    0.7415     27472
weighted avg     0.9597    0.9651    0.9607     27472

[2] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1  

Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9698    0.9920    0.9808     26258
           1     0.6591    0.3328    0.4423      1214

    accuracy                         0.9629     27472
   macro avg     0.8144    0.6624    0.7115     27472
weighted avg     0.9561    0.9629    0.9570     27472

[23] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                         0.9623     27472
   macro avg     0.8113    0.6531    0.7023     27472
weighted avg     0.9551    0.9623    0.9559     27472

[24] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                      

Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                         0.9623     27472
   macro avg     0.8113    0.6531    0.7023     27472
weighted avg     0.9551    0.9623    0.9559     27472

[45] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                         0.9623     27472
   macro avg     0.8113    0.6531    0.7023     27472
weighted avg     0.9551    0.9623    0.9559     27472

[46] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                      

In [385]:
# CandGen Version v4 all (with extra specified negatives - all UMLS, non-UMLS, dd, dicts, abbreviation NOT the ReGeX)

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 384
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9698    0.9923    0.9809     26258
           1     0.6656    0.3328    0.4437      1214

    accuracy                         0.9631     27472
   macro avg     0.8177    0.6625    0.7123     27472
weighted avg     0.9564    0.9631    0.9572     27472

[1] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9726    0.9914    0.9819     26258
           1     0.6813    0.3962    0.5010      1214

    accuracy                         0.9651     27472
   macro avg     0.8270    0.6938    0.7415     27472
weighted avg     0.9597    0.9651    0.9607     27472

[2] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1  

Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9698    0.9920    0.9808     26258
           1     0.6591    0.3328    0.4423      1214

    accuracy                         0.9629     27472
   macro avg     0.8144    0.6624    0.7115     27472
weighted avg     0.9561    0.9629    0.9570     27472

[23] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                         0.9623     27472
   macro avg     0.8113    0.6531    0.7023     27472
weighted avg     0.9551    0.9623    0.9559     27472

[24] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                      

Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                         0.9623     27472
   macro avg     0.8113    0.6531    0.7023     27472
weighted avg     0.9551    0.9623    0.9559     27472

[45] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                         0.9623     27472
   macro avg     0.8113    0.6531    0.7023     27472
weighted avg     0.9551    0.9623    0.9559     27472

[46] Label Model
Total number of tokens missed:  25110
              precision    recall  f1-score   support

           0     0.9690    0.9923    0.9805     26258
           1     0.6535    0.3138    0.4240      1214

    accuracy                      

In [354]:
# CandGen Version v4 all (with extra specified negatives - all UMLS, non-UMLS, dd, dicts, abbreviation NOT the ReGeX)

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 384
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  25470
              precision    recall  f1-score   support

           0     0.9695    0.9919    0.9806     25898
           1     0.6596    0.3336    0.4431      1214

    accuracy                         0.9625     27112
   macro avg     0.8145    0.6628    0.7118     27112
weighted avg     0.9556    0.9625    0.9565     27112

[1] Label Model
Total number of tokens missed:  25470
              precision    recall  f1-score   support

           0     0.9695    0.9919    0.9806     25898
           1     0.6596    0.3336    0.4431      1214

    accuracy                         0.9625     27112
   macro avg     0.8145    0.6628    0.7118     27112
weighted avg     0.9556    0.9625    0.9565     27112

[2] Label Model
Total number of tokens missed:  25470
              precision    recall  f1-score   support

           0     0.9695    0.9919    0.9806     25898
           1  

Total number of tokens missed:  25470
              precision    recall  f1-score   support

           0     0.9695    0.9919    0.9806     25898
           1     0.6596    0.3336    0.4431      1214

    accuracy                         0.9625     27112
   macro avg     0.8145    0.6628    0.7118     27112
weighted avg     0.9556    0.9625    0.9565     27112

[23] Label Model
Total number of tokens missed:  25470
              precision    recall  f1-score   support

           0     0.9695    0.9919    0.9806     25898
           1     0.6596    0.3336    0.4431      1214

    accuracy                         0.9625     27112
   macro avg     0.8145    0.6628    0.7118     27112
weighted avg     0.9556    0.9625    0.9565     27112

[24] Label Model
Total number of tokens missed:  25470
              precision    recall  f1-score   support

           0     0.9695    0.9919    0.9806     25898
           1     0.6596    0.3336    0.4431      1214

    accuracy                      

Total number of tokens missed:  25470
              precision    recall  f1-score   support

           0     0.9695    0.9919    0.9806     25898
           1     0.6596    0.3336    0.4431      1214

    accuracy                         0.9625     27112
   macro avg     0.8145    0.6628    0.7118     27112
weighted avg     0.9556    0.9625    0.9565     27112

[45] Label Model
Total number of tokens missed:  25470
              precision    recall  f1-score   support

           0     0.9695    0.9919    0.9806     25898
           1     0.6596    0.3336    0.4431      1214

    accuracy                         0.9625     27112
   macro avg     0.8145    0.6628    0.7118     27112
weighted avg     0.9556    0.9625    0.9565     27112

[46] Label Model
Total number of tokens missed:  25470
              precision    recall  f1-score   support

           0     0.9695    0.9919    0.9806     25898
           1     0.6596    0.3336    0.4431      1214

    accuracy                      

In [323]:
# CandGen Version v4 all (with extra specified negatives - all UMLS, non-UMLS, dd, dicts, NOT the ReGeX)

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 384
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  25565
              precision    recall  f1-score   support

           0     0.9694    0.9919    0.9805     25804
           1     0.6596    0.3339    0.4433      1213

    accuracy                         0.9624     27017
   macro avg     0.8145    0.6629    0.7119     27017
weighted avg     0.9555    0.9624    0.9564     27017

[1] Label Model
Total number of tokens missed:  25565
              precision    recall  f1-score   support

           0     0.9694    0.9919    0.9805     25804
           1     0.6596    0.3339    0.4433      1213

    accuracy                         0.9624     27017
   macro avg     0.8145    0.6629    0.7119     27017
weighted avg     0.9555    0.9624    0.9564     27017

[2] Label Model
Total number of tokens missed:  25565
              precision    recall  f1-score   support

           0     0.9694    0.9919    0.9805     25804
           1  

Total number of tokens missed:  25565
              precision    recall  f1-score   support

           0     0.9694    0.9919    0.9805     25804
           1     0.6596    0.3339    0.4433      1213

    accuracy                         0.9624     27017
   macro avg     0.8145    0.6629    0.7119     27017
weighted avg     0.9555    0.9624    0.9564     27017

[23] Label Model
Total number of tokens missed:  25565
              precision    recall  f1-score   support

           0     0.9694    0.9919    0.9805     25804
           1     0.6596    0.3339    0.4433      1213

    accuracy                         0.9624     27017
   macro avg     0.8145    0.6629    0.7119     27017
weighted avg     0.9555    0.9624    0.9564     27017

[24] Label Model
Total number of tokens missed:  25565
              precision    recall  f1-score   support

           0     0.9694    0.9919    0.9805     25804
           1     0.6596    0.3339    0.4433      1213

    accuracy                      

Total number of tokens missed:  25565
              precision    recall  f1-score   support

           0     0.9694    0.9919    0.9805     25804
           1     0.6596    0.3339    0.4433      1213

    accuracy                         0.9624     27017
   macro avg     0.8145    0.6629    0.7119     27017
weighted avg     0.9555    0.9624    0.9564     27017

[45] Label Model
Total number of tokens missed:  25565
              precision    recall  f1-score   support

           0     0.9694    0.9919    0.9805     25804
           1     0.6596    0.3339    0.4433      1213

    accuracy                         0.9624     27017
   macro avg     0.8145    0.6629    0.7119     27017
weighted avg     0.9555    0.9624    0.9564     27017

[46] Label Model
Total number of tokens missed:  25565
              precision    recall  f1-score   support

           0     0.9694    0.9919    0.9805     25804
           1     0.6596    0.3339    0.4433      1213

    accuracy                      

In [185]:
# CandGen Version v3 all

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 384
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  35764
              precision    recall  f1-score   support

           0     0.9998    0.9447    0.9715     16442
           1     0.2910    0.9920    0.4499       376

    accuracy                         0.9458     16818
   macro avg     0.6454    0.9684    0.7107     16818
weighted avg     0.9840    0.9458    0.9598     16818

[1] Label Model
Total number of tokens missed:  35764
              precision    recall  f1-score   support

           0     0.9998    0.9443    0.9713     16442
           1     0.2894    0.9920    0.4480       376

    accuracy                         0.9454     16818
   macro avg     0.6446    0.9682    0.7097     16818
weighted avg     0.9839    0.9454    0.9596     16818

[2] Label Model
Total number of tokens missed:  35764
              precision    recall  f1-score   support

           0     0.9997    0.9448    0.9715     16442
           1  

Total number of tokens missed:  35764
              precision    recall  f1-score   support

           0     0.9998    0.9443    0.9713     16442
           1     0.2894    0.9920    0.4480       376

    accuracy                         0.9454     16818
   macro avg     0.6446    0.9682    0.7097     16818
weighted avg     0.9839    0.9454    0.9596     16818

[23] Label Model
Total number of tokens missed:  35764
              precision    recall  f1-score   support

           0     0.9997    0.9448    0.9715     16442
           1     0.2909    0.9894    0.4495       376

    accuracy                         0.9458     16818
   macro avg     0.6453    0.9671    0.7105     16818
weighted avg     0.9839    0.9458    0.9598     16818

[24] Label Model
Total number of tokens missed:  35764
              precision    recall  f1-score   support

           0     0.9997    0.9448    0.9715     16442
           1     0.2909    0.9894    0.4495       376

    accuracy                      

Total number of tokens missed:  35764
              precision    recall  f1-score   support

           0     0.9997    0.9448    0.9715     16442
           1     0.2909    0.9894    0.4495       376

    accuracy                         0.9458     16818
   macro avg     0.6453    0.9671    0.7105     16818
weighted avg     0.9839    0.9458    0.9598     16818

[45] Label Model
Total number of tokens missed:  35764
              precision    recall  f1-score   support

           0     0.9997    0.9448    0.9715     16442
           1     0.2909    0.9894    0.4495       376

    accuracy                         0.9458     16818
   macro avg     0.6453    0.9671    0.7105     16818
weighted avg     0.9839    0.9458    0.9598     16818

[46] Label Model
Total number of tokens missed:  35764
              precision    recall  f1-score   support

           0     0.9997    0.9448    0.9715     16442
           1     0.2909    0.9894    0.4495       376

    accuracy                      

In [144]:
# CandGen Version v3 (removed heur_pattern and heur_pattern_2)

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 384
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  35850
              precision    recall  f1-score   support

           0     0.9998    0.9493    0.9739     16362
           1     0.3069    0.9919    0.4687       370

    accuracy                         0.9503     16732
   macro avg     0.6533    0.9706    0.7213     16732
weighted avg     0.9845    0.9503    0.9627     16732

[1] Label Model
Total number of tokens missed:  35850
              precision    recall  f1-score   support

           0     0.9998    0.9492    0.9739     16362
           1     0.3063    0.9919    0.4681       370

    accuracy                         0.9502     16732
   macro avg     0.6531    0.9706    0.7210     16732
weighted avg     0.9845    0.9502    0.9627     16732

[2] Label Model
Total number of tokens missed:  35850
              precision    recall  f1-score   support

           0     0.9997    0.9495    0.9740     16362
           1  

Total number of tokens missed:  35850
              precision    recall  f1-score   support

           0     0.9998    0.9492    0.9739     16362
           1     0.3063    0.9919    0.4681       370

    accuracy                         0.9502     16732
   macro avg     0.6531    0.9706    0.7210     16732
weighted avg     0.9845    0.9502    0.9627     16732

[23] Label Model
Total number of tokens missed:  35850
              precision    recall  f1-score   support

           0     0.9997    0.9495    0.9740     16362
           1     0.3068    0.9892    0.4683       370

    accuracy                         0.9503     16732
   macro avg     0.6533    0.9693    0.7211     16732
weighted avg     0.9844    0.9503    0.9628     16732

[24] Label Model
Total number of tokens missed:  35850
              precision    recall  f1-score   support

           0     0.9997    0.9495    0.9740     16362
           1     0.3068    0.9892    0.4683       370

    accuracy                      

Total number of tokens missed:  35850
              precision    recall  f1-score   support

           0     0.9997    0.9495    0.9740     16362
           1     0.3068    0.9892    0.4683       370

    accuracy                         0.9503     16732
   macro avg     0.6533    0.9693    0.7211     16732
weighted avg     0.9844    0.9503    0.9628     16732

[45] Label Model
Total number of tokens missed:  35850
              precision    recall  f1-score   support

           0     0.9997    0.9495    0.9740     16362
           1     0.3068    0.9892    0.4683       370

    accuracy                         0.9503     16732
   macro avg     0.6533    0.9693    0.7211     16732
weighted avg     0.9844    0.9503    0.9628     16732

[46] Label Model
Total number of tokens missed:  35850
              precision    recall  f1-score   support

           0     0.9997    0.9495    0.9740     16362
           1     0.3068    0.9892    0.4683       370

    accuracy                      

In [116]:
# CandGen Version v4 (LFs = 14) # delete this later

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 336
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  50714
              precision    recall  f1-score   support

           0     0.9811    0.0357    0.0690      1455
           1     0.2270    0.9976    0.3698       413

    accuracy                         0.2484      1868
   macro avg     0.6041    0.5167    0.2194      1868
weighted avg     0.8144    0.2484    0.1355      1868

[1] Label Model
Total number of tokens missed:  50714
              precision    recall  f1-score   support

           0     0.9811    0.0357    0.0690      1455
           1     0.2270    0.9976    0.3698       413

    accuracy                         0.2484      1868
   macro avg     0.6041    0.5167    0.2194      1868
weighted avg     0.8144    0.2484    0.1355      1868

[2] Label Model
Total number of tokens missed:  50714
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000      1455
           1  

Total number of tokens missed:  50714
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000      1455
           1     0.2211    1.0000    0.3621       413

    accuracy                         0.2211      1868
   macro avg     0.1105    0.5000    0.1811      1868
weighted avg     0.0489    0.2211    0.0801      1868

[23] Label Model
Total number of tokens missed:  50714
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000      1455
           1     0.2207    0.9976    0.3614       413

    accuracy                         0.2206      1868
   macro avg     0.1103    0.4988    0.1807      1868
weighted avg     0.0488    0.2206    0.0799      1868

[24] Label Model
Total number of tokens missed:  50714
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000      1455
           1     0.2211    1.0000    0.3621       413

    accuracy                      

Total number of tokens missed:  50714
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000      1455
           1     0.2207    0.9976    0.3614       413

    accuracy                         0.2206      1868
   macro avg     0.1103    0.4988    0.1807      1868
weighted avg     0.0488    0.2206    0.0799      1868

[45] Label Model
Total number of tokens missed:  50714
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000      1455
           1     0.2211    1.0000    0.3621       413

    accuracy                         0.2211      1868
   macro avg     0.1105    0.5000    0.1811      1868
weighted avg     0.0489    0.2211    0.0801      1868

[46] Label Model
Total number of tokens missed:  50714
              precision    recall  f1-score   support

           0     0.9811    0.0357    0.0690      1455
           1     0.2270    0.9976    0.3698       413

    accuracy                      

In [95]:
# CandGen Version v4 (LFs = 14)

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 336
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.9956    0.4967    0.6628       918
           1     0.4467    0.9947    0.6165       375

    accuracy                         0.6411      1293
   macro avg     0.7212    0.7457    0.6397      1293
weighted avg     0.8364    0.6411    0.6494      1293

[1] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.9956    0.4967    0.6628       918
           1     0.4467    0.9947    0.6165       375

    accuracy                         0.6411      1293
   macro avg     0.7212    0.7457    0.6397      1293
weighted avg     0.8364    0.6411    0.6494      1293

[2] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1  

Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2900    1.0000    0.4496       375

    accuracy                         0.2900      1293
   macro avg     0.1450    0.5000    0.2248      1293
weighted avg     0.0841    0.2900    0.1304      1293

[23] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2889    0.9947    0.4478       375

    accuracy                         0.2885      1293
   macro avg     0.1445    0.4973    0.2239      1293
weighted avg     0.0838    0.2885    0.1299      1293

[24] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2900    1.0000    0.4496       375

    accuracy                      

Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2900    1.0000    0.4496       375

    accuracy                         0.2900      1293
   macro avg     0.1450    0.5000    0.2248      1293
weighted avg     0.0841    0.2900    0.1304      1293

[45] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2900    1.0000    0.4496       375

    accuracy                         0.2900      1293
   macro avg     0.1450    0.5000    0.2248      1293
weighted avg     0.0841    0.2900    0.1304      1293

[46] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.9956    0.4967    0.6628       918
           1     0.4467    0.9947    0.6165       375

    accuracy                      

In [74]:
# CandGen Version v4 (LFs = 13)

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 336
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.9956    0.4967    0.6628       918
           1     0.4467    0.9947    0.6165       375

    accuracy                         0.6411      1293
   macro avg     0.7212    0.7457    0.6397      1293
weighted avg     0.8364    0.6411    0.6494      1293

[1] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.9956    0.4967    0.6628       918
           1     0.4467    0.9947    0.6165       375

    accuracy                         0.6411      1293
   macro avg     0.7212    0.7457    0.6397      1293
weighted avg     0.8364    0.6411    0.6494      1293

[2] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1  

Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2900    1.0000    0.4496       375

    accuracy                         0.2900      1293
   macro avg     0.1450    0.5000    0.2248      1293
weighted avg     0.0841    0.2900    0.1304      1293

[23] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2889    0.9947    0.4478       375

    accuracy                         0.2885      1293
   macro avg     0.1445    0.4973    0.2239      1293
weighted avg     0.0838    0.2885    0.1299      1293

[24] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2900    1.0000    0.4496       375

    accuracy                      

Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2900    1.0000    0.4496       375

    accuracy                         0.2900      1293
   macro avg     0.1450    0.5000    0.2248      1293
weighted avg     0.0841    0.2900    0.1304      1293

[45] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       918
           1     0.2900    1.0000    0.4496       375

    accuracy                         0.2900      1293
   macro avg     0.1450    0.5000    0.2248      1293
weighted avg     0.0841    0.2900    0.1304      1293

[46] Label Model
Total number of tokens missed:  51289
              precision    recall  f1-score   support

           0     0.9956    0.4967    0.6628       918
           1     0.4467    0.9947    0.6165       375

    accuracy                      

In [58]:
# CandGen Version v4 (LFs = 12)

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 336
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  51341
              precision    recall  f1-score   support

           0     0.9951    0.4665    0.6352       866
           1     0.4467    0.9947    0.6165       375

    accuracy                         0.6261      1241
   macro avg     0.7209    0.7306    0.6259      1241
weighted avg     0.8294    0.6261    0.6296      1241

[1] Label Model
Total number of tokens missed:  51341
              precision    recall  f1-score   support

           0     0.9951    0.4665    0.6352       866
           1     0.4467    0.9947    0.6165       375

    accuracy                         0.6261      1241
   macro avg     0.7209    0.7306    0.6259      1241
weighted avg     0.8294    0.6261    0.6296      1241

[2] Label Model
Total number of tokens missed:  51341
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       866
           1  

Total number of tokens missed:  51341
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       866
           1     0.3022    1.0000    0.4641       375

    accuracy                         0.3022      1241
   macro avg     0.1511    0.5000    0.2321      1241
weighted avg     0.0913    0.3022    0.1402      1241

[23] Label Model
Total number of tokens missed:  51341
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       866
           1     0.3010    0.9947    0.4622       375

    accuracy                         0.3006      1241
   macro avg     0.1505    0.4973    0.2311      1241
weighted avg     0.0910    0.3006    0.1397      1241

[24] Label Model
Total number of tokens missed:  51341
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       866
           1     0.3022    1.0000    0.4641       375

    accuracy                      

Total number of tokens missed:  51341
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       866
           1     0.3010    0.9947    0.4622       375

    accuracy                         0.3006      1241
   macro avg     0.1505    0.4973    0.2311      1241
weighted avg     0.0910    0.3006    0.1397      1241

[45] Label Model
Total number of tokens missed:  51341
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       866
           1     0.3022    1.0000    0.4641       375

    accuracy                         0.3022      1241
   macro avg     0.1511    0.5000    0.2321      1241
weighted avg     0.0913    0.3022    0.1402      1241

[46] Label Model
Total number of tokens missed:  51341
              precision    recall  f1-score   support

           0     0.9951    0.4665    0.6352       866
           1     0.4467    0.9947    0.6165       375

    accuracy                      

In [31]:
# CandGen Version v4 (LFs = 11)

predicted_s = train_plus(train_s_candidates, test_s_ebm_candidates, test_s_ebm_corr_candidates, Y_s, 's', paramgrid = param_grid, mode = 'pred')

Hyperparamater Search Space: 336
Grid search over 50 configs
[0] Label Model
Total number of tokens missed:  51432
              precision    recall  f1-score   support

           0     0.9909    0.8361    0.9069       781
           1     0.7393    0.9837    0.8442       369

    accuracy                         0.8835      1150
   macro avg     0.8651    0.9099    0.8756      1150
weighted avg     0.9102    0.8835    0.8868      1150

[1] Label Model
Total number of tokens missed:  51432
              precision    recall  f1-score   support

           0     0.9909    0.8361    0.9069       781
           1     0.7393    0.9837    0.8442       369

    accuracy                         0.8835      1150
   macro avg     0.8651    0.9099    0.8756      1150
weighted avg     0.9102    0.8835    0.8868      1150

[2] Label Model
Total number of tokens missed:  51432
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       781
           1  

Total number of tokens missed:  51432
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       781
           1     0.3209    1.0000    0.4858       369

    accuracy                         0.3209      1150
   macro avg     0.1604    0.5000    0.2429      1150
weighted avg     0.1030    0.3209    0.1559      1150

[23] Label Model
Total number of tokens missed:  51432
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       781
           1     0.3197    0.9946    0.4838       369

    accuracy                         0.3191      1150
   macro avg     0.1598    0.4973    0.2419      1150
weighted avg     0.1026    0.3191    0.1553      1150

[24] Label Model
Total number of tokens missed:  51432
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       781
           1     0.3209    1.0000    0.4858       369

    accuracy                      

Total number of tokens missed:  51432
              precision    recall  f1-score   support

           0     0.9817    0.2740    0.4284       781
           1     0.3916    0.9892    0.5611       369

    accuracy                         0.5035      1150
   macro avg     0.6866    0.6316    0.4948      1150
weighted avg     0.7923    0.5035    0.4710      1150

[45] Label Model
Total number of tokens missed:  51432
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       781
           1     0.3209    1.0000    0.4858       369

    accuracy                         0.3209      1150
   macro avg     0.1604    0.5000    0.2429      1150
weighted avg     0.1030    0.3209    0.1559      1150

[46] Label Model
Total number of tokens missed:  51432
              precision    recall  f1-score   support

           0     0.9909    0.8361    0.9069       781
           1     0.7393    0.9837    0.8442       369

    accuracy                      