In [1]:
import enum
import glob
import os
from hashlib import new
from pathlib import Path
import time

import functools

import numpy as np
import pandas as pd
import scipy
from flyingsquid.label_model import LabelModel as LMsquid
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from snorkel.labeling.model import LabelModel as LMsnorkel
from snorkel.labeling.model import MajorityLabelVoter

In [2]:
from sklearn.exceptions import UndefinedMetricWarning

def warn(*args, **kwargs):
    """No-op replacement for ``warnings.warn``: accepts any arguments and does nothing."""
    return None
import warnings
warnings.warn = warn

In [8]:
def list2Nested(l, nested_length):
    """Split ``l`` into consecutive slices of ``nested_length`` items.

    The final slice is shorter when ``len(l)`` is not a multiple of
    ``nested_length``; an empty ``l`` yields an empty list.
    """
    chunks = []
    for start in range(0, len(l), nested_length):
        stop = start + nested_length
        chunks.append(l[start:stop])
    return chunks

In [9]:
# Fetch UMLS ranks

# Tab-separated LF summary files, one per PICO class (p / i / o).
sum_lf_p = (
    '/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/'
    'lf_p_summary_train.csv'
)
sum_lf_i = (
    '/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/'
    'lf_i_summary_train.csv'
)
sum_lf_o = (
    '/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/'
    'lf_o_summary_train.csv'
)


def fetchRank(sum_lf_d):
    """Rank the UMLS labeling functions listed in an LF summary file.

    Parameters
    ----------
    sum_lf_d : str or path-like
        Tab-separated summary file. The first column holds the LF name and the
        fourth column (position 3) holds the score used for ranking
        (presumably coverage, judging by the variable names - confirm against
        the code that writes the summary).

    Returns
    -------
    dict
        Maps the last ``_``-separated token of every LF name starting with
        ``'UMLS_fuzzy_'`` to its score, inserted in descending score order.
        Rows whose name is not a string (e.g. NaN) are ignored.
    """
    data = pd.read_csv(sum_lf_d, sep='\t')

    # Columns are addressed by position with .iloc: integer keys on a
    # label-indexed row (`row[0]`) rely on a deprecated pandas fallback.
    umls_coverage_ = {
        name: score
        for name, score in zip(data.iloc[:, 0], data.iloc[:, 3])
        if isinstance(name, str) and name.startswith('UMLS_fuzzy_')
    }

    # sorted() is stable, so equal scores keep their file order.
    umls_coverage_sorted = sorted(umls_coverage_.items(), key=lambda x: x[1], reverse=True)

    ranked_umls_coverage = dict()
    for name, score in umls_coverage_sorted:
        ranked_umls_coverage[name.split('_')[-1]] = score

    return ranked_umls_coverage

# One ranking per PICO class, evaluated in p, i, o order.
ranksorted_p_umls, ranksorted_i_umls, ranksorted_o_umls = map(
    fetchRank, (sum_lf_p, sum_lf_i, sum_lf_o)
)

In [10]:
# Partition LF's

def partitionLFs(umls_d):
    """Enumerate the prefix-split partitions of the keys of ``umls_d``.

    For every split point ``i`` in ``0 .. len(keys) - 1`` the first ``i`` keys
    each form a singleton group and the remaining keys form one final group.
    ``i == 0`` therefore gives a single group with every key, and the last
    split point gives one singleton group per key.

    Parameters
    ----------
    umls_d : dict
        Mapping whose key order defines the split order.

    Returns
    -------
    list
        One partition per split point; each partition is a list of groups and
        each group is a list of keys. Empty input yields an empty list.
    """
    keys = list(umls_d.keys())

    return [
        [[key] for key in keys[:i]] + [keys[i:]]
        for i in range(len(keys))
    ]


# Partition the ranked UMLS LFs of each PICO class, in p, i, o order.
partitioned_p_umls, partitioned_i_umls, partitioned_o_umls = map(
    partitionLFs, (ranksorted_p_umls, ranksorted_i_umls, ranksorted_o_umls)
)

In [11]:
import LMutils

# Candidate file names: validation_labels, validation_labels_tui_pio2
file = (
    '/mnt/nas2/results/Results/systematicReview/distant_pico/EBM_PICO_GT/'
    'validation_labels_tui_pio2.tsv'
)
# Tab-separated file; the first line is the header.
df_data = pd.read_csv(file, header=0, sep='\t')

In [13]:
# Ordered (unshuffled) split: first 80% of rows for training, last 20% for validation.
df_data_train, df_data_val = train_test_split(df_data, shuffle=False, test_size=0.20)
Y_tokens = df_data['tokens']

In [17]:
# Read Candidate labels from multiple LFs
indir = '/mnt/nas2/results/Results/systematicReview/distant_pico/candidate_generation'
pathlist = Path(indir).glob('**/*.tsv')

# Number of rows a candidate file must have for its labels to be accepted.
EXPECTED_NUM_TOKENS = 1354953

tokens = []

lfs = dict()

for file in pathlist:

    # An empty file has nothing to parse. Without this guard `data` would still
    # hold the previous file's frame (or be undefined on the first file) and
    # those labels would be stored under this file's key.
    if file.stat().st_size == 0:
        continue

    # LF name: path below candidate_generation/, extension dropped, '/' -> '_'
    k = str( file ).split('candidate_generation/')[-1].replace('.tsv', '').replace('/', '_')
    data = pd.read_csv(file, sep='\t', header=0)

    # The token column is taken once, from the first non-empty file.
    if len(tokens) == 0:
        tokens.extend( list(data.tokens) )

    # The last column of each file holds that LF's labels.
    sab = data.columns[-1]
    if len(data[sab]) == EXPECTED_NUM_TOKENS:
        lfs[str(k)] = list( data[sab] )[:len(Y_tokens)]


print( 'Total number of tokens in validation set: ', len(tokens) )
print( 'Total number of LFs in the dictionary', len(lfs) )

Total number of tokens in validation set:  1354953
Total number of LFs in the dictionary 617


In [18]:
# One frame per PICO class: keep the LFs whose name carries that class's
# upper- or lower-case `_X_lf_` marker.
dict_p_lfs = pd.DataFrame({
    name: pd.Series(votes)
    for name, votes in lfs.items()
    if any(tag in name for tag in ('_P_lf_', '_p_lf_'))
})
dict_i_lfs = pd.DataFrame({
    name: pd.Series(votes)
    for name, votes in lfs.items()
    if any(tag in name for tag in ('_I_lf_', '_i_lf_'))
})
dict_o_lfs = pd.DataFrame({
    name: pd.Series(votes)
    for name, votes in lfs.items()
    if any(tag in name for tag in ('_O_lf_', '_o_lf_'))
})

In [19]:
def lf_levels(umls_d:dict, pattern:str, picos:str):
    """Select the entries of ``umls_d`` whose key starts with ``pattern + picos``.

    Each selected entry is re-keyed by the last ``_``-separated token of its
    original key; when two keys share that token the later one wins.
    """
    prefix = pattern + picos
    return {
        str(name).split('_')[-1]: votes
        for name, votes in umls_d.items()
        if name.startswith(prefix)
    }

# Level 1: UMLS (these keys use lower-case class tags)
umls_p, umls_i, umls_o = (lf_levels(lfs, 'UMLS_fuzzy_', tag) for tag in 'pio')

# Level 2: non UMLS
nonumls_p, nonumls_i, nonumls_o = (lf_levels(lfs, 'nonUMLS_fuzzy_', tag) for tag in 'PIO')

# Level 3: DS
ds_p, ds_i, ds_o = (lf_levels(lfs, 'DS_fuzzy_', tag) for tag in 'PIO')

# Level 4: dictionary, rules, heuristics
heur_p, heur_i, heur_o = (lf_levels(lfs, 'heuristics_direct_', tag) for tag in 'PIO')

dict_p, dict_i, dict_o = (lf_levels(lfs, 'dictionary_direct_', tag) for tag in 'PIO')

In [20]:
def compare(s, t):
    """Return True when ``s`` and ``t`` contain the same items, ignoring order."""
    left, right = list(s), list(t)
    left.sort()
    right.sort()
    return left == right


def _resolve_vote(current, incoming):
    """Merge two votes: 1 beats -1 and 0, -1 beats 0, anything else keeps ``current``."""
    pair = [current, incoming]
    if compare(pair, [-1, 1]):
        return max(current, incoming)
    if compare(pair, [0, 1]):
        return max(current, incoming)
    if compare(pair, [-1, 0]):
        return min(current, incoming)
    return current


def getLFs(partition:list, umls_d:dict, seed_len:int):
    """Collapse each group of a partition into a single vote sequence.

    Every group starts from ``seed_len`` zeros and folds in, position by
    position, the votes of each source named in the group (looked up in
    ``umls_d``). Positions are paired with ``zip``, so the result of a group is
    as long as its shortest input.

    Returns a list with one merged vote list per group, in partition order.
    """
    merged_groups = []

    for group in partition:
        votes = [0] * seed_len
        for source in group:
            votes = [
                _resolve_vote(current, incoming)
                for current, incoming in zip(votes, umls_d[source])
            ]
        merged_groups.append(votes)

    return merged_groups

In [48]:
def grid_search(model_class,
                model_class_init,
                param_grid,
                train=None,
                dev=None,
                other_train=None,
                n_model_search=5,
                val_metric='f1_macro',
                seed=1234,
                checkpoint_gt_mv=False,
                tag_fmt_ckpnt='IO'):
    """Simple grid search helper function.

    Fits one model per sampled configuration, scores it on the dev split and
    keeps the configuration with the highest ``val_metric``. A model is then
    re-fitted with the best configuration and returned.

    Parameters
    ----------
    model_class
        Label model class; instantiated as ``model_class(**model_class_init)``
        and expected to offer ``fit``, ``predict`` and ``score``.
    model_class_init : dict
        Keyword arguments for the model constructor.
    param_grid : dict
        Maps each fit hyperparameter to its candidate values. Key order must
        match the order of values in each sampled configuration.
    train : tuple
        ``(L_train, Y_train)``. Train metrics are printed only when
        ``Y_train`` contains a truthy label.
    dev : tuple
        ``(L_dev, Y_dev)``. ``Y_dev`` is also passed to ``fit`` as its second
        positional argument.
    other_train
        Unused; kept for interface compatibility.
    n_model_search : int
        Maximum number of sampled configurations to try.
    val_metric : str
        Metric used for model selection; must be one of the scored metrics.
    seed : int
        Seed for configuration sampling; also added to every configuration
        that does not set ``seed`` itself.
    checkpoint_gt_mv : bool
        When True, skip configurations that score below a majority-vote
        baseline on the dev split.
    tag_fmt_ckpnt : str
        ``'IO'`` collapses every non-zero gold label to 1 before scoring; any
        other value scores against the gold labels unchanged.

    Returns
    -------
    tuple
        ``(model, best_config, best_score)`` where ``best_score`` is the dev
        metrics dict of the best configuration.

    Raises
    ------
    ValueError
        If no configuration produced a usable model.

    Notes
    -----
    ``sample_param_grid`` is not defined in the visible part of this notebook;
    it is assumed to be provided elsewhere (possibly ``LMutils``) - confirm.
    """
    L_train, Y_train = train
    L_dev, Y_dev = dev

    score_metrics = ['accuracy', 'precision', 'recall', 'f1', 'f1_macro']

    def _gold(labels):
        # set gold tags for evaluation
        if tag_fmt_ckpnt == 'IO':
            return np.array([0 if y == 0 else 1 for y in labels])
        return labels

    # sample configs
    params = sample_param_grid(param_grid, seed)[:n_model_search]

    defaults = {'seed': seed}
    best_score, best_config = None, None
    print(f"Grid search over {len(params)} configs")

    # Gold dev labels do not depend on the configuration: build them once.
    y_gold_dev = _gold(Y_dev)

    # The majority-vote baseline does not depend on the configuration either.
    # It must come from a majority voter: scoring the label model against
    # itself would make the checkpoint a no-op.
    mv_metrics = None
    if checkpoint_gt_mv:
        majority_voter = MajorityLabelVoter(cardinality=model_class_init.get('cardinality', 2))
        mv_metrics = majority_voter.score(L=L_dev,
                                          Y=y_gold_dev,
                                          metrics=score_metrics,
                                          tie_break_policy='random')

    for i, config in enumerate(params):
        print(f'[{i}] Label Model')
        config = dict(zip(param_grid.keys(), config))
        # update default params if not specified
        config.update({param: value for param, value in defaults.items() if param not in config})

        model = model_class(**model_class_init)
        model.fit(L_train, Y_dev, **config)

        if -1 in model.predict(L_dev):
            print("Label model predicted -1 (TODO: this happens inconsistently)")
            continue

        # use internal label model scorer to score the prediction
        metrics = model.score(L=L_dev,
                              Y=y_gold_dev,
                              metrics=score_metrics,
                              tie_break_policy='random')

        # skip if LM less than MV on the same labeled dev set
        if mv_metrics is not None and metrics[val_metric] < mv_metrics[val_metric]:
            continue

        if best_score is None or metrics[val_metric] > best_score[val_metric]:
            print(config)
            best_score = metrics
            best_config = config

            # print training set score if we have labeled data
            if np.any(Y_train):
                train_metrics = model.score(L=L_train,
                                            Y=_gold(Y_train),
                                            metrics=score_metrics,
                                            tie_break_policy='random')

                print('[TRAIN] {}'.format(' | '.join([f'{m}: {v * 100:2.2f}' for m, v in train_metrics.items()])))

            print('[DEV]   {}'.format(' | '.join([f'{m}: {v * 100:2.2f}' for m, v in best_score.items()])))
            print('-' * 88)

    if best_config is None:
        raise ValueError(
            'grid_search found no usable configuration: every candidate was '
            'skipped or none was sampled'
        )

    # retrain best model
    print('BEST')
    print(best_config)
    model = model_class(**model_class_init)
    model.fit(L_train, Y_dev, **best_config)
    return model, best_config, best_score

In [52]:
def train(partitioned_d_umls, umls_d, non_umls_d, ds_d, heur_d, dict_d, df_data_train, df_data_val, picos, paramgrid, n_model_search=25):
    """Grid-search a label model for every UMLS partition and keep the best one.

    For each partition the UMLS LFs are merged per group with ``getLFs``, the
    other LF levels are appended, and the resulting label matrix is split
    80/20 without shuffling before being handed to ``grid_search``.

    Parameters
    ----------
    partitioned_d_umls : list
        Partitions of UMLS LF names (each a list of groups of names).
    umls_d : dict
        UMLS LF name -> vote sequence.
    non_umls_d, ds_d, heur_d, dict_d : dict
        Remaining LF levels; their values are appended as extra LFs in this
        order.
    df_data_train, df_data_val : pandas.DataFrame
        Gold-label frames for the two splits. Together they must have as many
        rows as every LF has votes, because the label matrix is split with the
        same ordered 80/20 rule.
    picos : str
        Name of the gold-label column to evaluate against.
    paramgrid : dict
        Hyperparameter name -> candidate values, forwarded to ``grid_search``.
    n_model_search : int, optional
        Number of configurations tried per partition (default 25).

    Returns
    -------
    tuple
        ``(best_overall_model, best_overall_config, best_f1_macro)``; model and
        config are ``None`` when no partition scored above 0.
    """
    best_f1_macro = 0.0
    best_overall_model = None
    best_overall_config = None

    model_class_init = {
        'cardinality': 2,
        'verbose': True
    }

    # Size of the full grid, computed from the argument rather than a global.
    num_hyperparams = functools.reduce(lambda x, y: x * y, [len(x) for x in paramgrid.values()], 1)
    print("Hyperparamater Search Space:", num_hyperparams)

    # Length of every merged LF; derived from the gold frames so the function
    # does not depend on a notebook-level variable.
    num_tokens = len(df_data_train) + len(df_data_val)

    Y_train = df_data_train[picos]
    Y_val = df_data_val[picos]

    # Choosing the number of LF's from UMLS all
    for partition in partitioned_d_umls:

        combined_lf = getLFs(partition, umls_d, num_tokens)
        assert len(partition) == len(combined_lf)

        print( 'Total number of UMLS partitions: ', len(partition) )
        combined_lf.extend( list(non_umls_d.values()) ) # Combine with level 2
        combined_lf.extend( list(ds_d.values()) ) # Combine with level 3
        combined_lf.extend( list(heur_d.values()) ) # Combine with level 4
        combined_lf.extend( list(dict_d.values()) ) # combine with level 4

        # Rows become tokens and columns become LFs.
        L = np.transpose(np.array(combined_lf))
        L_train, L_val = train_test_split(L, test_size=0.20, shuffle=False)

        best_model, best_config, best_score = grid_search(LMsnorkel,
                                                          model_class_init,
                                                          paramgrid,
                                                          train=(L_train, Y_train),
                                                          dev=(L_val, Y_val),
                                                          n_model_search=n_model_search,
                                                          val_metric='f1_macro',
                                                          seed=1234,
                                                          tag_fmt_ckpnt='IO')

        if best_score['f1_macro'] > best_f1_macro:
            best_f1_macro = best_score['f1_macro']
            best_overall_model = best_model
            best_overall_config = best_config

        print('Best overall macro F1 score: ', best_f1_macro)
        print('Best overall configuration: ', best_overall_config)

    return best_overall_model, best_overall_config, best_f1_macro

In [50]:
# Hyperparameter grid; key order is significant because sampled value tuples
# are zipped back onto these keys in grid_search.
param_grid = dict(
    lr=[0.001, 0.0001],
    l2=[0.001, 0.0001],
    n_epochs=[50, 100, 200, 600, 700, 1000, 2000],
    prec_init=[0.6, 0.7, 0.8, 0.9],
    optimizer=["adamax", "adam", "sgd"],
    lr_scheduler=['constant'],
)

In [None]:
# Participants ('p') label model search
train(
    partitioned_d_umls=partitioned_p_umls,
    umls_d=umls_p,
    non_umls_d=nonumls_p,
    ds_d=ds_p,
    heur_d=heur_p,
    dict_d=dict_p,
    df_data_train=df_data_train,
    df_data_val=df_data_val,
    picos='p',
    paramgrid=param_grid,
)

Hyperparamater Search Space: 336
Total number of UMLS partitions:  1
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.89 | precision: 0.00 | recall: 0.00 | f1: 0.00 | f1_macro: 46.78
[DEV]   accuracy: 87.70 | precision: 0.00 | recall: 0.00 | f1: 0.00 | f1_macro: 46.72
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.05 | precision: 32.19 | recall: 6.29 | f1: 10.52 | f1_macro: 51.77
[DEV]   accuracy: 86.93 | precision: 33.68 | recall: 6.46 | f1: 10.84 | f1_macro: 51.89
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600,

[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.22 | precision: 34.59 | recall: 6.17 | f1: 10.48 | f1_macro: 51.80
[DEV]   accuracy: 87.10 | precision: 36.13 | recall: 6.41 | f1: 10.89 | f1_macro: 51.97
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 85.17 | precision: 24.36 | recall: 10.68 | f1: 14.85 | f1_macro: 53.36
[DEV]   accuracy: 85.10 | precision: 25.42 | recall: 10.93 | f1: 15.29 | f1_macro: 53.56
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[1

In [53]:
# Interventions ('i') label model search
train(
    partitioned_d_umls=partitioned_i_umls,
    umls_d=umls_i,
    non_umls_d=nonumls_i,
    ds_d=ds_i,
    heur_d=heur_i,
    dict_d=dict_i,
    df_data_train=df_data_train,
    df_data_val=df_data_val,
    picos='i',
    paramgrid=param_grid,
)

Hyperparamater Search Space: 336
Total number of UMLS partitions:  1
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 90.70 | precision: 48.79 | recall: 3.74 | f1: 6.94 | f1_macro: 51.02
[DEV]   accuracy: 90.62 | precision: 47.69 | recall: 3.88 | f1: 7.17 | f1_macro: 51.12
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 86.31 | precision: 27.03 | recall: 27.94 | f1: 27.48 | f1_macro: 59.96
[DEV]   accuracy: 86.20 | precision: 27.05 | recall: 28.08 | f1: 27.56 | f1_macro: 59.97
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 

[TRAIN] accuracy: 90.42 | precision: 28.81 | recall: 2.13 | f1: 3.96 | f1_macro: 49.46
[DEV]   accuracy: 90.41 | precision: 31.04 | recall: 2.15 | f1: 4.03 | f1_macro: 49.49
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 90.32 | precision: 36.25 | recall: 5.68 | f1: 9.81 | f1_macro: 52.35
[DEV]   accuracy: 90.28 | precision: 37.52 | recall: 6.00 | f1: 10.34 | f1_macro: 52.60
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 90.07 | precision: 36.87 | recall: 9.75 | f1: 15.42 | f1_macro: 55.07
[DEV]   accuracy: 90.06 | precision: 37.87 | recall: 10.05 | f1: 15.

[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 89.88 | precision: 35.36 | recall: 10.90 | f1: 16.67 | f1_macro: 55.64
[DEV]   accuracy: 89.86 | precision: 36.20 | recall: 11.20 | f1: 17.10 | f1_macro: 55.85
----------------------------------------------------------------------------------------
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.6000773130770576
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 

[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.6000773130770576
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  13
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 90.59 | precision: 31.34 | recall: 1.18 | f1: 2.27 | f1_macro: 48.66
[DEV]   accuracy: 90.53 | precision: 30.66 | recall: 1.09 | f1: 2.10 | f1_macro: 48.56
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, '

Total number of UMLS partitions:  17
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 90.57 | precision: 31.20 | recall: 1.31 | f1: 2.52 | f1_macro: 48.78
[DEV]   accuracy: 90.50 | precision: 29.01 | recall: 1.19 | f1: 2.29 | f1_macro: 48.65
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 89.36 | precision: 24.14 | recall: 6.80 | f1: 10.61 | f1_macro: 52.48
[DEV]   accuracy: 89.33 | precision: 24.45 | recall: 6.81 | f1: 10.65 | f1_macro: 52.49
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer'

[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 89.14 | precision: 22.53 | recall: 6.95 | f1: 10.62 | f1_macro: 52.42
[DEV]   accuracy: 89.10 | precision: 22.65 | recall: 6.92 | f1: 10.60 | f1_macro: 52.40
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.19 | precision: 25.60 | recall: 14.24 | f1: 18.30 | f1_macro: 55.97
[DEV]   accuracy: 88.13 | precision: 25.83 | recall: 14.46 | f1: 18.54 | f1_macro: 56.07
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[1

{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.77 | precision: 24.95 | recall: 15.80 | f1: 19.35 | f1_macro: 56.36
[DEV]   accuracy: 87.74 | precision: 25.34 | recall: 16.04 | f1: 19.64 | f1_macro: 56.50
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.6000773130770576
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'op

[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.6000773130770576
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  30
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 90.67 | precision: 34.61 | recall: 0.50 | f1: 0.99 | f1_macro: 48.05
[DEV]   accuracy: 90.62 | precision: 35.15 | recall: 0

[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.6000773130770576
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  34
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 90.68 | precision: 20.00 | recall: 0.13 | f1: 0.27 | f1_macro: 47.69
[DEV]   accuracy: 90.63 | precision: 17.60 | recall: 0.09 | f1: 0.17 | f1_macro: 47.63
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adam

Total number of UMLS partitions:  38
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 90.64 | precision: 32.81 | recall: 0.76 | f1: 1.49 | f1_macro: 48.29
[DEV]   accuracy: 90.60 | precision: 34.31 | recall: 0.70 | f1: 1.36 | f1_macro: 48.21
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.50 | precision: 27.03 | recall: 14.06 | f1: 18.50 | f1_macro: 56.15
[DEV]   accuracy: 88.42 | precision: 27.07 | recall: 14.12 | f1: 18.56 | f1_macro: 56.16
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimize

In [54]:
# Outcomes ('o') label model search
train(
    partitioned_d_umls=partitioned_o_umls,
    umls_d=umls_o,
    non_umls_d=nonumls_o,
    ds_d=ds_o,
    heur_d=heur_o,
    dict_d=dict_o,
    df_data_train=df_data_train,
    df_data_val=df_data_val,
    picos='o',
    paramgrid=param_grid,
)

Hyperparamater Search Space: 336
Total number of UMLS partitions:  1
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.29 | precision: 26.29 | recall: 5.98 | f1: 9.75 | f1_macro: 51.46
[DEV]   accuracy: 87.11 | precision: 25.20 | recall: 5.68 | f1: 9.27 | f1_macro: 51.16
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 85.50 | precision: 26.07 | recall: 14.40 | f1: 18.55 | f1_macro: 55.30
[DEV]   accuracy: 85.32 | precision: 25.89 | recall: 14.30 | f1: 18.42 | f1_macro: 55.18
----------------------------------------------------------------------------------------
[4] Label Model
[5] Label Model
[6] Label Model
[7] Lab

[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 74.68 | precision: 20.19 | recall: 40.88 | f1: 27.03 | f1_macro: 55.86
[DEV]   accuracy: 74.62 | precision: 20.30 | recall: 40.66 | f1: 27.08 | f1_macro: 55.86
----------------------------------------------------------------------------------------
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.558678196111414
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  5

[TRAIN] accuracy: 88.28 | precision: 34.10 | recall: 2.35 | f1: 4.41 | f1_macro: 49.08
[DEV]   accuracy: 88.12 | precision: 32.53 | recall: 2.31 | f1: 4.32 | f1_macro: 48.99
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 86.63 | precision: 27.41 | recall: 10.06 | f1: 14.71 | f1_macro: 53.73
[DEV]   accuracy: 86.45 | precision: 26.85 | recall: 9.77 | f1: 14.33 | f1_macro: 53.49
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 84.87 | precision: 25.65 | recall: 16.81 | f1: 20.31 | f1_macro: 55.98
[DEV]   accuracy: 84.64 | precision: 25.21 | recall: 16.54 | f1: 

[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 84.79 | precision: 26.00 | recall: 17.67 | f1: 21.04 | f1_macro: 56.31
[DEV]   accuracy: 84.56 | precision: 25.56 | recall: 17.36 | f1: 20.68 | f1_macro: 56.06
----------------------------------------------------------------------------------------
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.5606212140618395
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'ad

{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.51 | precision: 30.38 | recall: 0.18 | f1: 0.36 | f1_macro: 47.13
[DEV]   accuracy: 88.38 | precision: 24.86 | recall: 0.14 | f1: 0.28 | f1_macro: 47.05
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.67 | precision: 32.82 | recall: 7.17 | f1: 11.76 | f1_macro: 52.57
[DEV]   accuracy: 87.47 | precision: 31.71 | recall: 7.04 | f1: 11.52 | f1_macro: 52.39
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 84.60 | pre

[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 84.21 | precision: 25.03 | recall: 18.88 | f1: 21.53 | f1_macro: 56.38
[DEV]   accuracy: 84.02 | precision: 24.86 | recall: 18.72 | f1: 21.36 | f1_macro: 56.23
----------------------------------------------------------------------------------------
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.5623296163661227
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'ad

Total number of UMLS partitions:  22
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.53 | precision: 49.70 | recall: 0.07 | f1: 0.13 | f1_macro: 47.03
[DEV]   accuracy: 88.40 | precision: 35.29 | recall: 0.04 | f1: 0.08 | f1_macro: 46.96
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.27 | precision: 31.21 | recall: 9.14 | f1: 14.14 | f1_macro: 53.63
[DEV]   accuracy: 87.08 | precision: 30.60 | recall: 9.05 | f1: 13.97 | f1_macro: 53.49
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer'

[TRAIN] accuracy: 85.61 | precision: 27.04 | recall: 14.98 | f1: 19.28 | f1_macro: 55.69
[DEV]   accuracy: 85.41 | precision: 26.49 | recall: 14.59 | f1: 18.81 | f1_macro: 55.40
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 83.81 | precision: 24.89 | recall: 20.41 | f1: 22.43 | f1_macro: 56.69
[DEV]   accuracy: 83.60 | precision: 24.63 | recall: 20.15 | f1: 22.17 | f1_macro: 56.50
----------------------------------------------------------------------------------------
BEST


Best overall macro F1 score:  0.5719613078917631
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  29
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.50 | precision: 46.54 | recall: 1.96 | f1: 3.75 | f1_macro: 48.82
[DEV]   accuracy: 88.38 | precision: 47.22 | recall: 2.16 | f1: 4.13 | f1_macro: 48.97
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 86.94 | precision: 30.77 | recall: 11.13 | f1: 16.35 | f1_macro: 54.63
[DEV]   accuracy: 86.73 | precision: 30.01 | recall: 10.86 | 

{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 84.90 | precision: 27.94 | recall: 20.03 | f1: 23.33 | f1_macro: 57.48
[DEV]   accuracy: 84.73 | precision: 27.54 | recall: 19.44 | f1: 22.79 | f1_macro: 57.16
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 83.12 | precision: 25.39 | recall: 24.36 | f1: 24.86 | f1_macro: 57.68
[DEV]   accuracy: 82.93 | precision: 25.06 | recall: 23.75

Best overall macro F1 score:  0.5742655274999584
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  36
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.56 | precision: 65.07 | recall: 0.62 | f1: 1.23 | f1_macro: 47.58
[DEV]   accuracy: 88.45 | precision: 73.55 | recall: 0.57 | f1: 1.12 | f1_macro: 47.50
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.25 | precision: 32.07 | recall: 9.98 | f1: 15.23 | f1_macro: 54.17
[DEV]   accuracy: 87.03 | precision: 30.76 | recall: 9.48 | f1

{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 84.42 | precision: 25.97 | recall: 19.35 | f1: 22.17 | f1_macro: 56.76
[DEV]   accuracy: 84.28 | precision: 25.91 | recall: 19.17 | f1: 22.04 | f1_macro: 56.65
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 82.28 | precision: 24.31 | recall: 25.79 | f1: 25.02 | f1_macro: 57.49
[DEV]   accuracy: 82.11 | precision: 24.15 | recall: 25.37

Best overall macro F1 score:  0.5744191899815492
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  43
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.52 | precision: 49.46 | recall: 2.86 | f1: 5.41 | f1_macro: 49.65
[DEV]   accuracy: 88.41 | precision: 49.79 | recall: 3.00 | f1: 5.66 | f1_macro: 49.74
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 86.64 | precision: 30.53 | recall: 12.93 | f1: 18.17 | f1_macro: 55.45
[DEV]   accuracy: 86.48 | precision: 30.26 | recall: 12.77 | 

{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 83.51 | precision: 25.55 | recall: 22.87 | f1: 24.14 | f1_macro: 57.44
[DEV]   accuracy: 83.36 | precision: 25.37 | recall: 22.44 | f1: 23.81 | f1_macro: 57.24
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 81.83 | precision: 24.18 | recall: 27.35 | f1: 25.67 | f1_macro: 57.66
[DEV]   accuracy: 81.68 | precision: 24.06 | recall: 26.94

Best overall macro F1 score:  0.5749266156351646
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  50
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.58 | precision: 55.43 | recall: 2.24 | f1: 4.31 | f1_macro: 49.12
[DEV]   accuracy: 88.48 | precision: 57.56 | recall: 2.44 | f1: 4.67 | f1_macro: 49.27
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 86.94 | precision: 31.39 | recall: 11.72 | f1: 17.07 | f1_macro: 54.99
[DEV]   accuracy: 86.77 | precision: 30.99 | recall: 11.53 | 

Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.35 | precision: 41.76 | recall: 3.93 | f1: 7.18 | f1_macro: 50.48
[DEV]   accuracy: 88.21 | precision: 40.82 | recall: 3.78 | f1: 6.92 | f1_macro: 50.31
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 86.61 | precision: 31.30 | recall: 14.05 | f1: 19.39 | f1_macro: 56.04
[DEV]   accuracy: 86.44 | precision: 30.87 | recall: 13.69 | f1: 18.96 | f1_macro: 55.78
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant

[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 80.73 | precision: 23.75 | recall: 30.79 | f1: 26.81 | f1_macro: 57.86
[DEV]   accuracy: 80.64 | precision: 23.75 | recall: 30.34 | f1: 26.64 | f1_macro: 57.74
----------------------------------------------------------------------------------------
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.577437012784455
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'ada

Total number of UMLS partitions:  61
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.38 | precision: 42.11 | recall: 3.61 | f1: 6.65 | f1_macro: 50.23
[DEV]   accuracy: 88.24 | precision: 41.44 | recall: 3.53 | f1: 6.51 | f1_macro: 50.12
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 86.93 | precision: 33.14 | recall: 13.73 | f1: 19.42 | f1_macro: 56.15
[DEV]   accuracy: 86.75 | precision: 32.67 | recall: 13.49 | f1: 19.09 | f1_macro: 55.94
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimize

[TRAIN] accuracy: 81.79 | precision: 24.23 | recall: 27.63 | f1: 25.82 | f1_macro: 57.72
[DEV]   accuracy: 81.66 | precision: 24.24 | recall: 27.38 | f1: 25.71 | f1_macro: 57.63
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 80.00 | precision: 23.57 | recall: 33.17 | f1: 27.56 | f1_macro: 57.98
[DEV]   accuracy: 79.95 | precision: 23.70 | recall: 32.89 | f1: 27.55 | f1_macro: 57.96
----------------------------------------------------------------------------------------
BEST


[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.5796060660131088
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 2000, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  68
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.20 | precision: 38.57 | recall: 4.82 | f1: 8.57 | f1_macro: 51.13
[DEV]   accuracy: 88.06 | precision: 37.82 | recall: 4.65 | f1: 8.28 | f1_macro: 50.95
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700

[TRAIN] accuracy: 86.71 | precision: 33.15 | recall: 15.67 | f1: 21.28 | f1_macro: 57.01
[DEV]   accuracy: 86.56 | precision: 32.94 | recall: 15.39 | f1: 20.98 | f1_macro: 56.82
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 80.84 | precision: 23.67 | recall: 30.14 | f1: 26.51 | f1_macro: 57.75
[DEV]   accuracy: 80.75 | precision: 23.70 | recall: 29.78 | f1: 26.40 | f1_macro: 57.66
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'consta

Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 88.16 | precision: 38.25 | recall: 5.26 | f1: 9.25 | f1_macro: 51.46
[DEV]   accuracy: 88.00 | precision: 37.27 | recall: 5.14 | f1: 9.03 | f1_macro: 51.30
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 86.46 | precision: 32.71 | recall: 17.10 | f1: 22.46 | f1_macro: 57.52
[DEV]   accuracy: 86.30 | precision: 32.41 | recall: 16.75 | f1: 22.08 | f1_macro: 57.29
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant

[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 82.54 | precision: 25.41 | recall: 26.97 | f1: 26.17 | f1_macro: 58.13
[DEV]   accuracy: 82.43 | precision: 25.36 | recall: 26.56 | f1: 25.94 | f1_macro: 57.99
----------------------------------------------------------------------------------------
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.5855618601718491
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  79
Grid search over 25 confi

In [45]:
# Launch the label-model grid search for the 'p' target, passing the partitioned
# UMLS labeling functions together with the other labeling-source groups
# (non-UMLS, ds, heuristic, dictionary) and the train/validation frames.
# Judging by the log this cell emits, the run iterates over increasing UMLS
# partition counts, tries 25 configs for each, and prints [TRAIN]/[DEV] metrics
# plus the running best macro-F1 and configuration.
# NOTE(review): 'p' presumably selects the Participant entity -- confirm against train().
# NOTE(review): this call is long-running and floods the cell output; consider
# trimming or capturing the log before sharing the notebook.
train(partitioned_p_umls, umls_p, nonumls_p, ds_p, heur_p, dict_p, df_data_train, df_data_val, 'p', paramgrid = param_grid)

Hyperparamater Search Space: 336
Total number of UMLS partitions:  40
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.58 | precision: 40.81 | recall: 5.78 | f1: 10.12 | f1_macro: 51.73
[DEV]   accuracy: 87.43 | precision: 42.05 | recall: 5.81 | f1: 10.20 | f1_macro: 51.72
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 85.84 | precision: 28.60 | recall: 11.29 | f1: 16.19 | f1_macro: 54.23
[DEV]   accuracy: 85.73 | precision: 29.34 | recall: 11.37 | f1: 16.39 | f1_macro: 54.29
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs

Total number of UMLS partitions:  44
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.57 | precision: 41.16 | recall: 6.23 | f1: 10.82 | f1_macro: 52.07
[DEV]   accuracy: 87.42 | precision: 42.21 | recall: 6.19 | f1: 10.79 | f1_macro: 52.01
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 85.87 | precision: 28.26 | recall: 10.86 | f1: 15.68 | f1_macro: 53.99
[DEV]   accuracy: 85.76 | precision: 28.95 | recall: 10.88 | f1: 15.81 | f1_macro: 54.02
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimi

[TRAIN] accuracy: 82.80 | precision: 24.76 | recall: 20.63 | f1: 22.50 | f1_macro: 56.41
[DEV]   accuracy: 82.80 | precision: 25.59 | recall: 20.87 | f1: 22.99 | f1_macro: 56.65
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 83.01 | precision: 25.07 | recall: 20.27 | f1: 22.41 | f1_macro: 56.44
[DEV]   accuracy: 82.98 | precision: 25.85 | recall: 20.54 | f1: 22.89 | f1_macro: 56.66
----------------------------------------------------------------------------------------
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BES

Best overall macro F1 score:  0.5676474343359563
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  51
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.59 | precision: 41.41 | recall: 5.90 | f1: 10.33 | f1_macro: 51.83
[DEV]   accuracy: 87.45 | precision: 42.70 | recall: 5.87 | f1: 10.33 | f1_macro: 51.79
----------------------------------------------------------------------------------------
[1] Label Model
[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 85.98 | precision: 29.69 | recall: 11.50 | f1: 16.58 | f1_macro: 54.47
[DEV]   accuracy: 85.88 | precision: 30.41 | recall: 11.5

[TRAIN] accuracy: 82.82 | precision: 25.46 | recall: 21.73 | f1: 23.45 | f1_macro: 56.89
[DEV]   accuracy: 82.77 | precision: 26.12 | recall: 21.94 | f1: 23.85 | f1_macro: 57.07
----------------------------------------------------------------------------------------
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.5723380566697189
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  55
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_init': 0.8, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 87.60 | precision: 41.40 | recall: 5.84 | f1: 10.24 |

[2] Label Model
[3] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 700, 'prec_init': 0.9, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 86.01 | precision: 31.34 | recall: 13.04 | f1: 18.42 | f1_macro: 55.38
[DEV]   accuracy: 85.85 | precision: 31.71 | recall: 13.03 | f1: 18.47 | f1_macro: 55.36
----------------------------------------------------------------------------------------
[4] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 600, 'prec_init': 0.6, 'optimizer': 'adam', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 82.33 | precision: 24.99 | recall: 22.96 | f1: 23.93 | f1_macro: 56.97
[DEV]   accuracy: 82.33 | precision: 25.72 | recall: 23.11 | f1: 24.35 | f1_macro: 57.17
----------------------------------------------------------------------------------------
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model


[17] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 84.04 | precision: 28.13 | recall: 20.47 | f1: 23.70 | f1_macro: 57.39
[DEV]   accuracy: 84.00 | precision: 28.93 | recall: 20.68 | f1: 24.12 | f1_macro: 57.59
----------------------------------------------------------------------------------------
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.57612149546103
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
Total number of UMLS partitions:  62
Grid search over 25 configs
[0] Label Model
{'lr': 0.001, 'l2': 0.0001, 'n_epochs': 200, 'prec_

[TRAIN] accuracy: 85.69 | precision: 32.96 | recall: 17.55 | f1: 22.90 | f1_macro: 57.51
[DEV]   accuracy: 85.55 | precision: 33.32 | recall: 17.49 | f1: 22.94 | f1_macro: 57.48
----------------------------------------------------------------------------------------
[4] Label Model
[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 84.33 | precision: 28.98 | recall: 20.25 | f1: 23.84 | f1_macro: 57.56
[DEV]   accuracy: 84.27 | precision: 29.73 | recall: 20.45 | f1: 24.23 | f1_macro: 57.73
----------------------------------------------------------------------------------------
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24]

[5] Label Model
[6] Label Model
[7] Label Model
[8] Label Model
[9] Label Model
[10] Label Model
[11] Label Model
[12] Label Model
[13] Label Model
[14] Label Model
[15] Label Model
[16] Label Model
[17] Label Model
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
[TRAIN] accuracy: 83.83 | precision: 27.90 | recall: 21.19 | f1: 24.09 | f1_macro: 57.52
[DEV]   accuracy: 83.78 | precision: 28.68 | recall: 21.43 | f1: 24.53 | f1_macro: 57.72
----------------------------------------------------------------------------------------
[18] Label Model
[19] Label Model
[20] Label Model
[21] Label Model
[22] Label Model
[23] Label Model
[24] Label Model
BEST
{'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0.8, 'optimizer': 'adamax', 'lr_scheduler': 'constant', 'seed': 1234}
Best overall macro F1 score:  0.5783322738359847
Best overall configuration:  {'lr': 0.001, 'l2': 0.001, 'n_epochs': 1000, 'prec_init': 0