In [124]:
import sys
import os
import json
sys.path.append('..')
# sys.path.append('datasets')
# sys.path.append('models')
import pandas as pd

from models.propbank_encoder import PropbankEncoder
from models.conll_evaluator import ConllEvaluator

# Baseline hyperparameters. deep_leaderboard() copies this dict for every
# experiment and overrides the entries it can recover from the output folder.
default = dict(
    batch_size=250,
    chunks=False,
    ctxp=1,
    embeddings_model="glo50",
    embeddings_trainable=False,
    epochs=1000,
    hidden_layers=[32],
    input_labels=[
        "ID", "FORM", "MARKER", "GPOS",
        "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1",
    ],
    lr=0.005,
    ru="BasicLSTM",
    target_labels=["T"],
    kfold=False,
    version="1.0",
)

# Recover the PropbankEncoder from its stored binary
# (presumably a pickle written by the project's dataset pipeline -- TODO confirm).
pe = PropbankEncoder.recover('../datasets/binaries/1.0/deep_glo50.pickle')

# The evaluator wraps the encoder; the current working directory is passed as
# its target_dir.
ev = ConllEvaluator(pe, target_dir=os.getcwd())



In [125]:
def deep_leaderboard(path, file_list):
    """Build one leaderboard record for a single experiment output directory.

    Hyperparameters start from a shallow copy of the module-level ``default``
    dict (list values are therefore shared with ``default``; do not mutate
    them in place). They are overridden by ``params.json`` when that name is
    in ``file_list``. Otherwise they are parsed from the directory names of
    ``path`` which, once '..', 'outputs' and '1.0' are dropped, are read
    positionally:

        0: embeddings model
        1: hidden layers, sizes joined by 'x' after the last '_'
        2: ctxp, integer after the last '_'
        3: target labels joined by '_'
        4: the literal 'kfold' when k-fold was used
        5: learning rate, float after the last '_'
        6: timestamp (always read from the path, even with params.json)

    F1 scores are obtained through the module-level evaluator ``ev``.

    Args:
        path: experiment directory, as yielded by ``os.walk``.
        file_list: names of the files located directly inside ``path``.

    Returns:
        dict: the hyperparameters plus 'timestamp', 'train-f1' and
        'valid-f1'. A score is 0.0 when its .conll file is not listed.

    Raises:
        IndexError: if ``path`` has fewer components than the layout above
            requires.
    """
    filter_tuple = ('outputs', '1.0', '..')
    # os.walk joins sub-directories with the platform separator, so normalise
    # the path and split on os.sep instead of assuming '/'.
    path_parts = os.path.normpath(path).split(os.sep)
    params_list = [hp for hp in path_parts if hp not in filter_tuple]

    params_dict = default.copy()
    if 'params.json' in file_list:
        # Preferred source: the parameters file saved with the experiment.
        with open(os.path.join(path, 'params.json'), mode='r') as f:
            params_dict.update(json.load(f))
    else:
        # Fallback: recover the parameters from the directory names.
        hidden_spec = params_list[1].split('_')[-1]
        params_dict['embeddings_model'] = params_list[0]
        params_dict['hidden_layers'] = [int(size) for size in hidden_spec.split('x')]
        params_dict['ctxp'] = int(params_list[2].split('_')[-1])
        params_dict['target_labels'] = params_list[3].split('_')
        params_dict['kfold'] = params_list[4] == 'kfold'
        params_dict['lr'] = float(params_list[5].split('_')[-1])

    params_dict['timestamp'] = params_list[6]

    def read_f1(*candidates):
        # Evaluate the first listed candidate file; 0.0 when none is present.
        for file_name in candidates:
            if file_name in file_list:
                ev.evaluate_fromconllfile(os.path.join(path, file_name))
                return ev.f1
        return 0.0

    train_f1 = read_f1('train.conll')
    # The best validation snapshot takes precedence over the plain one.
    valid_f1 = read_f1('best-valid.conll', 'valid.conll')

    params_dict['train-f1'] = train_f1
    params_dict['valid-f1'] = valid_f1
    print(train_f1, valid_f1)

    return params_dict
        
            

In [126]:
# Scan the experiment outputs. Only leaf directories (those without any
# sub-directory) are kept, each paired with the names of the files it holds.
dir_path = '../outputs/1.0/'
zip_list = [
    (root, file_names)
    for root, child_dirs, file_names in os.walk(dir_path)
    if len(child_dirs) == 0
]

# One leaderboard record per leaf directory.
exp_list = [deep_leaderboard(root, file_names) for root, file_names in zip_list]

df = pd.DataFrame(exp_list)




62.42 46.07
71.85 47.72
0.0 0.0
0.0 0.0
0.0 0.0
65.7 49.52
79.95 46.64
59.76 45.79
95.65 46.4
64.0 47.56
66.38 46.85
98.26 47.79
98.68 64.98
98.3 46.8
0.0 28.34
0.0 26.02
0.0 58.59
0.0 53.83
0.0 33.48
0.0 48.77
87.66 47.95
92.33 50.77
79.92 50.47
90.83 47.96
99.76 44.22
88.09 47.59
99.76 47.5
99.72 47.4
74.37 49.08
82.25 51.29
98.84 45.23
72.54 46.44
62.48 40.96
98.36 49.11
43.44 39.35
42.34 41.41
77.01 48.38
78.52 42.22
63.13 45.52


In [127]:
# Renumber the rows, then order the experiments by validation F1, best first.
df = df.reset_index(drop=True).sort_values('valid-f1', ascending=False)
df.head()

Unnamed: 0,batch_size,chunks,ctxp,embeddings_model,embeddings_trainable,epochs,hidden_layers,input_labels,kfold,lr,ru,target_labels,timestamp,train-f1,valid-f1,version
12,250,False,1,glo50,False,1000,[32],"[ID, FORM, MARKER, GPOS, FORM_CTX_P-1, FORM_CT...",False,0.005,BasicLSTM,[T],2018-09-22 130752,98.68,64.98,1.0
16,250,False,1,glo50,False,1000,[32],"[ID, FORM, MARKER, GPOS, FORM_CTX_P-1, FORM_CT...",25,0.005,BasicLSTM,[T],2018-09-04 160737,0.0,58.59,1.0
17,250,False,1,glo50,False,1000,[32],"[ID, FORM, MARKER, GPOS, FORM_CTX_P-1, FORM_CT...",25,0.005,BasicLSTM,[T],2018-09-05 150321,0.0,53.83,1.0
29,250,False,1,wan100,False,1000,"[16, 16]","[ID, FORM, MARKER, GPOS, FORM_CTX_P-1, FORM_CT...",False,0.005,BasicLSTM,[IOB],2018-08-27 150000,82.25,51.29,1.0
21,250,False,1,glo50,False,1000,[32],"[ID, FORM, MARKER, GPOS, FORM_CTX_P-1, FORM_CT...",False,0.005,BasicLSTM,"[R, IOB]",2018-09-25 130945,92.33,50.77,1.0


In [128]:
# Write the leaderboard frame next to the notebook as a ';'-separated,
# UTF-8 encoded CSV (the index is written as the first column).
df.to_csv(
    'leaderboard.csv',
    sep=';',
    encoding='utf-8',
)