 # Comparing the outputs of multiple experiments
 
 **Note:** Move this notebook to the repository root directory and start Jupyter from there. Otherwise the relative paths used below (`datasets/`, `outputs/`) and the `models` imports will break :)
 
 ## 1. Imports

In [7]:
import os 
#helps speedup file io
import pandas as pd
from collections import defaultdict

# custom models that allow evaluation
from models.evaluator_conll import EvaluatorConll # Runs conll official script under the hood
# from models.propbank import Propbank # Propbank class that acts as a wrapper to binaries
from models.propbank_encoder import PropbankEncoder
from models.propbank_mappers import MapperT2ARG


## 2. Initializing Propbank and Evaluator

In [8]:
# Restore the Propbank encoder from its pickle and wrap it with the mapper
# that the svm evaluation below needs.
propbank = PropbankEncoder.recover('datasets/binaries/deep_glo50.pickle')
mapper_t2arg = MapperT2ARG(propbank)


def _make_evaluator(ds_type):
    '''
        Builds the EvaluatorConll of a single dataset split

        args:
            ds_type .: str split name, e.g. 'train' or 'valid'

        returns:
            evaluator .: EvaluatorConll fed with the split's S, P, PRED and ARG
                columns, each one requested from propbank with the 'CAT' option
    '''
    columns = [
        propbank.column(ds_type, name, 'CAT')
        for name in ('S', 'P', 'PRED', 'ARG')
    ]
    return EvaluatorConll(ds_type, *columns)


train_evaluator = _make_evaluator('train')
valid_evaluator = _make_evaluator('valid')

 ## 3. Iterating over the outputs directory

In [9]:
def deep_leader_board(thisdir, f1_valid, f1_train=-1):
    '''
        Extracts model's experiment encoded within thisdir

        The path is expected to look like
            <outputs>/<model>_<predictor>[_<version>...]/<lr>_<hs>_<ctx-p>[_<embeddings>...]/[...]/<subversion>
        e.g.
            outputs/dblstm_crf_4/lr0.0005_hs16x16x16x16_ctx-p1_glove_s50/01

        args:
            thisdir  .: str path of the experiment's leaf directory; its first
                component (the outputs directory) is ignored
            f1_valid .: number validation F1 score, stored as is under 'valid-f1'
            f1_train .: number training F1 score; a negative value (default -1)
                or None means "not available"

        returns:
            d .: dict<str, dict<str,obj>> is a nested dict
                keys: are a experiment id (path components joined by '_')
                    keys: fields
                    values: experimental values
                'train-f1' is None when the training score is not available;
                'ratio-f1' (train / valid) is None unless the training score
                is available and f1_valid > 0

        raises:
            IndexError, ValueError .: thisdir doesn't follow the expected layout
    '''
    # kept as a defaultdict so the returned type is the same as before
    d = defaultdict(dict)

    # os.walk yields OS specific separators: normalize them so the split below
    # also works where os.sep isn't '/'
    # first element in split list is the outputs directory
    params = thisdir.replace(os.sep, '/').split('/')[1:]
    outer_key = '_'.join(params)

    # params[0] encodes model_predictor_version_<subversion>
    # params[-1] encodes subversion or subsubversion
    parsestr = '{:}_{:}'.format(params[0], params[-1])
    # version grabs everything after first two params
    model, predictor, *version = parsestr.split('_')
    if len(version) == 1:
        # only the subversion is present: the major version defaults to 1
        version = '1.{:}'.format(version[0])
    else:
        version = '.'.join(version)
    # 'sanity_check' is a single token that got split on its underscore
    version = version.replace('sanity.check', 'sanity-check')

    # params[1] encodes the hyperparameters, the embeddings take the tail
    lr, hs, ctx_p, *embeddings = params[1].split('_')

    has_train = f1_train is not None and f1_train >= 0.0
    has_ratio = has_train and f1_valid is not None and f1_valid > 0.0

    d[outer_key] = {
        'model': model,
        'predictor': predictor,
        'version': version,
        'lr': lr.replace('lr', ''),
        'hidden': hs.replace('hs', ''),
        'ctx-p': ctx_p.replace('ctx-p', ''),
        'embeddings': '/'.join(embeddings),
        'valid-f1': f1_valid,
        # a known training score is reported even if the ratio is undefined
        'train-f1': f1_train if has_train else None,
        'ratio-f1': f1_train / f1_valid if has_ratio else None
    }

    return d


In [10]:
def svm_leader_board(thisdir, f1_valid, f1_train=-1):
    '''
        Placeholder for the svm counterpart of deep_leader_board

        Not implemented yet: the arguments are accepted but ignored.

        args:
            thisdir  .: path of the experiment directory (unused for now)
            f1_valid .: validation F1 score (unused for now)
            f1_train .: training F1 score, defaults to -1 (unused for now)

        returns:
            None .: nothing is extracted yet
    '''
    # FUTURE: feature engineering
    # Draft of the intended parsing (encoding and optargs), kept for reference:
    #     _, _, encoding, *optargs = thisdir.split('/')
    #     version = optargs[-1]
    #     optargs = optargs[:-1]
    return None

In [11]:
deep_d = {}
svm_d = {}  # reserved for svm_leader_board results; nothing fills it yet

# thisdir encodes model and a version, hyperparams and subversion
for thisdir, thissubdir, thisfiles in os.walk('outputs'):
    # leaf condition: a directory without subdirectories that holds files, i.e.
    # model_name, model_version, model_hparams, subversion are all known
    if thissubdir or not thisfiles:
        continue

    # this is an "older" format before conll score
    if 'Yhat_valid.csv' in thisfiles:
        filepath = os.path.join(thisdir, 'Yhat_valid.csv')
        d = pd.read_csv(filepath, sep=',', index_col=0).to_dict()
        ARG_d = d['Y_0'] if 'Y_0' in d else d['Y_ARG']
        valid_evaluator.evaluate(ARG_d)
        deep_d.update(deep_leader_board(thisdir, valid_evaluator.f1))

    elif 'lstm' in thisdir and (('conllscore_valid.txt' in thisfiles) or ('conllscore_train.txt' in thisfiles)):
        # The evaluators are shared by every experiment, so their .f1 still
        # holds the score of the previously visited directory. Scores are
        # therefore read only right after an evaluation made for THIS
        # directory; a missing file yields an explicit "missing" value.
        f1_valid = float('nan')  # NaN fails `> 0` (no ratio) and sorts last in pandas
        f1_train = -1            # deep_leader_board's own "not available" sentinel

        if 'conllscore_valid.txt' in thisfiles:
            filepath = os.path.join(thisdir, 'conllscore_valid.txt')
            valid_evaluator.evaluate_fromconllfile(filepath)
            f1_valid = valid_evaluator.f1

        if 'conllscore_train.txt' in thisfiles:
            filepath = os.path.join(thisdir, 'conllscore_train.txt')
            train_evaluator.evaluate_fromconllfile(filepath)
            f1_train = train_evaluator.f1

        deep_d.update(deep_leader_board(thisdir, f1_valid, f1_train))

    elif 'svm' in thisdir and (('train.pickle' in thisfiles) or ('valid.pickle' in thisfiles)):
        # same precaution as above: None is printed for a split without a file
        f1_train = None
        f1_valid = None

        if 'train.pickle' in thisfiles:
            filepath = os.path.join(thisdir, 'train.pickle')
            train_evaluator.evaluate_fromliblinear(filepath, mapper_t2arg)
            f1_train = train_evaluator.f1

        if 'valid.pickle' in thisfiles:
            filepath = os.path.join(thisdir, 'valid.pickle')
            valid_evaluator.evaluate_fromliblinear(filepath, mapper_t2arg)
            f1_valid = valid_evaluator.f1

        print('svm:', f1_train, f1_valid)
        # TODO: once svm_leader_board is implemented
        #     svm_d.update(svm_leader_board(thisdir, f1_valid, f1_train))
            

svm: 2.3 0.82
svm: 2.36 0.71
svm: 2.36 0.71
svm: 1.48 1.16
svm: 2.81 0.77
svm: 1.05 0.84
svm: 2.81 0.81
svm: 2.66 1.02
svm: 2.81 0.67
svm: 2.7 0.78


In [12]:
# Leaderboard: one row per experiment, ranked by validation F1 (best first)
df = (
    pd.DataFrame.from_dict(deep_d, orient='index')
    .reset_index(drop=True)
    .sort_values('valid-f1', ascending=False)
)
df.head(50)

Unnamed: 0,model,predictor,version,lr,hidden,ctx-p,embeddings,valid-f1,train-f1,ratio-f1
16,dblstm,crf,3.sanity-check.2.00,0.0005,32x32x32x32,1,glove/s50,96.68,99.45,1.028651
21,dblstm,crf,4.01,0.0005,16x16x16x16,1,glove/s50,44.67,64.37,1.441012
22,dblstm,crf,4.01,0.0005,16x16x16x16,1,wang2vec/s50,43.39,96.65,2.227472
26,dblstm,crf,4.00,0.0005,32x16,1,glove/s50,42.82,67.2,1.56936
0,blstm,crf,2.00,0.001,128x64,1,glove/s50,41.61,99.02,2.379716
2,blstm,crf,2.00,0.0005,128x64,1,glove/s50,41.61,99.02,2.379716
28,dblstm,crf,4.00,0.0005,32x32x32x32,3,wang2vec/s50,40.04,99.45,2.483766
17,dblstm,crf,3.sanity-check.00,0.0005,16x16x16x16,3,glove/s50,39.92,67.77,1.697645
12,dblstm,crf,3.02,0.0005,32x32,1,glove/s50,38.38,65.9,1.71704
14,dblstm,crf,3.00,0.0005,32x32x32x32,1,glove/s50,38.06,92.15,2.421177
