 # Compares multiple experiments outputs
 
 **Note:** Drag this notebook to root directory and activate notebooks there. Otherwise paths will break :)
 
 ## 1. Imports

In [8]:
import os 
#helps speedup file io
import pandas as pd
from collections import defaultdict

# custom models that allow evaluation
from models.evaluator_conll import EvaluatorConll # Runs conll official script under the hood
from models.propbank import Propbank # Propbank class that acts as a wrapper to binaries


## 2. Initializing Propbank and Evaluator

In [9]:
propbank = Propbank.recover('datasets/binaries/db_pt_LEMMA_glove_s50.pickle')

train_evaluator = EvaluatorConll('train', 
        propbank.feature('train', 'S', True),
        propbank.feature('train', 'P', True),
        propbank.feature('train', 'PRED', True),
        propbank.feature('train', 'ARG', True)
)
valid_evaluator = EvaluatorConll('valid', 
        propbank.feature('valid', 'S', True),
        propbank.feature('valid', 'P', True),
        propbank.feature('valid', 'PRED', True),
        propbank.feature('valid', 'ARG', True)
)

## 3. Iterates outputs dir

In [33]:
def info(thisdir, f1_valid, f1_train=-1):
    '''
        Extracts model's experiment encoded within thisdir

        args:
            thisdir .:
            f1_valid .:
            f1_train .:

        returns:
            d .: dict<str, dict<str,str>> is a nested dict
                keys: are a experiment id
                    keys: fields
                    values: experimental values
    '''
    d = defaultdict(dict)
    # first element in split list is the outputs directory
    params = thisdir.split('/')[1:]
    outer_key = '_'.join(params)

    # params[0] encodes
    # model_prediction_version_<subversion>
    # params[2] encodes subversion or subsubversion
    parsestr = '{:}_{:}'.format(params[0],params[-1])
    model, predictor, *version =  parsestr.split('_')
    #version grabs everything after first two params
    if (len(version) == 1):
        version = '1.{:}'.format(version[0])
    else:
        version = '.'.join(version)
    version= version.replace('sanity.check', 'sanity-check')
    parsestr = params[1]
    lr, hs, ctx_p, *embeddings = parsestr.split('_')
    
    d[outer_key] = {
        'model': model,
        'predictor': predictor,
        'version': version,
        'lr':  lr.replace('lr',''),
        'hidden':  hs.replace('hs',''),
        'ctx-p':  ctx_p.replace('ctx-p',''),
        'embeddings': '/'.join(embeddings),
        'valid-f1': f1_valid
    }

    if (f1_train >= 0.0 and f1_valid > 0.0):
        d[outer_key]['train-f1'] = f1_train 
        d[outer_key]['ratio-f1'] = f1_train / f1_valid
    else:
        d[outer_key]['train-f1'] = None
        d[outer_key]['ratio-f1'] = None
    
    return d

In [35]:
experiments_d = {} 
#thisdir encodes model and a version, hiperparams and subversion
for thisdir, thissubdir, thisfiles in os.walk('outputs'):
    # node condition: when this happens than we have 
    # model_name, model_version, model_hparams, subversion
    if not(thissubdir) and thisfiles:
        # this is an "older" format before conll score
        if 'Yhat_valid.csv' in thisfiles:
            filepath= '{:}/Yhat_valid.csv'.format(thisdir)
            d = pd.read_csv(filepath, sep=',', index_col=0).to_dict()            
            ARG_d = d['Y_0'] if  'Y_0' in d else d['Y_ARG']
            valid_evaluator.evaluate(ARG_d)
            experiments_d.update(info(thisdir,valid_evaluator.f1))
        else:
            if 'conllscore_valid.txt' in thisfiles:
                filepath= '{:}/conllscore_valid.txt'.format(thisdir)
                valid_evaluator.evaluate_fromconllfile(filepath)
            
            if 'conllscore_train.txt' in thisfiles:
                filepath= '{:}/conllscore_train.txt'.format(thisdir)
                train_evaluator.evaluate_fromconllfile(filepath)
            experiments_d.update(info(thisdir,valid_evaluator.f1, train_evaluator.f1))        

In [44]:
df = pd.DataFrame.from_dict(experiments_d, orient='index')
df.index = pd.RangeIndex(start=1, stop=df.shape[0]+1, name= 'experiment_id')
df.head(50)

Unnamed: 0_level_0,model,predictor,version,lr,hidden,ctx-p,embeddings,valid-f1,train-f1,ratio-f1
experiment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,blstm,crf,2.00,0.001,128,1,wang2vec/s100,1.23,0.2,0.162602
2,blstm,crf,2.00,0.001,128x64,1,glove/s50,41.61,99.02,2.379716
3,blstm,crf,2.00,0.001,128x64x32,3,glove/s50,37.75,99.22,2.628344
4,blstm,crf,2.00,0.0005,128,1,wang2vec/s100,37.91,78.65,2.07465
5,blstm,crf,2.00,0.0005,16x16x16x16,1,glove/s50,37.91,78.65,2.07465
6,blstm,crf,2.00,0.0005,256,1,glove/s50,13.89,19.38,1.395248
7,blstm,crf,2.00,0.0005,32x32x32x32,1,glove/s50,33.85,92.73,2.739439
8,blstm,softmax,1.00,0.0005,128x128,1,glove/s50,24.62,74.17,3.012591
9,dblstm,crf,2.00,0.0005,128x64,1,glove/s50,33.23,99.32,2.988865
10,dblstm,crf,2.01,0.0005,128x64,1,glove/s50,20.39,41.09,2.015204
