 # Comparing the outputs of multiple experiments
 
 **Note:** Move this notebook to the repository root directory and start Jupyter from there. Otherwise the relative paths used below (`datasets/...`, `outputs/`) and the `models` imports will break :)
 
 ## 1. Imports

In [1]:
import os 
#helps speedup file io
import pandas as pd
from collections import defaultdict

# custom models that allow evaluation
from models.evaluator_conll import EvaluatorConll # Runs conll official script under the hood
from models.propbank import Propbank # Propbank class that acts as a wrapper to binaries


## 2. Initializing Propbank and Evaluator

In [2]:
# Restore the pickled Propbank wrapper that every evaluator reads its columns from.
propbank = Propbank.recover('datasets/binaries/db_pt_LEMMA_glove_s50.pickle')


def _make_evaluator(ds_type):
    '''
        Builds the EvaluatorConll for one dataset split

        args:
            ds_type .: str name of the split ('train', 'valid' or 'test')

        returns:
            evaluator .: EvaluatorConll fed with the S, P, PRED and ARG
                         columns of that split (in this order)
    '''
    # The third positional argument of propbank.column is always True here;
    # its meaning is defined by Propbank and is not visible in this notebook.
    columns = [
        propbank.column(ds_type, column_name, True)
        for column_name in ('S', 'P', 'PRED', 'ARG')
    ]
    return EvaluatorConll(ds_type, *columns)


# One evaluator per split, created in the order train, valid, test.
train_evaluator = _make_evaluator('train')
valid_evaluator = _make_evaluator('valid')
test_evaluator = _make_evaluator('test')

 ## 3. Iterating over the outputs directory

In [3]:
def deep_leader_board(thisdir, f1_valid, f1_train=-1):
    '''
        Builds the leader-board record of the deep-model experiment stored in thisdir

        The experiment settings are decoded from the directory path, which is
        expected to look like

            <outputs>/<model>_<predictor>[_<version>...]/<lr>_<hs>_<ctx-p>[_<embeddings>...]/<subversion>

        e.g. outputs/dblstm_crf_3/lr0.0005_hs32x32_ctx-p1_glove_s50/02

        args:
            thisdir  .: str path of the experiment directory; the first path
                        component (the outputs root) is ignored. Both '/' and
                        the platform separator (os.sep) are accepted.
            f1_valid .: number (or None) F1 score on the validation set,
                        stored unchanged under 'valid-f1'
            f1_train .: number (or None) F1 score on the training set; a
                        negative value (default) or None means "not available"

        returns:
            d .: dict<str, dict<str,str>> is a nested dict
                keys: are a experiment id (path components joined by '_')
                    keys: fields
                    values: experimental values
                'train-f1' and 'ratio-f1' (train / valid) are only filled in
                when f1_train >= 0 and f1_valid > 0; otherwise both are None.

        raises:
            IndexError .: thisdir has fewer than two components after the root
            ValueError .: the hyper-parameter component has fewer than three
                          '_'-separated fields
    '''
    d = defaultdict(dict)
    # os.walk yields paths built with the platform separator; normalise to '/'
    # so the decoding also works where os.sep is not '/' (no-op otherwise).
    # The first element in the split list is the outputs directory.
    params = thisdir.replace(os.sep, '/').split('/')[1:]
    outer_key = '_'.join(params)

    # params[0] encodes model_predictor_version_<subversion>
    # params[-1] (the last directory) encodes the subversion or subsubversion
    parsestr = '{:}_{:}'.format(params[0], params[-1])
    model, predictor, *version = parsestr.split('_')
    # version grabs everything after the first two fields
    if len(version) == 1:
        # only a subversion was given: it belongs to major version 1
        version = '1.{:}'.format(version[0])
    else:
        version = '.'.join(version)
    # 'sanity_check' was split on '_' above; glue it back into a single token
    version = version.replace('sanity.check', 'sanity-check')

    # params[1] encodes the hyper-parameters
    lr, hs, ctx_p, *embeddings = params[1].split('_')

    d[outer_key] = {
        'model': model,
        'predictor': predictor,
        'version': version,
        'lr': lr.replace('lr', ''),
        'hidden': hs.replace('hs', ''),
        'ctx-p': ctx_p.replace('ctx-p', ''),
        'embeddings': '/'.join(embeddings),
        'valid-f1': f1_valid
    }

    # None is treated like the negative sentinel instead of raising TypeError
    # on the comparisons below.
    has_both_scores = (
        f1_train is not None
        and f1_valid is not None
        and f1_train >= 0.0
        and f1_valid > 0.0
    )
    if has_both_scores:
        d[outer_key]['train-f1'] = f1_train
        d[outer_key]['ratio-f1'] = f1_train / f1_valid
    else:
        d[outer_key]['train-f1'] = None
        d[outer_key]['ratio-f1'] = None

    return d


In [4]:
def svm_leader_board(thisdir, f1_valid, f1_train=-1):
    '''
        Builds the leader-board record of the SVM experiment stored in thisdir

        The experiment settings are decoded from the directory path, which is
        expected to look like

            <outputs>/<model>/<encoding>/[<optarg>/...]<version>

        args:
            thisdir  .: str path of the experiment directory; the first two
                        path components are ignored. Both '/' and the
                        platform separator (os.sep) are accepted.
            f1_valid .: number (or None) F1 score on the validation set,
                        stored unchanged under 'valid-f1'
            f1_train .: number (or None) F1 score on the training set; a
                        negative value (default) or None means "not available"

        returns:
            d .: dict<str, dict<str,str>> is a nested dict
                keys: are a experiment id (path components after the outputs
                      root joined by '_')
                    keys: fields -- 'encoding', 'optargs', 'version',
                          'valid-f1', 'train-f1', 'ratio-f1'
                    values: experimental values
                The score fields follow the same rule as deep_leader_board:
                'train-f1' and 'ratio-f1' are None unless f1_train >= 0 and
                f1_valid > 0.
                NOTE(review): the field set is provisional -- confirm it
                against the columns wanted for the SVM leader board.

        raises:
            ValueError .: thisdir has fewer than three components
            IndexError .: thisdir has no component after the encoding
    '''
    parts = thisdir.replace(os.sep, '/').split('/')

    # encoding and optarg
    _, _, encoding, *optargs = parts
    # the last directory is the version; whatever sits between the encoding
    # and the version is an optional argument
    version = optargs[-1]
    optargs = optargs[:-1]

    # FUTURE: feature engineering

    record = {
        'encoding': encoding,
        'optargs': '/'.join(optargs),
        'version': version,
        'valid-f1': f1_valid,
        'train-f1': None,
        'ratio-f1': None
    }
    has_both_scores = (
        f1_train is not None
        and f1_valid is not None
        and f1_train >= 0.0
        and f1_valid > 0.0
    )
    if has_both_scores:
        record['train-f1'] = f1_train
        record['ratio-f1'] = f1_train / f1_valid

    # Return a mapping (never None) so callers can dict.update() with it.
    return {'_'.join(parts[1:]): record}

In [4]:
# Leader-board records keyed by experiment id: deep models and SVM models.
deep_d = {}
svm_d = {}
# thisdir encodes the model and a version, the hyper-parameters and a subversion
for thisdir, thissubdir, thisfiles in os.walk('outputs'):
    # Leaf condition: only a directory that has files and no sub-directories
    # holds the results of one experiment, i.e. its path carries
    # model_name, model_version, model_hparams, subversion
    if thissubdir or not thisfiles:
        continue

    if 'Yhat_valid.csv' in thisfiles:
        # this is an "older" format, from before the conll score files
        filepath = os.path.join(thisdir, 'Yhat_valid.csv')
        d = pd.read_csv(filepath, sep=',', index_col=0).to_dict()
        # the prediction column is named either 'Y_0' or 'Y_ARG'
        ARG_d = d['Y_0'] if 'Y_0' in d else d['Y_ARG']
        valid_evaluator.evaluate(ARG_d)
        deep_d.update(deep_leader_board(thisdir, valid_evaluator.f1))

    elif ('conllscore_valid.txt' in thisfiles) or ('conllscore_train.txt' in thisfiles):
        # The evaluator objects are shared by every directory, so .f1 is only
        # read right after evaluating THIS directory's file. Reading it
        # unconditionally would report a score left over from an earlier
        # directory whenever one of the two files is missing.
        f1_valid = float('nan')  # no validation score available
        f1_train = -1            # deep_leader_board's "not available" value

        if 'conllscore_valid.txt' in thisfiles:
            filepath = os.path.join(thisdir, 'conllscore_valid.txt')
            valid_evaluator.evaluate_fromconllfile(filepath)
            f1_valid = valid_evaluator.f1

        if 'conllscore_train.txt' in thisfiles:
            filepath = os.path.join(thisdir, 'conllscore_train.txt')
            train_evaluator.evaluate_fromconllfile(filepath)
            f1_train = train_evaluator.f1

        deep_d.update(deep_leader_board(thisdir, f1_valid, f1_train))

    elif 'svm' in thisdir and (('train.pickle' in thisfiles) or ('valid.pickle' in thisfiles)):
        # NOTE(review): both pickles are scored with test_evaluator and the
        # same value is printed twice -- presumably train.pickle/valid.pickle
        # were meant for train_evaluator/valid_evaluator. Left unchanged
        # because evaluate_fromliblinear is not visible here; confirm.
        if 'train.pickle' in thisfiles:
            filepath = os.path.join(thisdir, 'train.pickle')
            test_evaluator.evaluate_fromliblinear(filepath, propbank)

        if 'valid.pickle' in thisfiles:
            filepath = os.path.join(thisdir, 'valid.pickle')
            test_evaluator.evaluate_fromliblinear(filepath, propbank)
        print('svm:', test_evaluator.f1, test_evaluator.f1)
        # TODO: fill svm_d once svm_leader_board returns a record:
        # svm_d.update(svm_leader_board(thisdir,valid_evaluator.f1, train_evaluator.f1))
            

liblinear accuracy:89.62
liblinear mse:6.57
liblinear accuracy:89.62
liblinear mse:6.57
svm: 49.26 49.26


In [5]:
# One row per deep-model experiment, best validation F1 first. The experiment
# id index is dropped before sorting, so the row labels shown are the
# pre-sort positions.
df = (
    pd.DataFrame.from_dict(deep_d, orient='index')
    .reset_index(drop=True)
    .sort_values('valid-f1', ascending=False)
)
df.head(50)

Unnamed: 0,model,predictor,version,lr,hidden,ctx-p,embeddings,valid-f1,train-f1,ratio-f1
12,dblstm,crf,3.sanity-check.2.00,0.0005,32x32x32x32,1,glove/s50,96.68,99.45,1.028651
0,blstm,crf,2.00,0.001,128x64,1,glove/s50,41.61,99.02,2.379716
13,dblstm,crf,3.sanity-check.00,0.0005,16x16x16x16,3,glove/s50,39.92,67.77,1.697645
10,dblstm,crf,3.02,0.0005,32x32,1,glove/s50,38.38,65.9,1.71704
11,dblstm,crf,3.00,0.0005,32x32x32x32,1,glove/s50,38.06,92.15,2.421177
14,dblstm,crf,3.sanity-check.00,0.0005,32x32x32x32,3,glove/s50,37.98,98.91,2.604265
2,blstm,crf,2.00,0.0005,16x16x16x16,1,glove/s50,37.91,78.65,2.07465
1,blstm,crf,2.00,0.001,128x64x32,3,glove/s50,37.75,99.22,2.628344
4,blstm,crf,2.00,0.0005,32x32x32x32,1,glove/s50,33.85,92.73,2.739439
7,dblstm,crf,2.00,0.0005,128x64,1,glove/s50,33.23,99.32,2.988865
