# Analyze results after treebank alignment

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('paper')
sns.set_style('white')

In [4]:
import pickle

In [5]:
import ne_evaluate_mentions as nem


## Get token data

In [109]:
tokens_ner_with_upos_feats = pd.read_csv('align/spmrl_tokens_df.csv.gz', compression='gzip').reset_index()

In [110]:
tok_gold_dev  = tokens_ner_with_upos_feats[tokens_ner_with_upos_feats.set=='dev']
tok_gold_train = tokens_ner_with_upos_feats[tokens_ner_with_upos_feats.set=='train']
tok_gold_test = tokens_ner_with_upos_feats[tokens_ner_with_upos_feats.set=='test']

In [111]:
tok_gold_dev_sents =   tok_gold_dev.groupby('sent_id')[['misc_token_str', 'biose']].apply(lambda x: x.values.tolist())
tok_gold_train_sents = tok_gold_train.groupby('sent_id')[['misc_token_str', 'biose']].apply(lambda x: x.values.tolist())
tok_gold_test_sents =  tok_gold_test.groupby('sent_id')[['misc_token_str', 'biose']].apply(lambda x: x.values.tolist())
tok_gold_test_sents.head()

sent_id
5439    [[הכל, O], [נושאים, O], [עמם, O], [את, O], [כי...
5440    [[אומר, O], [מזכיר, O], [התק"ם, S-ORG], [,, O]...
5441    [[לא, O], [ייתכן, O], [שעולה, O], [יבוא, O], [...
5442    [[לא, O], [ייתכן, O], [שהוא, O], [יירד, O], [מ...
5443    [[לכן, O], [קבענו, O], [עיקרון, O], [שצריכה, O...
dtype: object

In [112]:
tok_gold_dev_mentions = nem.sents_to_mentions  (tok_gold_dev_sents  )
tok_gold_train_mentions = nem.sents_to_mentions(tok_gold_train_sents)
tok_gold_test_mentions = nem.sents_to_mentions (tok_gold_test_sents )

tok_gold_dev_mentions[:10]

[(1, 'מתאילנד', 'GPE', 1),
 (1, 'לישראל', 'GPE', 1),
 (2, 'בוועדת העבודה והרווחה', 'ORG', 1),
 (2, 'הכנסת', 'ORG', 1),
 (3, 'אורה נמיר', 'PER', 1),
 (3, 'מערך', 'ORG', 1),
 (3, 'לישראל', 'GPE', 1),
 (3, 'מתאילנד', 'GPE', 1),
 (4, 'האוצר', 'ORG', 1),
 (4, 'משרד העבודה והרווחה', 'ORG', 1)]

## Get morpheme data

In [296]:
dropped = [5438, 5444, 5445, 5446, 5448, 5449, 5450, 5451, 5453, 5459]

In [125]:
spdf = pd.read_csv('align/spdf_fixed.csv.gz', compression='gzip')

In [126]:
o_morph_gold_dev_sents   =   spdf[spdf.set=='dev'].groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
o_morph_gold_train_sents =   spdf[spdf.set=='train'].groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
o_morph_gold_test_sents  =   spdf[spdf.set=='test'].groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
o_morph_gold_test_sents.head()

sent_id
5438    [[אחרים, O], [ב, O], [ה, B-ORG], [תנועה, I-ORG...
5439    [[הכל, O], [נושאים, O], [עמ, O], [הם, O], [את,...
5440    [[אומר, O], [מזכיר, O], [התק"ם, S-ORG], [,, O]...
5441    [[לא, O], [ייתכן, O], [ש, O], [עולה, O], [יבוא...
5442    [[לא, O], [ייתכן, O], [ש, O], [הוא, O], [יירד,...
dtype: object

In [127]:
spdf = spdf[(~spdf.sent_id.isin(dropped))]

In [128]:
morph_gold_dev  = spdf[spdf.set=='dev']
morph_gold_train = spdf[spdf.set=='train']
morph_gold_test = spdf[spdf.set=='test']

In [129]:
morph_gold_dev_sents   =   morph_gold_dev.groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
morph_gold_train_sents = morph_gold_train.groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
morph_gold_test_sents  =  morph_gold_test.groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
morph_gold_test_sents.head()

sent_id
5439    [[הכל, O], [נושאים, O], [עמ, O], [הם, O], [את,...
5440    [[אומר, O], [מזכיר, O], [התק"ם, S-ORG], [,, O]...
5441    [[לא, O], [ייתכן, O], [ש, O], [עולה, O], [יבוא...
5442    [[לא, O], [ייתכן, O], [ש, O], [הוא, O], [יירד,...
5443    [[לכן, O], [קבענו, O], [עיקרון, O], [ש, O], [צ...
dtype: object

In [130]:
morph_gold_dev_mentions = nem.sents_to_mentions  (morph_gold_dev_sents  )
morph_gold_train_mentions = nem.sents_to_mentions(morph_gold_train_sents)
morph_gold_test_mentions = nem.sents_to_mentions (morph_gold_test_sents )

morph_gold_test_mentions[:10]

[(5440, 'התק"ם', 'ORG', 1),
 (5440, 'מוקי צור', 'PER', 1),
 (5441, 'צה"ל', 'ORG', 1),
 (5447, 'ה מחלקה ל התיישבות', 'ORG', 1),
 (5460, 'חגי בז', 'PER', 1),
 (5460, 'בריה"ם', 'GPE', 1),
 (5460, 'ה קיבוץ ה ארצי', 'ORG', 1),
 (5465, 'ה קיבוץ ה דתי', 'ORG', 1),
 (5465, 'אריה קרול', 'PER', 1),
 (5466, 'ה תנועה ה קיבוצית', 'ORG', 1)]

## Evaluate mentions

In [348]:
def eval_pred_mentions(preds, gold_mentions, gold_sents, drop_from_pred=[], drop_from_gold=[]):
    #print(preds_path)
    pred_sents_l = [[i, list(zip(toks, bs))] 
                    for i, (toks, bs) 
                    in zip(gold_sents.index.drop(drop_from_gold), (zip(preds[0][2], preds[0][3]))) 
                    if i not in drop_from_pred]
    toks = [[tok for tok, bs in p[1]] for p in pred_sents_l]
    def subfinder(mylist, pattern):
        matches = []
        for i in range(len(mylist)):
            if mylist[i] == pattern[0] and mylist[i:i+len(pattern)] == pattern:
                matches.append(pattern)
        return matches

    split_mentions = [[x[1].split(' ') for x in gold_mentions if x[0]==i] 
                      for i in gold_sents.index.drop(drop_from_gold)
                      if i not in drop_from_gold]
    present = []
    for spl, tok in zip(split_mentions, toks):
        mid = []
        for ment in spl:
            mid.append(len(subfinder(tok, ment))>0)
        present.append(sum(mid))
    print([(i, x) for i, x in enumerate(zip(present, [len(spl) for spl in split_mentions])) if x[1]-x[0]>=2])
    #print(toks[231])
    print ('only', round(sum(present)/len(gold_mentions), 2), 'available mentions (due to different segmentation)')
    pred_sents = pd.Series(index=[idx for idx, s in pred_sents_l], data=[s for idx, s in pred_sents_l])
    pred_mentions = nem.sents_to_mentions(pred_sents)
    return nem.evaluate_mentions(gold_mentions, pred_mentions)

### Evaluate morphemes

In [319]:
import glob

In [320]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
morph_gold_sents = (o_morph_gold_test_sents, o_morph_gold_dev_sents, o_morph_gold_test_sents)
morph_gold_extra_preds = (morph_gold_test_mentions, morph_gold_dev_mentions, morph_gold_test_mentions)
pred_set = ('gold_test', 'yap_dev', 'yap_test')
drop_from_pred = ([dropped], [], [])
drop_from_gold = ([], [], dropped)
morph_res = []
for ep in sorted(glob.glob('results/new_outputs/*morph*-extra_preds-*')):
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), morph_gold_extra_preds[i], morph_gold_sents[i], drop_from_pred[i], drop_from_gold[i])]
    morph_res.append(res)
    

results/new_outputs/treebank_morphemes-0-extra_preds-0.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 764 found, 395 correct.
Precision: 0.42
Recall:    0.52
F1:        0.47
FP ex.: ['חסילונים מורתחים', 'נוורסון צלפו', 'הרצליה', 'ברית ה מועצות', 'ניו יורק']
FN ex.: ['שפיים', 'פרגו', 'ה וועדה ל מינוי שופטים', 'ת"א', 'בוסטון']
results/new_outputs/treebank_morphemes-0-extra_preds-1.pkl
[(56, (0, 3)), (127, (2, 4)), (160, (1, 3)), (326, (0, 2))]
only 0.92 available mentions (due to different segmentation)
498 mentions, 368 found, 181 correct.
Precision: 0.36
Recall:    0.49
F1:        0.42
FP ex.: ['ה שדולה ה פרו', 'קרן ה מדע', 'טקסס', 'ירושלים', 'אסמי ה דגנים']
FN ex.: ['רשות שדות ה תעופה', 'ניו גרסי', 'כריסטין טוד ויטמן', 'סילבר', 'שירות ה תעסוקה']
results/new_outputs/treebank_morphemes-0-extra_preds-2.pkl
[(32, (8, 12)), (94, (0, 2)), (131, (4, 6)), (151, (1, 4)), (156, (10, 12)), (158, (1, 3)), (160, (11, 14)), (169, (1, 3)), (215, (7, 9)), (235, (1, 3

In [321]:
defaults = {"use_word": True, "use_pos": False, "embedding_matrix": None, 
"embed_dim": 70, "trainable": True, "input_dropout": False, "stack_lstm": 1,
"epochs": 100, "early_stopping": True, "patience": 20, "min_delta": 0.0001,
"use_char": False, "crf": False, "add_random_embedding": True, "pretrained_embed_dim": 300,
"stack_cross": False, "stack_double": False, "rec_dropout": 0.1,
"validation_split": 0.1}

In [322]:
morph_conf_gold_dev_res = pickle.load(open('results/new_outputs/treebank_morphemes_conf_res_preds_hist.pkl', 'rb'))
morph_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'results': x[1][0] } for i, x in enumerate(morph_conf_gold_dev_res)]
morph_conf_gold_dev_res

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False},
  'results': (51.43603133159269, 39.558232931726906, 44.72190692395005)},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True},
  'results': (51.01522842639594, 40.36144578313253, 45.06726457399103)},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'yap_ft_sg',
   'trainable': True,
   'embed_dim': 300},
  'results': (73.20675105485233, 69.67871485943775, 71.39917695473251)},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'pretrained_token_ft',
   'trainable': True,
   'embed_dim': 300},
  'results': (69.5906432748538, 71.6867469879518, 70.62314540059347)},
 {'pred_set': 'gold_dev',
  'conf': 4,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': True,
   'embedding

In [323]:
conf_i = {x['conf']: x['config'] for x in morph_conf_gold_dev_res}
for r in morph_res:
    r['config'] = conf_i[r['conf']]
    
morph_res = morph_res+morph_conf_gold_dev_res

In [324]:
for r in morph_res:
    r['unit'] = 'morpheme'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]
morph_res[0]

{'pred_set': 'gold_test',
 'conf': 0,
 'results': [42.42749731471536, 51.70157068062827, 46.60766961651917],
 'config': {'crf': True,
  'use_pos': False,
  'use_word': True,
  'embedding_matrix': None,
  'embed_dim': 70,
  'trainable': True,
  'input_dropout': False,
  'stack_lstm': 1,
  'epochs': 100,
  'early_stopping': True,
  'patience': 20,
  'min_delta': 0.0001,
  'use_char': False,
  'add_random_embedding': True,
  'pretrained_embed_dim': 300,
  'stack_cross': False,
  'stack_double': False,
  'rec_dropout': 0.1,
  'validation_split': 0.1},
 'unit': 'morpheme'}

In [325]:
def get_results_df(res):
    dict_res = []
    
    for r in res:
        dr = {}
        dr['pred_set'] = r['pred_set']
        dr['unit'] = r['unit']
        dr['prec'], dr['recall'], dr['f1'] = r['results']
        dr.update(r['config'])
        dict_res.append(dr)
    rdf = pd.DataFrame(dict_res)
    return rdf

rdf = get_results_df(morph_res)
rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,46.60767,False,0.0001,20,...,51.701571,False,False,1,True,morpheme,False,False,True,0.1
1,True,True,True,70,,100,41.801386,False,0.0001,20,...,49.184783,False,False,1,True,morpheme,False,False,True,0.1
2,True,True,True,70,,100,41.36253,False,0.0001,20,...,47.685835,False,False,1,True,morpheme,False,False,True,0.1
3,True,True,True,70,,100,57.188161,False,0.0001,20,...,56.295525,False,False,1,True,morpheme,False,True,True,0.1
4,True,True,True,70,,100,47.991543,False,0.0001,20,...,50.669643,False,False,1,True,morpheme,False,True,True,0.1


In [326]:
rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix']).f1.max().unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,pretrained_token_ft,yap_ft_sg
pred_set,use_pos,use_char,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
gold_dev,False,False,44.721907,70.623145,71.399177
gold_dev,False,True,53.229399,73.684211,75.946548
gold_dev,True,False,45.067265,64.672037,65.292096
gold_dev,True,True,48.752834,65.979381,65.433526
gold_test,False,False,46.60767,68.062827,71.468144
gold_test,False,True,53.734671,72.78798,73.409091
gold_test,True,False,57.188161,74.94577,74.754098
gold_test,True,True,62.751323,76.404494,75.802066
yap_dev,False,False,41.801386,63.95112,66.033755
yap_dev,False,True,46.741573,66.373626,67.114094


### Evaluate tokens

In [327]:
import glob

In [328]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
tok_gold_sents = (tok_gold_test_sents, tok_gold_dev_sents, tok_gold_test_sents)
tok_gold_extra_preds = (tok_gold_test_mentions, tok_gold_dev_mentions, tok_gold_test_mentions)
pred_set = ('gold_test', 'yap_dev', 'yap_test')

tok_res = []
for ep in sorted(glob.glob('results/new_outputs/*tokens*-extra_preds-*')):
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), tok_gold_extra_preds[i], tok_gold_sents[i])]
    tok_res.append(res)
    

results/new_outputs/treebank_tokens-0-extra_preds-0.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 877 found, 332 correct.
Precision: 0.36
Recall:    0.38
F1:        0.37
FP ex.: ['רמת השרון', 'נגמר', 'יעקב', 'הלאומים', 'ד שמעון']
FN ex.: ['פרגו', 'לסמפדוריה', 'ת"א', 'בוסטון סלטיקס', 'מייקל האקט']
results/new_outputs/treebank_tokens-0-extra_preds-1.pkl
[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 442 found, 178 correct.
Precision: 0.36
Recall:    0.4
F1:        0.38
FP ex.: ['מיניאפוליס', 'והניירות המונפקים', 'בשטחים', 'סמית - ריצרדסון', 'התבוננתי']
FN ex.: ['בית המשפט הבין - לאומי', 'הבית הלבן', 'המיליציה הצרפתית', 'בקליפורניה', 'סילבר']
results/new_outputs/treebank_tokens-0-extra_preds-2.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 877 found, 332 correct.
Precision: 0.36
Recall:    0.38
F1:        0.37
FP ex.: ['רמת השרון', 'נגמר', 'יעקב', 'הלאומים', 'ד שמעון']
FN ex.: ['פרגו', 'לסמפ

In [329]:
tok_res

[{'pred_set': 'gold_test',
  'conf': 0,
  'results': [35.660580021482275, 37.8563283922463, 36.725663716814154]},
 {'pred_set': 'yap_dev',
  'conf': 0,
  'results': [35.671342685370746, 40.27149321266968, 37.832093517534545]},
 {'pred_set': 'yap_test',
  'conf': 0,
  'results': [35.660580021482275, 37.8563283922463, 36.725663716814154]},
 {'pred_set': 'gold_test',
  'conf': 1,
  'results': [51.98711063372718, 53.42163355408388, 52.694610778443106]},
 {'pred_set': 'yap_dev',
  'conf': 1,
  'results': [44.88977955911824, 52.95508274231678, 48.59002169197397]},
 {'pred_set': 'yap_test',
  'conf': 1,
  'results': [44.3609022556391, 50.18226002430134, 47.092360319270234]},
 {'pred_set': 'gold_test',
  'conf': 10,
  'results': [72.50268528464017, 70.16632016632016, 71.31537242472267]},
 {'pred_set': 'yap_dev',
  'conf': 10,
  'results': [70.14028056112225, 78.125, 73.91763463569167]},
 {'pred_set': 'yap_test',
  'conf': 10,
  'results': [66.9172932330827, 69.76483762597985, 68.31140350877193

In [330]:
tok_conf_gold_dev_res = pickle.load(open('results/new_outputs/treebank_tokens_conf_res_preds_hist.pkl', 'rb'))
tok_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'results': x[1][0] } for i, x in enumerate(tok_conf_gold_dev_res)]
tok_conf_gold_dev_res

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False},
  'results': (38.146551724137936, 35.47094188376754, 36.7601246105919)},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True},
  'results': (51.25858123569794, 44.88977955911824, 47.863247863247864)},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'token_ft_sg',
   'trainable': True,
   'embed_dim': 300},
  'results': (75.66265060240964, 62.925851703406806, 68.7089715536105)},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'pretrained_token_ft',
   'trainable': True,
   'embed_dim': 300},
  'results': (67.56756756756756, 65.13026052104209, 66.32653061224488)},
 {'pred_set': 'gold_dev',
  'conf': 4,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': True,
   'embed

In [331]:
conf_i = {x['conf']: x['config'] for x in tok_conf_gold_dev_res}
for r in tok_res:
    r['config'] = conf_i[r['conf']]
    
tok_res = tok_res+tok_conf_gold_dev_res

In [332]:
for r in tok_res:
    r['unit'] = 'token'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]

In [333]:
tok_res[0]

{'pred_set': 'gold_test',
 'conf': 0,
 'results': [35.660580021482275, 37.8563283922463, 36.725663716814154],
 'config': {'crf': True,
  'use_pos': False,
  'use_word': True,
  'embedding_matrix': None,
  'embed_dim': 70,
  'trainable': True,
  'input_dropout': False,
  'stack_lstm': 1,
  'epochs': 100,
  'early_stopping': True,
  'patience': 20,
  'min_delta': 0.0001,
  'use_char': False,
  'add_random_embedding': True,
  'pretrained_embed_dim': 300,
  'stack_cross': False,
  'stack_double': False,
  'rec_dropout': 0.1,
  'validation_split': 0.1},
 'unit': 'token'}

In [334]:
tok_rdf = get_results_df(tok_res)
tok_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,36.725664,False,0.0001,20,...,37.856328,False,False,1,True,token,False,False,True,0.1
1,True,True,True,70,,100,37.832094,False,0.0001,20,...,40.271493,False,False,1,True,token,False,False,True,0.1
2,True,True,True,70,,100,36.725664,False,0.0001,20,...,37.856328,False,False,1,True,token,False,False,True,0.1
3,True,True,True,70,,100,52.694611,False,0.0001,20,...,53.421634,False,False,1,True,token,False,True,True,0.1
4,True,True,True,70,,100,48.590022,False,0.0001,20,...,52.955083,False,False,1,True,token,False,True,True,0.1


In [335]:
rdf = pd.concat([rdf,tok_rdf])

In [336]:
rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,None,pretrained_token_ft,pretrained_token_ft,token_ft_sg,yap_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,morpheme,token,morpheme,token,token,morpheme
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
gold_dev,False,False,44.7,36.8,70.6,66.3,68.7,71.4
gold_dev,False,True,53.2,43.7,73.7,69.3,70.1,75.9
gold_dev,True,False,45.1,47.9,64.7,67.1,68.4,65.3
gold_dev,True,True,48.8,52.2,66.0,68.9,73.0,65.4
gold_test,False,False,46.6,36.7,68.1,66.1,64.1,71.5
gold_test,False,True,53.7,44.0,72.8,69.3,71.8,73.4
gold_test,True,False,57.2,52.7,74.9,70.5,71.8,74.8
gold_test,True,True,62.8,55.9,76.4,72.0,71.3,75.8
yap_dev,False,False,41.8,37.8,64.0,68.5,70.6,66.0
yap_dev,False,True,46.7,44.4,66.4,70.7,71.6,67.1


### Evaluate morphemes fixed

In [454]:
import glob

In [455]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
morph_fixed_gold_sents = (morph_gold_test_sents, morph_gold_dev_sents, morph_gold_test_sents)
pred_set = ('gold_test', 'yap_dev', 'yap_test')

morph_fixed_res = []
for ep in sorted(glob.glob('results/new_outputs/*morphemes_fixed*-extra_preds-*')):
    print(ep)
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), morph_gold_extra_preds[i], morph_fixed_gold_sents[i])]
    morph_fixed_res.append(res)
    

results/new_outputs/treebank_morphemes_fixed-0-extra_preds-0.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 843 found, 438 correct.
Precision: 0.47
Recall:    0.52
F1:        0.49
FP ex.: ['בית ה משפט ה מחוזי ב נצרת', 'בושלו', 'כפר בלום', 'רגשנית', 'הרצליה']
FN ex.: ['שפיים', 'פרגו', 'ה וועדה ל מינוי שופטים', 'ת"א', 'בוסטון']
results/new_outputs/treebank_morphemes_fixed-0-extra_preds-1.pkl
[(56, (0, 3)), (127, (2, 4)), (160, (1, 3)), (326, (0, 2))]
only 0.92 available mentions (due to different segmentation)
498 mentions, 400 found, 184 correct.
Precision: 0.37
Recall:    0.46
F1:        0.41
FP ex.: ['אינטלקטואל רודני', 'אסמי ה דגנים', 'מג"ב חשו אל', 'קטמון נפלה', 'שיקאגו']
FN ex.: ['ניו גרסי', 'כריסטין טוד ויטמן', 'סילבר', 'שירות ה תעסוקה', 'דווייט מקדונלד']
results/new_outputs/treebank_morphemes_fixed-0-extra_preds-2.pkl
[(32, (8, 12)), (94, (0, 2)), (131, (4, 6)), (151, (1, 4)), (156, (10, 12)), (158, (1, 3)), (160, (11, 14)), (169, (1, 3)), (215, 

In [456]:
from itertools import islice

def get_preds_from_preds(preds, sents=morph_gold_dev_sents, truncate=80):
    i=0
    xtoks = []
    xpreds = []
    for sent in sents:
        t = []
        p = []
        for tok, b in islice(sent, truncate):
            t.append(tok)
            p.append(preds[i])
            i+=1
        xtoks.append(t)
        xpreds.append(p)

    preds_preds = [[None, None, xtoks, xpreds]]
    return preds_preds

In [457]:
morph_fixed_conf_gold_dev_res_o = pickle.load(open('results/new_outputs/treebank_morphemes_fixed_conf_res_preds_hist.pkl', 'rb'))
morph_fixed_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'conlleval_results': x[1][0], 
      'results': [x*100 for x in eval_pred_mentions(get_preds_from_preds(x[2][0]), morph_gold_dev_mentions, o_morph_gold_dev_sents)] } 
     for i, x in enumerate(morph_fixed_conf_gold_dev_res_o)]
morph_fixed_conf_gold_dev_res

[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 411 found, 212 correct.
Precision: 0.43
Recall:    0.52
F1:        0.47
FP ex.: ['אינטלקטואל רודני', 'אסמי ה דגנים', 'מג"ב חשו אל', 'הנ ל צוותות חשיבה שמרניים', 'קטמון נפלה']
FN ex.: ['כריסטין טוד ויטמן', 'שירות ה תעסוקה', 'דווייט מקדונלד', 'קרן מקארתור', 'משטרת מיניאפוליס']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 357 found, 206 correct.
Precision: 0.41
Recall:    0.58
F1:        0.48
FP ex.: ['טקסס', 'ירושלים', 'שיקאגו', 'הראל', 'קטמון']
FN ex.: ['רשות שדות ה תעופה', 'בוש', 'כריסטין טוד ויטמן', 'ניו גרסי', 'סילבר']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 460 found, 354 correct.
Precision: 0.71
Recall:    0.77
F1:        0.74
FP ex.: ['ירושלים', 'התבוננתי', 'שיקאגו', 'יונה נלהבת', 'הראל']
FN ex.: ['כריסטין טוד ויטמן', 'סילבר', 'מנזר סן סימון', 'ה מלחמה ה קרה', 'מענק " גאונות "']
[]
only 1.0 available mentions (due to different segmentati

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False, 'patience': 10},
  'conlleval_results': (50.96153846153846,
   42.570281124497996,
   46.389496717724285),
  'results': [42.570281124497996, 51.58150851581509, 46.64466446644665]},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True, 'patience': 10},
  'conlleval_results': (56.94444444444444,
   41.164658634538156,
   47.78554778554779),
  'results': [41.365461847389554, 57.70308123249299, 48.18713450292397]},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'yap_ft_sg',
   'trainable': True,
   'embed_dim': 300,
   'patience': 10},
  'conlleval_results': (73.18087318087319,
   70.68273092369478,
   71.91011235955057),
  'results': [71.08433734939759, 76.95652173913044, 73.90396659707724]},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': T

In [458]:
conf_i = {x['conf']: x['config'] for x in morph_fixed_conf_gold_dev_res}
for r in morph_fixed_res:
    r['config'] = conf_i[r['conf']]
    
morph_fixed_res = morph_fixed_res+morph_fixed_conf_gold_dev_res

In [459]:
for r in morph_fixed_res:
    r['unit'] = 'morpheme_fixed'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]

In [460]:
morph_fixed_res[0]

{'pred_set': 'gold_test',
 'conf': 0,
 'results': [47.046186895810955, 51.95729537366548, 49.379932356257044],
 'config': {'crf': True,
  'use_pos': False,
  'patience': 10,
  'use_word': True,
  'embedding_matrix': None,
  'embed_dim': 70,
  'trainable': True,
  'input_dropout': False,
  'stack_lstm': 1,
  'epochs': 100,
  'early_stopping': True,
  'min_delta': 0.0001,
  'use_char': False,
  'add_random_embedding': True,
  'pretrained_embed_dim': 300,
  'stack_cross': False,
  'stack_double': False,
  'rec_dropout': 0.1,
  'validation_split': 0.1},
 'unit': 'morpheme_fixed'}

In [461]:
morph_fixed_rdf = get_results_df(morph_fixed_res)
morph_fixed_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,49.379932,False,0.0001,10,...,51.957295,False,False,1,True,morpheme_fixed,False,False,True,0.1
1,True,True,True,70,,100,40.979955,False,0.0001,10,...,46.0,False,False,1,True,morpheme_fixed,False,False,True,0.1
2,True,True,True,70,,100,41.498559,False,0.0001,10,...,44.776119,False,False,1,True,morpheme_fixed,False,False,True,0.1
3,True,True,True,70,,100,61.176471,False,0.0001,10,...,60.915868,False,False,1,True,morpheme_fixed,False,True,True,0.1
4,True,True,True,70,,100,49.837486,False,0.0001,10,...,54.117647,False,False,1,True,morpheme_fixed,False,True,True,0.1


In [462]:
morph_fixed_rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,pretrained_token_ft,yap_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,morpheme_fixed,morpheme_fixed,morpheme_fixed
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
gold_dev,False,False,46.6,74.8,73.9
gold_dev,False,True,55.0,76.1,74.8
gold_dev,True,False,48.2,64.7,66.1
gold_dev,True,True,51.5,69.6,67.5
gold_test,False,False,49.4,71.4,70.9
gold_test,False,True,58.2,73.3,74.0
gold_test,True,False,61.2,74.0,73.2
gold_test,True,True,60.7,75.6,74.7
yap_dev,False,False,41.0,67.5,66.2
yap_dev,False,True,48.3,68.2,67.2


### Evaluate morphemes fixed

In [481]:
import glob

In [496]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
morph_fixed_gold_sents = (morph_gold_test_sents, morph_gold_dev_sents, morph_gold_test_sents)
pred_set = ('gold_test', 'yap_dev', 'yap_test')

morph_fixed2_res = []
for ep in sorted(glob.glob('results/new_outputs/*morphemes_fixed2*-extra_preds-*')):
    print(ep)
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), morph_gold_extra_preds[i], morph_fixed_gold_sents[i])]
    morph_fixed2_res.append(res)
    

results/new_outputs/treebank_morphemes_fixed2-0-extra_preds-0.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 617 found, 365 correct.
Precision: 0.39
Recall:    0.59
F1:        0.47
FP ex.: ['אל ה קיסר', 'צומת', 'עו', 'לאט', 'אמאדורה']
FN ex.: ['שפיים', 'פרגו', 'ה וועדה ל מינוי שופטים', 'ת"א', 'בוסטון']
results/new_outputs/treebank_morphemes_fixed2-0-extra_preds-1.pkl
[(56, (0, 3)), (127, (2, 4)), (160, (1, 3)), (326, (0, 2))]
only 0.92 available mentions (due to different segmentation)
498 mentions, 306 found, 158 correct.
Precision: 0.32
Recall:    0.52
F1:        0.39
FP ex.: ['ה שדולה ה פרו', 'טקסס', 'ירושלים', 'איתן', 'שיקאגו']
FN ex.: ['ניו גרסי', 'כריסטין טוד ויטמן', 'ויליאם וולד', 'סילבר', 'שירות ה תעסוקה']
results/new_outputs/treebank_morphemes_fixed2-0-extra_preds-2.pkl
[(32, (8, 12)), (94, (0, 2)), (131, (4, 6)), (151, (1, 4)), (156, (10, 12)), (158, (1, 3)), (160, (11, 14)), (169, (1, 3)), (215, (7, 9)), (235, (1, 3)), (402, (2, 4)), (446, (

In [497]:
morph_fixed2_conf_gold_dev_res_o = pickle.load(open('results/new_outputs/treebank_morphemes_fixed2_conf_res_preds_hist.pkl', 'rb'))
morph_fixed2_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'conlleval_results': x[1][0], 
      'results': [x*100 for x in eval_pred_mentions(get_preds_from_preds(x[2][0]), morph_gold_dev_mentions, o_morph_gold_dev_sents)] } 
     for i, x in enumerate(morph_fixed2_conf_gold_dev_res_o)]
morph_fixed2_conf_gold_dev_res

[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 316 found, 184 correct.
Precision: 0.37
Recall:    0.58
F1:        0.45
FP ex.: ['ה שדולה ה פרו', 'טקסס', 'ירושלים', 'איתן', 'שיקאגו']
FN ex.: ['כריסטין טוד ויטמן', 'ויליאם וולד', 'סילבר', 'שירות ה תעסוקה', 'דווייט מקדונלד']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 452 found, 297 correct.
Precision: 0.6
Recall:    0.66
F1:        0.63
FP ex.: ['טקסס', 'ירושלים', 'שיקאגו', 'קטמון', 'מקארתור']
FN ex.: ['סילבר', 'קרן מקארתור', 'מנזר סן סימון', 'ה מלחמה ה קרה', 'מענק " גאונות "']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 416 found, 327 correct.
Precision: 0.66
Recall:    0.79
F1:        0.72
FP ex.: ['גאנט', 'ירושלים', 'התבוננתי', 'שיקאגו', 'יונה נלהבת']
FN ex.: ['כריסטין טוד ויטמן', 'סילבר', 'קרן מקארתור', 'מנזר סן סימון', 'ה מלחמה ה קרה']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 383 found, 324 correct.
Prec

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False, 'patience': 10},
  'conlleval_results': (56.44171779141104,
   36.94779116465863,
   44.66019417475727),
  'results': [36.94779116465863, 58.22784810126582, 45.208845208845204]},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True, 'patience': 10},
  'conlleval_results': (64.14686825053995,
   59.63855421686747,
   61.810613943808534),
  'results': [59.63855421686747, 65.7079646017699, 62.52631578947367]},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'yap_ft_sg',
   'trainable': True,
   'embed_dim': 300,
   'patience': 10},
  'conlleval_results': (75.86206896551724,
   66.26506024096386,
   70.73954983922829),
  'results': [65.66265060240963, 78.60576923076923, 71.55361050328227]},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': True,

In [498]:
conf_i = {x['conf']: x['config'] for x in morph_fixed2_conf_gold_dev_res}
for r in morph_fixed2_res:
    r['config'] = conf_i[r['conf']]
    
morph_fixed2_res = morph_fixed2_res+morph_fixed2_conf_gold_dev_res

In [499]:
for r in morph_fixed2_res:
    r['unit'] = 'morpheme_fixed2'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]

In [500]:
morph_fixed2_rdf = get_results_df(morph_fixed2_res)
morph_fixed2_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,47.157623,False,0.0001,10,...,59.157212,False,False,1,True,morpheme_fixed2,False,False,True,0.1
1,True,True,True,70,,100,39.303483,False,0.0001,10,...,51.633987,False,False,1,True,morpheme_fixed2,False,False,True,0.1
2,True,True,True,70,,100,40.185063,False,0.0001,10,...,52.233677,False,False,1,True,morpheme_fixed2,False,False,True,0.1
3,True,True,True,70,,100,58.685195,False,0.0001,10,...,58.404255,False,False,1,True,morpheme_fixed2,False,True,True,0.1
4,True,True,True,70,,100,51.022605,False,0.0001,10,...,54.988399,False,False,1,True,morpheme_fixed2,False,True,True,0.1


In [501]:
morph_fixed2_rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,pretrained_token_ft,yap_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,morpheme_fixed2,morpheme_fixed2,morpheme_fixed2
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
gold_dev,False,False,45.2,73.6,71.6
gold_dev,False,True,55.2,75.7,74.9
gold_dev,True,False,62.5,77.2,78.4
gold_dev,True,True,63.2,78.8,79.1
gold_test,False,False,47.2,69.3,73.7
gold_test,False,True,52.5,71.6,74.9
gold_test,True,False,58.7,74.1,75.0
gold_test,True,True,61.1,75.4,75.8
yap_dev,False,False,39.3,66.4,64.8
yap_dev,False,True,48.7,67.7,66.8


### Evaluate tokens fixed

In [502]:
import glob

In [503]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
pred_set = ('gold_test', 'yap_dev', 'yap_test')

tokens_fixed2_res = []
for ep in sorted(glob.glob('results/new_outputs/*tokens_fixed2*-extra_preds-*')):
    print(ep)
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), tok_gold_extra_preds[i], tok_gold_sents[i])]
    tokens_fixed2_res.append(res)
    

results/new_outputs/treebank_tokens_fixed2-0-extra_preds-0.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 741 found, 328 correct.
Precision: 0.35
Recall:    0.44
F1:        0.39
FP ex.: ['בפיו', 'במנת האנטיפסטו', 'הפומבי', 'רוסיה', 'לעמותה']
FN ex.: ['פרגו', 'לסמפדוריה', 'רוברטו באגו', 'מייקל האקט', 'ובית - המשפט העליון']
results/new_outputs/treebank_tokens_fixed2-0-extra_preds-1.pkl
[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 385 found, 168 correct.
Precision: 0.34
Recall:    0.44
F1:        0.38
FP ex.: ['במפרץ הפרסי', 'מחזוריות הפילנטרופיה', 'החינוך למצוינות', 'לדיאלקטיקה', 'החברה קדישא לדחוף']
FN ex.: ['בית המשפט הבין - לאומי', 'הבית הלבן', 'כריסטין טוד ויטמן', 'המיליציה הצרפתית', 'בקליפורניה']
results/new_outputs/treebank_tokens_fixed2-0-extra_preds-2.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 741 found, 328 correct.
Precision: 0.35
Recall:    0.44
F1:        0.39
FP ex.: ['בפי

In [504]:
tokens_fixed2_conf_gold_dev_res_o = pickle.load(open('results/new_outputs/treebank_tokens_fixed2_conf_res_preds_hist.pkl', 'rb'))
tokens_fixed2_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'conlleval_results': x[1][0], 
      'results': [x*100 for x in eval_pred_mentions(get_preds_from_preds(x[2][0], sents=tok_gold_dev_sents), tok_gold_dev_mentions, tok_gold_dev_sents)] } 
     for i, x in enumerate(tokens_fixed2_conf_gold_dev_res_o)]
tokens_fixed2_conf_gold_dev_res

[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 385 found, 168 correct.
Precision: 0.34
Recall:    0.44
F1:        0.38
FP ex.: ['במפרץ הפרסי', 'מחזוריות הפילנטרופיה', 'החינוך למצוינות', 'לדיאלקטיקה', 'החברה קדישא לדחוף']
FN ex.: ['בית המשפט הבין - לאומי', 'הבית הלבן', 'כריסטין טוד ויטמן', 'המיליציה הצרפתית', 'בקליפורניה']
[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 500 found, 274 correct.
Precision: 0.55
Recall:    0.55
F1:        0.55
FP ex.: ['טקסס', 'אבי', 'האמריקאי', 'ואיה', 'אבידוב']
FN ex.: ['בית המשפט הבין - לאומי', 'הבית הלבן', 'המיליציה הצרפתית', 'סילבר', 'המלחמה']
[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 442 found, 333 correct.
Precision: 0.67
Recall:    0.75
F1:        0.71
FP ex.: ['גאנט', 'פומפידו', 'מסצוסטס', 'חטיבת " הראל', 'יונה']
FN ex.: ['הבית הלבן', 'המיליציה הצרפתית', 'המלחמה', 'בשירות התעסוקה', 'ממשרד החוץ']
[]
only 1.0 available mentions (due to different segmentatio

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False},
  'conlleval_results': (41.959798994974875,
   33.46693386773547,
   37.23522853957636),
  'results': [33.66733466933868, 43.63636363636363, 38.00904977375565]},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True},
  'conlleval_results': (53.606237816764136,
   55.11022044088176,
   54.347826086956516),
  'results': [54.90981963927856, 54.800000000000004, 54.854854854854864]},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'token_ft_sg',
   'trainable': True,
   'embed_dim': 300},
  'conlleval_results': (69.23076923076923,
   66.73346693386773,
   67.95918367346938),
  'results': [66.73346693386773, 75.3393665158371, 70.77577045696069]},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': '

In [505]:
conf_i = {x['conf']: x['config'] for x in tokens_fixed2_conf_gold_dev_res}
for r in tokens_fixed2_res:
    r['config'] = conf_i[r['conf']]
    
tokens_fixed2_res = tokens_fixed2_res+tokens_fixed2_conf_gold_dev_res

In [506]:
for r in tokens_fixed2_res:
    r['unit'] = 'tokens_fixed2'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]

In [507]:
tokens_fixed2_rdf = get_results_df(tokens_fixed2_res)
tokens_fixed2_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,39.23445,False,0.0001,20,...,44.264507,False,False,1,True,tokens_fixed2,False,False,True,0.1
1,True,True,True,70,,100,38.00905,False,0.0001,20,...,43.636364,False,False,1,True,tokens_fixed2,False,False,True,0.1
2,True,True,True,70,,100,39.23445,False,0.0001,20,...,44.264507,False,False,1,True,tokens_fixed2,False,False,True,0.1
3,True,True,True,70,,100,51.855618,False,0.0001,20,...,49.227799,False,False,1,True,tokens_fixed2,False,True,True,0.1
4,True,True,True,70,,100,47.927199,False,0.0001,20,...,48.367347,False,False,1,True,tokens_fixed2,False,True,True,0.1


In [508]:
tokens_fixed2_rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,pretrained_token_ft,token_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,tokens_fixed2,tokens_fixed2,tokens_fixed2
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
gold_dev,False,False,38.0,69.2,70.8
gold_dev,False,True,43.0,70.4,72.8
gold_dev,True,False,54.9,73.3,75.6
gold_dev,True,True,55.4,76.9,76.7
gold_test,False,False,39.2,65.8,67.5
gold_test,False,True,41.5,70.5,70.9
gold_test,True,False,51.9,69.5,69.4
gold_test,True,True,50.4,72.7,73.9
yap_dev,False,False,38.0,69.2,70.8
yap_dev,False,True,43.0,70.4,72.8


In [509]:
f2rdf = pd.concat([morph_fixed2_rdf,tokens_fixed2_rdf])
f2rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,None,pretrained_token_ft,pretrained_token_ft,token_ft_sg,yap_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,morpheme_fixed2,tokens_fixed2,morpheme_fixed2,tokens_fixed2,tokens_fixed2,morpheme_fixed2
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
gold_dev,False,False,45.2,38.0,73.6,69.2,70.8,71.6
gold_dev,False,True,55.2,43.0,75.7,70.4,72.8,74.9
gold_dev,True,False,62.5,54.9,77.2,73.3,75.6,78.4
gold_dev,True,True,63.2,55.4,78.8,76.9,76.7,79.1
gold_test,False,False,47.2,39.2,69.3,65.8,67.5,73.7
gold_test,False,True,52.5,41.5,71.6,70.5,70.9,74.9
gold_test,True,False,58.7,51.9,74.1,69.5,69.4,75.0
gold_test,True,True,61.1,50.4,75.4,72.7,73.9,75.8
yap_dev,False,False,39.3,38.0,66.4,69.2,70.8,64.8
yap_dev,False,True,48.7,43.0,67.7,70.4,72.8,66.8
