# Analyze results after treebank alignment

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('paper')
sns.set_style('white')

In [4]:
import pickle

In [5]:
import ne_evaluate_mentions as nem


## Get token data

In [6]:
tokens_ner_with_upos_feats = pd.read_csv('align/spmrl_tokens_df.csv.gz', compression='gzip').reset_index()

In [7]:
tok_gold_dev  = tokens_ner_with_upos_feats[tokens_ner_with_upos_feats.set=='dev']
tok_gold_train = tokens_ner_with_upos_feats[tokens_ner_with_upos_feats.set=='train']
tok_gold_test = tokens_ner_with_upos_feats[tokens_ner_with_upos_feats.set=='test']

In [8]:
tok_gold_dev_sents =   tok_gold_dev.groupby('sent_id')[['misc_token_str', 'biose']].apply(lambda x: x.values.tolist())
tok_gold_train_sents = tok_gold_train.groupby('sent_id')[['misc_token_str', 'biose']].apply(lambda x: x.values.tolist())
tok_gold_test_sents =  tok_gold_test.groupby('sent_id')[['misc_token_str', 'biose']].apply(lambda x: x.values.tolist())
tok_gold_test_sents.head()

sent_id
5439    [[הכל, O], [נושאים, O], [עמם, O], [את, O], [כי...
5440    [[אומר, O], [מזכיר, O], [התק"ם, S-ORG], [,, O]...
5441    [[לא, O], [ייתכן, O], [שעולה, O], [יבוא, O], [...
5442    [[לא, O], [ייתכן, O], [שהוא, O], [יירד, O], [מ...
5443    [[לכן, O], [קבענו, O], [עיקרון, O], [שצריכה, O...
dtype: object

In [9]:
tok_gold_dev_mentions = nem.sents_to_mentions  (tok_gold_dev_sents  )
tok_gold_train_mentions = nem.sents_to_mentions(tok_gold_train_sents)
tok_gold_test_mentions = nem.sents_to_mentions (tok_gold_test_sents )

tok_gold_dev_mentions[:10]

[(1, 'מתאילנד', 'GPE', 1),
 (1, 'לישראל', 'GPE', 1),
 (2, 'בוועדת העבודה והרווחה', 'ORG', 1),
 (2, 'הכנסת', 'ORG', 1),
 (3, 'אורה נמיר', 'PER', 1),
 (3, 'מערך', 'ORG', 1),
 (3, 'לישראל', 'GPE', 1),
 (3, 'מתאילנד', 'GPE', 1),
 (4, 'האוצר', 'ORG', 1),
 (4, 'משרד העבודה והרווחה', 'ORG', 1)]

In [10]:
len(tok_gold_train_sents), len(tok_gold_dev_sents), len(tok_gold_test_sents)

(4937, 500, 706)

In [11]:
len(tok_gold_train), len(tok_gold_dev), len(tok_gold_test)

(93504, 8531, 12619)

In [12]:
len(tok_gold_train_mentions), len(tok_gold_dev_mentions), len(tok_gold_test_mentions)

(6275, 499, 931)

## Get morpheme data

In [13]:
dropped = [5438, 5444, 5445, 5446, 5448, 5449, 5450, 5451, 5453, 5459]

In [14]:
spdf = pd.read_csv('align/spdf_fixed.csv.gz', compression='gzip')

In [15]:
o_morph_gold_dev_sents   =   spdf[spdf.set=='dev'].groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
o_morph_gold_train_sents =   spdf[spdf.set=='train'].groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
o_morph_gold_test_sents  =   spdf[spdf.set=='test'].groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
o_morph_gold_test_sents.head()

sent_id
5438    [[אחרים, O], [ב, O], [ה, B-ORG], [תנועה, I-ORG...
5439    [[הכל, O], [נושאים, O], [עמ, O], [הם, O], [את,...
5440    [[אומר, O], [מזכיר, O], [התק"ם, S-ORG], [,, O]...
5441    [[לא, O], [ייתכן, O], [ש, O], [עולה, O], [יבוא...
5442    [[לא, O], [ייתכן, O], [ש, O], [הוא, O], [יירד,...
dtype: object

In [16]:
spdf = spdf[(~spdf.sent_id.isin(dropped))]

In [17]:
morph_gold_dev  = spdf[spdf.set=='dev']
morph_gold_train = spdf[spdf.set=='train']
morph_gold_test = spdf[spdf.set=='test']

In [18]:
morph_gold_dev_sents   =   morph_gold_dev.groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
morph_gold_train_sents = morph_gold_train.groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
morph_gold_test_sents  =  morph_gold_test.groupby('sent_id')[['form', 'biose']].apply(lambda x: x.values.tolist())
morph_gold_test_sents.head()

sent_id
5439    [[הכל, O], [נושאים, O], [עמ, O], [הם, O], [את,...
5440    [[אומר, O], [מזכיר, O], [התק"ם, S-ORG], [,, O]...
5441    [[לא, O], [ייתכן, O], [ש, O], [עולה, O], [יבוא...
5442    [[לא, O], [ייתכן, O], [ש, O], [הוא, O], [יירד,...
5443    [[לכן, O], [קבענו, O], [עיקרון, O], [ש, O], [צ...
dtype: object

In [19]:
morph_gold_dev_mentions = nem.sents_to_mentions  (morph_gold_dev_sents  )
morph_gold_train_mentions = nem.sents_to_mentions(morph_gold_train_sents)
morph_gold_test_mentions = nem.sents_to_mentions (morph_gold_test_sents )

morph_gold_test_mentions[:10]

[(5440, 'התק"ם', 'ORG', 1),
 (5440, 'מוקי צור', 'PER', 1),
 (5441, 'צה"ל', 'ORG', 1),
 (5447, 'ה מחלקה ל התיישבות', 'ORG', 1),
 (5460, 'חגי בז', 'PER', 1),
 (5460, 'בריה"ם', 'GPE', 1),
 (5460, 'ה קיבוץ ה ארצי', 'ORG', 1),
 (5465, 'ה קיבוץ ה דתי', 'ORG', 1),
 (5465, 'אריה קרול', 'PER', 1),
 (5466, 'ה תנועה ה קיבוצית', 'ORG', 1)]

In [20]:
len(morph_gold_train_sents), len(morph_gold_dev_sents), len(morph_gold_test_sents)

(4937, 500, 706)

In [21]:
len(morph_gold_train), len(morph_gold_dev), len(morph_gold_test)

(127031, 11301, 16828)

In [22]:
len(morph_gold_train_mentions), len(morph_gold_dev_mentions), len(morph_gold_test_mentions)

(6270, 498, 931)

## Evaluate mentions

In [23]:
def eval_pred_mentions(preds, gold_mentions, gold_sents, drop_from_pred=[], drop_from_gold=[], verbose=True):
    #print(preds_path)
    pred_sents_l = [[i, list(zip(toks, bs))] 
                    for i, (toks, bs) 
                    in zip(gold_sents.index.drop(drop_from_gold), (zip(preds[0][2], preds[0][3]))) 
                    if i not in drop_from_pred]
    toks = [[tok for tok, bs in p[1]] for p in pred_sents_l]
    def subfinder(mylist, pattern):
        matches = []
        for i in range(len(mylist)):
            if mylist[i] == pattern[0] and mylist[i:i+len(pattern)] == pattern:
                matches.append(pattern)
        return matches

    split_mentions = [[x[1].split(' ') for x in gold_mentions if x[0]==i] 
                      for i in gold_sents.index.drop(drop_from_gold)
                      if i not in drop_from_gold]
    present = []
    for spl, tok in zip(split_mentions, toks):
        mid = []
        for ment in spl:
            mid.append(len(subfinder(tok, ment))>0)
        present.append(sum(mid))
    if verbose:
        print([(i, x) for i, x in enumerate(zip(present, [len(spl) for spl in split_mentions])) if x[1]-x[0]>=2])
        #print(toks[231])
        print ('only', round(sum(present)/len(gold_mentions), 2), 'available mentions (due to different segmentation)')
    pred_sents = pd.Series(index=[idx for idx, s in pred_sents_l], data=[s for idx, s in pred_sents_l])
    pred_mentions = nem.sents_to_mentions(pred_sents)
    return nem.evaluate_mentions(gold_mentions, pred_mentions, verbose=verbose)

### Evaluate morphemes

In [24]:
import glob

In [25]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
morph_gold_sents = (o_morph_gold_test_sents, o_morph_gold_dev_sents, o_morph_gold_test_sents)
morph_gold_extra_preds = (morph_gold_test_mentions, morph_gold_dev_mentions, morph_gold_test_mentions)
pred_set = ('gold_test', 'yap_dev', 'yap_test')
drop_from_pred = ([dropped], [], [])
drop_from_gold = ([], [], dropped)
morph_res = []
for ep in sorted(glob.glob('results/new_outputs/*morph*-extra_preds-*')):
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), morph_gold_extra_preds[i], morph_gold_sents[i], drop_from_pred[i], drop_from_gold[i])]
    morph_res.append(res)
    

[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 764 found, 395 correct.
Precision: 0.42
Recall:    0.52
F1:        0.47
FP ex.: ['קופת ה מדינה', 'צומת', 'יצוק בטון דחוס', 'מאגדבורג', 'דן']
FN ex.: ['גרמניה ה נאצית', 'מכבים', 'ארכימדס', 'פארמה', 'מודיעין']
[(56, (0, 3)), (127, (2, 4)), (160, (1, 3)), (326, (0, 2))]
only 0.92 available mentions (due to different segmentation)
498 mentions, 368 found, 181 correct.
Precision: 0.36
Recall:    0.49
F1:        0.42
FP ex.: ['לצעוד אל', 'ה ארץ', 'אנגלית', 'אריגה בלוס אוחוס', 'פורד שמרנים']
FN ex.: ['סילבר', 'וייטנאם', 'קרן פורד', 'דרום לבנון', 'רונלד רייגן']
[(32, (8, 12)), (94, (0, 2)), (131, (4, 6)), (151, (1, 4)), (156, (10, 12)), (158, (1, 3)), (160, (11, 14)), (169, (1, 3)), (215, (7, 9)), (235, (1, 3)), (402, (2, 4)), (446, (3, 5)), (456, (4, 6)), (560, (0, 2))]
only 0.87 available mentions (due to different segmentation)
931 mentions, 713 found, 340 correct.
Precision: 0.37
Recall:    0.48
F1:        0.41
FP

In [26]:
defaults = {"use_word": True, "use_pos": False, "embedding_matrix": None, 
"embed_dim": 70, "trainable": True, "input_dropout": False, "stack_lstm": 1,
"epochs": 100, "early_stopping": True, "patience": 20, "min_delta": 0.0001,
"use_char": False, "crf": False, "add_random_embedding": True, "pretrained_embed_dim": 300,
"stack_cross": False, "stack_double": False, "rec_dropout": 0.1,
"validation_split": 0.1}

In [27]:
morph_conf_gold_dev_res = pickle.load(open('results/new_outputs/treebank_morphemes_conf_res_preds_hist.pkl', 'rb'))
morph_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'results': x[1][0] } for i, x in enumerate(morph_conf_gold_dev_res)]
morph_conf_gold_dev_res

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False},
  'results': (51.43603133159269, 39.558232931726906, 44.72190692395005)},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True},
  'results': (51.01522842639594, 40.36144578313253, 45.06726457399103)},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'yap_ft_sg',
   'trainable': True,
   'embed_dim': 300},
  'results': (73.20675105485233, 69.67871485943775, 71.39917695473251)},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'pretrained_token_ft',
   'trainable': True,
   'embed_dim': 300},
  'results': (69.5906432748538, 71.6867469879518, 70.62314540059347)},
 {'pred_set': 'gold_dev',
  'conf': 4,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': True,
   'embedding

In [28]:
conf_i = {x['conf']: x['config'] for x in morph_conf_gold_dev_res}
for r in morph_res:
    r['config'] = conf_i[r['conf']]
    
morph_res = morph_res+morph_conf_gold_dev_res

In [29]:
for r in morph_res:
    r['unit'] = 'morpheme'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]
morph_res[0]

{'pred_set': 'gold_test',
 'conf': 0,
 'results': [42.42749731471536, 51.70157068062827, 46.60766961651917],
 'config': {'crf': True,
  'use_pos': False,
  'use_word': True,
  'embedding_matrix': None,
  'embed_dim': 70,
  'trainable': True,
  'input_dropout': False,
  'stack_lstm': 1,
  'epochs': 100,
  'early_stopping': True,
  'patience': 20,
  'min_delta': 0.0001,
  'use_char': False,
  'add_random_embedding': True,
  'pretrained_embed_dim': 300,
  'stack_cross': False,
  'stack_double': False,
  'rec_dropout': 0.1,
  'validation_split': 0.1},
 'unit': 'morpheme'}

In [30]:
def get_results_df(res):
    dict_res = []
    
    for r in res:
        dr = {}
        dr['pred_set'] = r['pred_set']
        dr['unit'] = r['unit']
        dr['prec'], dr['recall'], dr['f1'] = r['results']
        dr.update(r['config'])
        dict_res.append(dr)
    rdf = pd.DataFrame(dict_res)
    return rdf

rdf = get_results_df(morph_res)
rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,46.60767,False,0.0001,20,...,51.701571,False,False,1,True,morpheme,False,False,True,0.1
1,True,True,True,70,,100,41.801386,False,0.0001,20,...,49.184783,False,False,1,True,morpheme,False,False,True,0.1
2,True,True,True,70,,100,41.36253,False,0.0001,20,...,47.685835,False,False,1,True,morpheme,False,False,True,0.1
3,True,True,True,70,,100,57.188161,False,0.0001,20,...,56.295525,False,False,1,True,morpheme,False,True,True,0.1
4,True,True,True,70,,100,47.991543,False,0.0001,20,...,50.669643,False,False,1,True,morpheme,False,True,True,0.1


In [31]:
rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix']).f1.max().unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,pretrained_token_ft,yap_ft_sg
pred_set,use_pos,use_char,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
gold_dev,False,False,44.721907,70.623145,71.399177
gold_dev,False,True,53.229399,73.684211,75.946548
gold_dev,True,False,45.067265,64.672037,65.292096
gold_dev,True,True,48.752834,65.979381,65.433526
gold_test,False,False,46.60767,68.062827,71.468144
gold_test,False,True,53.734671,72.78798,73.409091
gold_test,True,False,57.188161,74.94577,74.754098
gold_test,True,True,62.751323,76.404494,75.802066
yap_dev,False,False,41.801386,67.477876,66.176471
yap_dev,False,True,48.6618,68.201754,67.168999


### Evaluate tokens

In [32]:
import glob

In [33]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
tok_gold_sents = (tok_gold_test_sents, tok_gold_dev_sents, tok_gold_test_sents)
tok_gold_extra_preds = (tok_gold_test_mentions, tok_gold_dev_mentions, tok_gold_test_mentions)
pred_set = ('gold_test', 'yap_dev', 'yap_test')

tok_res = []
for ep in sorted(glob.glob('results/new_outputs/*tokens*-extra_preds-*')):
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), tok_gold_extra_preds[i], tok_gold_sents[i])]
    tok_res.append(res)
    

[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 877 found, 332 correct.
Precision: 0.36
Recall:    0.38
F1:        0.37
FP ex.: ['הסלט', 'ראשון - לציון', 'זיל ויצנטה', 'האדמו"ר מקליוולנד', 'משמעותו תובנה']
FN ex.: ['במרכז הארץ', 'מכבים', 'ארכימדס', 'הוולגה', 'הוולגה']
[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 442 found, 178 correct.
Precision: 0.36
Recall:    0.4
F1:        0.38
FP ex.: ['שהעמיד', 'תגמולים ומימון', 'הרבו', 'סקאיף', 'רמת הייצוג']
FN ex.: ['סילבר', 'וייטנאם', 'אולין', 'ואלן בלום', 'לאפגניסטן']
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 877 found, 332 correct.
Precision: 0.36
Recall:    0.38
F1:        0.37
FP ex.: ['הסלט', 'ראשון - לציון', 'זיל ויצנטה', 'האדמו"ר מקליוולנד', 'משמעותו תובנה']
FN ex.: ['במרכז הארץ', 'מכבים', 'ארכימדס', 'הוולגה', 'הוולגה']
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 906 found, 484 correct.
Precision: 0.52
Recall:

In [34]:
tok_res

[{'pred_set': 'gold_test',
  'conf': 0,
  'results': [35.660580021482275, 37.8563283922463, 36.725663716814154]},
 {'pred_set': 'yap_dev',
  'conf': 0,
  'results': [35.671342685370746, 40.27149321266968, 37.832093517534545]},
 {'pred_set': 'yap_test',
  'conf': 0,
  'results': [35.660580021482275, 37.8563283922463, 36.725663716814154]},
 {'pred_set': 'gold_test',
  'conf': 1,
  'results': [51.98711063372718, 53.42163355408388, 52.694610778443106]},
 {'pred_set': 'yap_dev',
  'conf': 1,
  'results': [44.88977955911824, 52.95508274231678, 48.59002169197397]},
 {'pred_set': 'yap_test',
  'conf': 1,
  'results': [44.3609022556391, 50.18226002430134, 47.092360319270234]},
 {'pred_set': 'gold_test',
  'conf': 10,
  'results': [72.50268528464017, 70.16632016632016, 71.31537242472267]},
 {'pred_set': 'yap_dev',
  'conf': 10,
  'results': [70.14028056112225, 78.125, 73.91763463569167]},
 {'pred_set': 'yap_test',
  'conf': 10,
  'results': [66.9172932330827, 69.76483762597985, 68.31140350877193

In [35]:
tok_conf_gold_dev_res = pickle.load(open('results/new_outputs/treebank_tokens_conf_res_preds_hist.pkl', 'rb'))
tok_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'results': x[1][0] } for i, x in enumerate(tok_conf_gold_dev_res)]
tok_conf_gold_dev_res

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False},
  'results': (38.146551724137936, 35.47094188376754, 36.7601246105919)},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True},
  'results': (51.25858123569794, 44.88977955911824, 47.863247863247864)},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'token_ft_sg',
   'trainable': True,
   'embed_dim': 300},
  'results': (75.66265060240964, 62.925851703406806, 68.7089715536105)},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'pretrained_token_ft',
   'trainable': True,
   'embed_dim': 300},
  'results': (67.56756756756756, 65.13026052104209, 66.32653061224488)},
 {'pred_set': 'gold_dev',
  'conf': 4,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': True,
   'embed

In [36]:
conf_i = {x['conf']: x['config'] for x in tok_conf_gold_dev_res}
for r in tok_res:
    r['config'] = conf_i[r['conf']]
    
tok_res = tok_res+tok_conf_gold_dev_res

In [37]:
for r in tok_res:
    r['unit'] = 'token'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]

In [38]:
tok_res[0]

{'pred_set': 'gold_test',
 'conf': 0,
 'results': [35.660580021482275, 37.8563283922463, 36.725663716814154],
 'config': {'crf': True,
  'use_pos': False,
  'use_word': True,
  'embedding_matrix': None,
  'embed_dim': 70,
  'trainable': True,
  'input_dropout': False,
  'stack_lstm': 1,
  'epochs': 100,
  'early_stopping': True,
  'patience': 20,
  'min_delta': 0.0001,
  'use_char': False,
  'add_random_embedding': True,
  'pretrained_embed_dim': 300,
  'stack_cross': False,
  'stack_double': False,
  'rec_dropout': 0.1,
  'validation_split': 0.1},
 'unit': 'token'}

In [39]:
tok_rdf = get_results_df(tok_res)
tok_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,36.725664,False,0.0001,20,...,37.856328,False,False,1,True,token,False,False,True,0.1
1,True,True,True,70,,100,37.832094,False,0.0001,20,...,40.271493,False,False,1,True,token,False,False,True,0.1
2,True,True,True,70,,100,36.725664,False,0.0001,20,...,37.856328,False,False,1,True,token,False,False,True,0.1
3,True,True,True,70,,100,52.694611,False,0.0001,20,...,53.421634,False,False,1,True,token,False,True,True,0.1
4,True,True,True,70,,100,48.590022,False,0.0001,20,...,52.955083,False,False,1,True,token,False,True,True,0.1


In [40]:
rdf = pd.concat([rdf,tok_rdf])

In [41]:
rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,None,pretrained_token_ft,pretrained_token_ft,token_ft_sg,yap_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,morpheme,token,morpheme,token,token,morpheme
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
gold_dev,False,False,44.7,36.8,70.6,66.3,68.7,71.4
gold_dev,False,True,53.2,43.7,73.7,69.3,70.1,75.9
gold_dev,True,False,45.1,47.9,64.7,67.1,68.4,65.3
gold_dev,True,True,48.8,52.2,66.0,68.9,73.0,65.4
gold_test,False,False,46.6,39.2,68.1,66.1,67.5,71.5
gold_test,False,True,53.7,44.0,72.8,70.5,71.8,73.4
gold_test,True,False,57.2,52.7,74.9,70.5,71.8,74.8
gold_test,True,True,62.8,55.9,76.4,72.7,73.9,75.8
yap_dev,False,False,41.8,38.0,67.5,69.2,70.8,66.2
yap_dev,False,True,48.7,44.4,68.2,70.7,72.8,67.2


### Evaluate morphemes fixed

In [46]:
import glob

In [47]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
morph_fixed_gold_sents = (morph_gold_test_sents, morph_gold_dev_sents, morph_gold_test_sents)
pred_set = ('gold_test', 'yap_dev', 'yap_test')

morph_fixed_res = []
for ep in sorted(glob.glob('results/new_outputs/*morphemes_fixed*-extra_preds-*')):
    print(ep)
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), morph_gold_extra_preds[i], morph_fixed_gold_sents[i])]
    morph_fixed_res.append(res)
    

results/new_outputs/treebank_morphemes_fixed-0-extra_preds-0.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 843 found, 438 correct.
Precision: 0.47
Recall:    0.52
F1:        0.49
FP ex.: ['פלפל', 'יצוק בטון דחוס', 'ה ציר ה אנכי', 'מקסימה', 'נוורסון להשאיר']
FN ex.: ['גרמניה ה נאצית', 'מכבים', 'מודיעין', 'קזחסטן', 'גימראש']
results/new_outputs/treebank_morphemes_fixed-0-extra_preds-1.pkl
[(56, (0, 3)), (127, (2, 4)), (160, (1, 3)), (326, (0, 2))]
only 0.92 available mentions (due to different segmentation)
498 mentions, 400 found, 184 correct.
Precision: 0.37
Recall:    0.46
F1:        0.41
FP ex.: ['מוחץ', 'לצעוד אל', 'כוכב ה כדורסל ה מהולל', 'קטמון נפלה', 'מען']
FN ex.: ['סילבר', 'וייטנאם', 'קרן פורד', 'דרום לבנון', 'רונלד רייגן']
results/new_outputs/treebank_morphemes_fixed-0-extra_preds-2.pkl
[(32, (8, 12)), (94, (0, 2)), (131, (4, 6)), (151, (1, 4)), (156, (10, 12)), (158, (1, 3)), (160, (11, 14)), (169, (1, 3)), (215, (7, 9)), (235, (1, 3)), (402

In [48]:
from itertools import islice

def get_preds_from_preds(preds, sents=morph_gold_dev_sents, truncate=80):
    i=0
    xtoks = []
    xpreds = []
    for sent in sents:
        t = []
        p = []
        for tok, b in islice(sent, truncate):
            t.append(tok)
            p.append(preds[i])
            i+=1
        xtoks.append(t)
        xpreds.append(p)

    preds_preds = [[None, None, xtoks, xpreds]]
    return preds_preds

In [49]:
morph_fixed_conf_gold_dev_res_o = pickle.load(open('results/new_outputs/treebank_morphemes_fixed_conf_res_preds_hist.pkl', 'rb'))
morph_fixed_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'conlleval_results': x[1][0], 
      'results': [x*100 for x in eval_pred_mentions(get_preds_from_preds(x[2][0]), morph_gold_dev_mentions, o_morph_gold_dev_sents)] } 
     for i, x in enumerate(morph_fixed_conf_gold_dev_res_o)]
morph_fixed_conf_gold_dev_res

[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 411 found, 212 correct.
Precision: 0.43
Recall:    0.52
F1:        0.47
FP ex.: ['מוחץ', 'לצעוד אל', 'חשיבה ה ליברלית', 'כוכב ה כדורסל ה מהולל', 'קטמון נפלה']
FN ex.: ['וייטנאם', 'קרן פורד', 'דרום לבנון', 'רונלד רייגן', 'תנועת ה מושבים']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 357 found, 206 correct.
Precision: 0.41
Recall:    0.58
F1:        0.48
FP ex.: ['מקרין', 'תאילנד ל ישראל', 'פורד שמרנים', 'מסצוסטס', 'כהן']
FN ex.: ['סילבר', 'וייטנאם', 'קרן פורד', 'דרום לבנון', 'דוד מנע']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 460 found, 354 correct.
Precision: 0.71
Recall:    0.77
F1:        0.74
FP ex.: ['כוכב ה כדורסל ה מהולל', 'ה קטיף', 'אסמי ה דגנים', 'ה ארץ', 'גרוסבורד']
FN ex.: ['סילבר', 'וייטנאם', 'קרן פורד', 'דרום לבנון', 'תנועת ה מושבים']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 414 found, 341 correc

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False, 'patience': 10},
  'conlleval_results': (50.96153846153846,
   42.570281124497996,
   46.389496717724285),
  'results': [42.570281124497996, 51.58150851581509, 46.64466446644665]},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True, 'patience': 10},
  'conlleval_results': (56.94444444444444,
   41.164658634538156,
   47.78554778554779),
  'results': [41.365461847389554, 57.70308123249299, 48.18713450292397]},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'yap_ft_sg',
   'trainable': True,
   'embed_dim': 300,
   'patience': 10},
  'conlleval_results': (73.18087318087319,
   70.68273092369478,
   71.91011235955057),
  'results': [71.08433734939759, 76.95652173913044, 73.90396659707724]},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': T

In [50]:
conf_i = {x['conf']: x['config'] for x in morph_fixed_conf_gold_dev_res}
for r in morph_fixed_res:
    r['config'] = conf_i[r['conf']]
    
morph_fixed_res = morph_fixed_res+morph_fixed_conf_gold_dev_res

In [51]:
for r in morph_fixed_res:
    r['unit'] = 'morpheme_fixed'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]

In [52]:
morph_fixed_res[0]

{'pred_set': 'gold_test',
 'conf': 0,
 'results': [47.046186895810955, 51.95729537366548, 49.379932356257044],
 'config': {'crf': True,
  'use_pos': False,
  'patience': 10,
  'use_word': True,
  'embedding_matrix': None,
  'embed_dim': 70,
  'trainable': True,
  'input_dropout': False,
  'stack_lstm': 1,
  'epochs': 100,
  'early_stopping': True,
  'min_delta': 0.0001,
  'use_char': False,
  'add_random_embedding': True,
  'pretrained_embed_dim': 300,
  'stack_cross': False,
  'stack_double': False,
  'rec_dropout': 0.1,
  'validation_split': 0.1},
 'unit': 'morpheme_fixed'}

In [53]:
morph_fixed_rdf = get_results_df(morph_fixed_res)
morph_fixed_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,49.379932,False,0.0001,10,...,51.957295,False,False,1,True,morpheme_fixed,False,False,True,0.1
1,True,True,True,70,,100,40.979955,False,0.0001,10,...,46.0,False,False,1,True,morpheme_fixed,False,False,True,0.1
2,True,True,True,70,,100,41.498559,False,0.0001,10,...,44.776119,False,False,1,True,morpheme_fixed,False,False,True,0.1
3,True,True,True,70,,100,61.176471,False,0.0001,10,...,60.915868,False,False,1,True,morpheme_fixed,False,True,True,0.1
4,True,True,True,70,,100,49.837486,False,0.0001,10,...,54.117647,False,False,1,True,morpheme_fixed,False,True,True,0.1


In [54]:
morph_fixed_rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,pretrained_token_ft,yap_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,morpheme_fixed,morpheme_fixed,morpheme_fixed
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
gold_dev,False,False,46.6,74.8,73.9
gold_dev,False,True,55.0,76.1,74.8
gold_dev,True,False,48.2,64.7,66.1
gold_dev,True,True,51.5,69.6,67.5
gold_test,False,False,49.4,71.4,73.7
gold_test,False,True,58.2,73.3,74.9
gold_test,True,False,61.2,74.1,75.0
gold_test,True,True,61.1,75.6,75.8
yap_dev,False,False,41.0,67.5,66.2
yap_dev,False,True,48.7,68.2,67.2


### Evaluate morphemes fixed

In [55]:
import glob

In [56]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
morph_fixed_gold_sents = (morph_gold_test_sents, morph_gold_dev_sents, morph_gold_test_sents)
pred_set = ('gold_test', 'yap_dev', 'yap_test')

morph_fixed2_res = []
for ep in sorted(glob.glob('results/new_outputs/*morphemes_fixed2*-extra_preds-*')):
    print(ep)
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), morph_gold_extra_preds[i], morph_fixed_gold_sents[i])]
    morph_fixed2_res.append(res)
    

results/new_outputs/treebank_morphemes_fixed2-0-extra_preds-0.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 617 found, 365 correct.
Precision: 0.39
Recall:    0.59
F1:        0.47
FP ex.: ['גפריס', 'גבת החמיצה', 'מאכלי ים', 'בן - יהודה פינת אלנבי', 'בולוניה לאיוש']
FN ex.: ['גרמניה ה נאצית', 'מכבים', 'ארכימדס', 'פארמה', 'מודיעין']
results/new_outputs/treebank_morphemes_fixed2-0-extra_preds-1.pkl
[(56, (0, 3)), (127, (2, 4)), (160, (1, 3)), (326, (0, 2))]
only 0.92 available mentions (due to different segmentation)
498 mentions, 306 found, 158 correct.
Precision: 0.32
Recall:    0.52
F1:        0.39
FP ex.: ['כוכב ה כדורסל ה מהולל', 'מסצוסטס', 'פרשיות פאפון', 'מושלת טקסס', 'גרסי']
FN ex.: ['ויליאם וולד', 'סילבר', 'וייטנאם', 'קרן פורד', 'דרום לבנון']
results/new_outputs/treebank_morphemes_fixed2-0-extra_preds-2.pkl
[(32, (8, 12)), (94, (0, 2)), (131, (4, 6)), (151, (1, 4)), (156, (10, 12)), (158, (1, 3)), (160, (11, 14)), (169, (1, 3)), (215, (7, 9)), (

In [57]:
morph_fixed2_conf_gold_dev_res_o = pickle.load(open('results/new_outputs/treebank_morphemes_fixed2_conf_res_preds_hist.pkl', 'rb'))
morph_fixed2_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'conlleval_results': x[1][0], 
      'results': [x*100 for x in eval_pred_mentions(get_preds_from_preds(x[2][0]), morph_gold_dev_mentions, o_morph_gold_dev_sents)] } 
     for i, x in enumerate(morph_fixed2_conf_gold_dev_res_o)]
morph_fixed2_conf_gold_dev_res

[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 316 found, 184 correct.
Precision: 0.37
Recall:    0.58
F1:        0.45
FP ex.: ['כוכב ה כדורסל ה מהולל', 'מסצוסטס', 'מושלת טקסס', 'גרוסבורד', 'מערב ה תיכון']
FN ex.: ['ויליאם וולד', 'סילבר', 'וייטנאם', 'קרן פורד', 'דרום לבנון']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 452 found, 297 correct.
Precision: 0.6
Recall:    0.66
F1:        0.63
FP ex.: ['תיאוריית ה קוונטים ו תיאוריית ה יחסות', 'תרצ"ח', 'כוכב ה כדורסל ה מהולל', 'קרן גון', 'בקעה']
FN ex.: ['סילבר', 'וייטנאם', 'קרן פורד', 'דרום לבנון', 'תנועת ה מושבים']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 416 found, 327 correct.
Precision: 0.66
Recall:    0.79
F1:        0.72
FP ex.: ['סן סימון', 'ה קטיף', 'פורד שמרנים', 'ה ארץ', 'חוואי']
FN ex.: ['סילבר', 'וייטנאם', 'קרן פורד', 'לין', 'תנועת ה מושבים']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 383 found, 324

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False, 'patience': 10},
  'conlleval_results': (56.44171779141104,
   36.94779116465863,
   44.66019417475727),
  'results': [36.94779116465863, 58.22784810126582, 45.208845208845204]},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True, 'patience': 10},
  'conlleval_results': (64.14686825053995,
   59.63855421686747,
   61.810613943808534),
  'results': [59.63855421686747, 65.7079646017699, 62.52631578947367]},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'yap_ft_sg',
   'trainable': True,
   'embed_dim': 300,
   'patience': 10},
  'conlleval_results': (75.86206896551724,
   66.26506024096386,
   70.73954983922829),
  'results': [65.66265060240963, 78.60576923076923, 71.55361050328227]},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': True,

In [58]:
conf_i = {x['conf']: x['config'] for x in morph_fixed2_conf_gold_dev_res}
for r in morph_fixed2_res:
    r['config'] = conf_i[r['conf']]
    
morph_fixed2_res = morph_fixed2_res+morph_fixed2_conf_gold_dev_res

In [59]:
for r in morph_fixed2_res:
    r['unit'] = 'morpheme_fixed2'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]

In [60]:
morph_fixed2_rdf = get_results_df(morph_fixed2_res)
morph_fixed2_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,47.157623,False,0.0001,10,...,59.157212,False,False,1,True,morpheme_fixed2,False,False,True,0.1
1,True,True,True,70,,100,39.303483,False,0.0001,10,...,51.633987,False,False,1,True,morpheme_fixed2,False,False,True,0.1
2,True,True,True,70,,100,40.185063,False,0.0001,10,...,52.233677,False,False,1,True,morpheme_fixed2,False,False,True,0.1
3,True,True,True,70,,100,58.685195,False,0.0001,10,...,58.404255,False,False,1,True,morpheme_fixed2,False,True,True,0.1
4,True,True,True,70,,100,51.022605,False,0.0001,10,...,54.988399,False,False,1,True,morpheme_fixed2,False,True,True,0.1


In [61]:
morph_fixed2_rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,pretrained_token_ft,yap_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,morpheme_fixed2,morpheme_fixed2,morpheme_fixed2
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
gold_dev,False,False,45.2,73.6,71.6
gold_dev,False,True,55.2,75.7,74.9
gold_dev,True,False,62.5,77.2,78.4
gold_dev,True,True,63.2,78.8,79.1
gold_test,False,False,47.2,69.3,73.7
gold_test,False,True,52.5,71.6,74.9
gold_test,True,False,58.7,74.1,75.0
gold_test,True,True,61.1,75.4,75.8
yap_dev,False,False,39.3,66.4,64.8
yap_dev,False,True,48.7,67.7,66.8


### Evaluate tokens fixed

In [62]:
import glob

In [63]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
pred_set = ('gold_test', 'yap_dev', 'yap_test')

tokens_fixed2_res = []
for ep in sorted(glob.glob('results/new_outputs/*tokens_fixed2*-extra_preds-*')):
    print(ep)
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), tok_gold_extra_preds[i], tok_gold_sents[i])]
    tokens_fixed2_res.append(res)
    

results/new_outputs/treebank_tokens_fixed2-0-extra_preds-0.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 741 found, 328 correct.
Precision: 0.35
Recall:    0.44
F1:        0.39
FP ex.: ['למשפחת קנטור', 'אופיר', 'דן', 'חשבונות', 'מכבי איבד']
FN ex.: ['במרכז הארץ', 'מכבים', 'ארכימדס', 'הוולגה', 'הוולגה']
results/new_outputs/treebank_tokens_fixed2-0-extra_preds-1.pkl
[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 385 found, 168 correct.
Precision: 0.34
Recall:    0.44
F1:        0.38
FP ex.: ['סן סימון', 'הלבן', 'החברה קדישא לדחוף', 'רמת הייצוג', 'מעובדיו']
FN ex.: ['סילבר', 'וייטנאם', 'אולין', 'דוד מנע', 'רשות שדות התעופה']
results/new_outputs/treebank_tokens_fixed2-0-extra_preds-2.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 741 found, 328 correct.
Precision: 0.35
Recall:    0.44
F1:        0.39
FP ex.: ['למשפחת קנטור', 'אופיר', 'דן', 'חשבונות', 'מכבי איבד']
FN ex.: ['במרכז הארץ', 'מכבים

In [64]:
tokens_fixed2_conf_gold_dev_res_o = pickle.load(open('results/new_outputs/treebank_tokens_fixed2_conf_res_preds_hist.pkl', 'rb'))
tokens_fixed2_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'conlleval_results': x[1][0], 
      'results': [x*100 for x in eval_pred_mentions(get_preds_from_preds(x[2][0], sents=tok_gold_dev_sents), tok_gold_dev_mentions, tok_gold_dev_sents)] } 
     for i, x in enumerate(tokens_fixed2_conf_gold_dev_res_o)]
tokens_fixed2_conf_gold_dev_res

[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 385 found, 168 correct.
Precision: 0.34
Recall:    0.44
F1:        0.38
FP ex.: ['סן סימון', 'הלבן', 'החברה קדישא לדחוף', 'רמת הייצוג', 'מעובדיו']
FN ex.: ['סילבר', 'וייטנאם', 'אולין', 'דוד מנע', 'רשות שדות התעופה']
[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 500 found, 274 correct.
Precision: 0.55
Recall:    0.55
F1:        0.55
FP ex.: ['הברית', 'קרן גאנט', 'רמת הייצוג', 'המפלגה היריבה', 'באושוויץ']
FN ex.: ['סילבר', 'וייטנאם', 'אולין', 'ניו יורק טיימס', 'לאפגניסטן']
[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 442 found, 333 correct.
Precision: 0.67
Recall:    0.75
F1:        0.71
FP ex.: ['בנווה - מונסון', 'חוואי', 'במנזר', 'גרוסבורד', 'בשיקאגו']
FN ex.: ['וייטנאם', 'אולין', 'דוד מנע', 'איובה', 'קרייזלר']
[]
only 1.0 available mentions (due to different segmentation)
499 mentions, 391 found, 308 correct.
Precision: 0.62
Recall:    0.79
F1:   

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'crf': True, 'use_pos': False},
  'conlleval_results': (41.959798994974875,
   33.46693386773547,
   37.23522853957636),
  'results': [33.66733466933868, 43.63636363636363, 38.00904977375565]},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'crf': True, 'use_pos': True},
  'conlleval_results': (53.606237816764136,
   55.11022044088176,
   54.347826086956516),
  'results': [54.90981963927856, 54.800000000000004, 54.854854854854864]},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'token_ft_sg',
   'trainable': True,
   'embed_dim': 300},
  'conlleval_results': (69.23076923076923,
   66.73346693386773,
   67.95918367346938),
  'results': [66.73346693386773, 75.3393665158371, 70.77577045696069]},
 {'pred_set': 'gold_dev',
  'conf': 3,
  'config': {'add_random_embedding': False,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': '

In [65]:
conf_i = {x['conf']: x['config'] for x in tokens_fixed2_conf_gold_dev_res}
for r in tokens_fixed2_res:
    r['config'] = conf_i[r['conf']]
    
tokens_fixed2_res = tokens_fixed2_res+tokens_fixed2_conf_gold_dev_res

In [66]:
for r in tokens_fixed2_res:
    r['unit'] = 'tokens_fixed2'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]

In [67]:
tokens_fixed2_rdf = get_results_df(tokens_fixed2_res)
tokens_fixed2_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,patience,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,True,True,True,70,,100,39.23445,False,0.0001,20,...,44.264507,False,False,1,True,tokens_fixed2,False,False,True,0.1
1,True,True,True,70,,100,38.00905,False,0.0001,20,...,43.636364,False,False,1,True,tokens_fixed2,False,False,True,0.1
2,True,True,True,70,,100,39.23445,False,0.0001,20,...,44.264507,False,False,1,True,tokens_fixed2,False,False,True,0.1
3,True,True,True,70,,100,51.855618,False,0.0001,20,...,49.227799,False,False,1,True,tokens_fixed2,False,True,True,0.1
4,True,True,True,70,,100,47.927199,False,0.0001,20,...,48.367347,False,False,1,True,tokens_fixed2,False,True,True,0.1


In [68]:
tokens_fixed2_rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,pretrained_token_ft,token_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,tokens_fixed2,tokens_fixed2,tokens_fixed2
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
gold_dev,False,False,38.0,69.2,70.8
gold_dev,False,True,43.0,70.4,72.8
gold_dev,True,False,54.9,73.3,75.6
gold_dev,True,True,55.4,76.9,76.7
gold_test,False,False,39.2,65.8,67.5
gold_test,False,True,41.5,70.5,70.9
gold_test,True,False,51.9,69.5,69.4
gold_test,True,True,50.4,72.7,73.9
yap_dev,False,False,38.0,69.2,70.8
yap_dev,False,True,43.0,70.4,72.8


In [69]:
f2rdf = pd.concat([morph_fixed2_rdf,tokens_fixed2_rdf])
f2rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,embedding_matrix,None,None,pretrained_token_ft,pretrained_token_ft,token_ft_sg,yap_ft_sg
Unnamed: 0_level_1,Unnamed: 1_level_1,unit,morpheme_fixed2,tokens_fixed2,morpheme_fixed2,tokens_fixed2,tokens_fixed2,morpheme_fixed2
pred_set,use_pos,use_char,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
gold_dev,False,False,45.2,38.0,73.6,69.2,70.8,71.6
gold_dev,False,True,55.2,43.0,75.7,70.4,72.8,74.9
gold_dev,True,False,62.5,54.9,77.2,73.3,75.6,78.4
gold_dev,True,True,63.2,55.4,78.8,76.9,76.7,79.1
gold_test,False,False,47.2,39.2,69.3,65.8,67.5,73.7
gold_test,False,True,52.5,41.5,71.6,70.5,70.9,74.9
gold_test,True,False,58.7,51.9,74.1,69.5,69.4,75.0
gold_test,True,True,61.1,50.4,75.4,72.7,73.9,75.8
yap_dev,False,False,39.3,38.0,66.4,69.2,70.8,64.8
yap_dev,False,True,48.7,43.0,67.7,70.4,72.8,66.8


### Evaluate morphemes stack

In [70]:
import glob

In [71]:
#extra_preds = [(X_gold_test, X_pos_gold_test, X_char_gold_test), 
#               (X_yap_dev, X_pos_yap_dev, X_char_yap_dev),
#               (X_yap_test, X_pos_yap_test, X_char_yap_test)]
import re
morph_fixed_gold_sents = (morph_gold_test_sents, morph_gold_dev_sents, morph_gold_test_sents)
pred_set = ('gold_test', 'yap_dev', 'yap_test')

morph_stack_res = []
for ep in sorted(glob.glob('results/new_outputs_stack/*morphemes_stack*-extra_preds-*')):
    print(ep)
    i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
    conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
    res = {'pred_set': pred_set[i], 'conf': conf}
    res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), morph_gold_extra_preds[i], morph_fixed_gold_sents[i])]
    morph_stack_res.append(res)
    

results/new_outputs_stack/treebank_morphemes_stack-0-extra_preds-0.pkl
[]
only 1.0 available mentions (due to different segmentation)
931 mentions, 908 found, 701 correct.
Precision: 0.75
Recall:    0.77
F1:        0.76
FP ex.: ['אזור ה וולגה', 'אוסטריה וינה', 'בראגה', 'פורד קאנטו', 'ה כרמל ה אי נראה']
FN ex.: ['לשכת עורכי - ה דין', 'גרמניה ה נאצית', 'סדרי ה דין ה אזרחי', 'מכבים', 'ארכימדס']
results/new_outputs_stack/treebank_morphemes_stack-0-extra_preds-1.pkl
[(56, (0, 3)), (127, (2, 4)), (160, (1, 3)), (326, (0, 2))]
only 0.92 available mentions (due to different segmentation)
498 mentions, 462 found, 332 correct.
Precision: 0.67
Recall:    0.72
F1:        0.69
FP ex.: ['מסצוסטס', 'פורד שמרנים', 'סקאיף', 'להכשילם', 'שצה"ל']
FN ex.: ['וייטנאם', 'קרן פורד', 'דוד מנע', 'תנועת ה מושבים', 'לין']
results/new_outputs_stack/treebank_morphemes_stack-0-extra_preds-2.pkl
[(32, (8, 12)), (94, (0, 2)), (131, (4, 6)), (151, (1, 4)), (156, (10, 12)), (158, (1, 3)), (160, (11, 14)), (169, (1, 3)), 

In [72]:
morph_stack_conf_gold_dev_res_o = pickle.load(open('results/new_outputs_stack/treebank_morphemes_stack_conf_res_preds_hist.pkl', 'rb'))
morph_stack_conf_gold_dev_res = [{'pred_set': 'gold_dev', 'conf': i, 'config': x[0], 'conlleval_results': x[1][0], 
      'results': [x*100 for x in eval_pred_mentions(get_preds_from_preds(x[2][0]), morph_gold_dev_mentions, o_morph_gold_dev_sents)] } 
     for i, x in enumerate(morph_stack_conf_gold_dev_res_o)]
morph_stack_conf_gold_dev_res

[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 456 found, 366 correct.
Precision: 0.73
Recall:    0.8
F1:        0.77
FP ex.: ['מסצוסטס', 'גרוסבורד', 'כישלון', 'אופרה', 'שיקאגו']
FN ex.: ['וייטנאם', 'קרן פורד', 'דוד מנע', 'תנועת ה מושבים', 'לין']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 449 found, 365 correct.
Precision: 0.73
Recall:    0.81
F1:        0.77
FP ex.: ['חבורת ה חינוך ל מצוינות', 'רעיה', 'הר - ה בית', 'סן סימון', 'ה קטיף']
FN ex.: ['וייטנאם', 'לין', 'דוד מנע', 'תנועת ה מושבים', 'אולין']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 452 found, 382 correct.
Precision: 0.77
Recall:    0.85
F1:        0.8
FP ex.: ['הר - ה בית', 'סן סימון', 'מחוז פאריס', 'ה קטיף', 'ה קטיף']
FN ex.: ['וייטנאם', 'קרן פורד', 'לין', 'תנועת ה מושבים', 'אולין']
[]
only 1.0 available mentions (due to different segmentation)
498 mentions, 468 found, 378 correct.
Precision: 0.76
Recall:    0.81
F1:        0.

[{'pred_set': 'gold_dev',
  'conf': 0,
  'config': {'optimizer': 'adam',
   'output_dropout': True,
   'add_random_embedding': False,
   'use_char': True,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'yap_ft_sg',
   'trainable': False,
   'stack_lstm': 2,
   'patience': 10},
  'conlleval_results': (79.91266375545851,
   73.49397590361446,
   76.56903765690376),
  'results': [73.49397590361446, 80.26315789473685, 76.72955974842768]},
 {'pred_set': 'gold_dev',
  'conf': 1,
  'config': {'optimizer': 'adam',
   'output_dropout': True,
   'add_random_embedding': False,
   'use_char': True,
   'crf': True,
   'use_pos': False,
   'embedding_matrix': 'pretrained_token_ft',
   'trainable': False,
   'stack_lstm': 2,
   'patience': 10},
  'conlleval_results': (79.95594713656388,
   72.89156626506023,
   76.26050420168067),
  'results': [73.29317269076306, 81.29175946547885, 77.08553326293558]},
 {'pred_set': 'gold_dev',
  'conf': 2,
  'config': {'optimizer': 'adam',
   'output_dr

In [73]:
conf_i = {x['conf']: x['config'] for x in morph_stack_conf_gold_dev_res}
for r in morph_stack_res:
    r['config'] = conf_i[r['conf']]
    
morph_stack_res = morph_stack_res+morph_stack_conf_gold_dev_res

KeyError: 10

In [None]:
for r in morph_stack_res:
    r['unit'] = 'morpheme'
    for key in defaults:
        if key not in r['config']:
            r['config'][key] = defaults[key]

In [None]:
morph_stack_rdf = get_results_df(morph_stack_res)
morph_stack_rdf.head()

In [None]:
morph_stack_rdf.fillna('None').groupby(['pred_set', 'use_pos', 'use_char', 'embedding_matrix','unit']).f1.max().unstack(level=[3,4]).round(1)

In [76]:
def evaluate_run(folder, run_name, gold_sents, gold_mentions, pred_set=('gold_test', 'yap_dev', 'yap_test'), unit='morpheme',
                 orig_preds='gold_dev', orig_gold_mentions=morph_gold_dev_mentions, orig_o_gold_sents=o_morph_gold_dev_sents, corpus='treebank', 
                 defaults=defaults):
    results = []
    for ep in sorted(glob.glob(folder+'*'+run_name+'-*-extra_preds-*')):
        i = int(re.search('extra_preds-(\d)', ep).groups(0)[0])
        conf = int(re.search('(\d+)-extra_preds', ep).groups(0)[0])
        res = {'pred_set': pred_set[i], 'conf': conf}
        res['results'] = [x*100 for x in eval_pred_mentions(pickle.load(open(ep, 'rb')), gold_mentions[i], gold_sents[i], verbose=False)]
        results.append(res)
        
    conf_gold_res_o = pickle.load(open(folder+corpus+'_'+run_name+'_conf_res_preds_hist.pkl', 'rb'))
    conf_gold_res = [{'pred_set': orig_preds, 'conf': i, 'config': x[0], 'conlleval_results': x[1][0], 
          'results': [x*100 for x in eval_pred_mentions(get_preds_from_preds(x[2][0], sents=orig_o_gold_sents), orig_gold_mentions, orig_o_gold_sents, verbose=False)] } 
         for i, x in enumerate(conf_gold_res_o)]

    conf_i = {x['conf']: x['config'] for x in conf_gold_res}
    
    for r in results:
        r['config'] = conf_i[r['conf']]

    results = results+conf_gold_res
    for r in results:
        r['unit'] = unit
        for key in defaults:
            if key not in r['config']:
                r['config'][key] = defaults[key]
    rdf = get_results_df(results)

    return rdf


In [77]:
ms_rdf = evaluate_run('results/new_outputs_stack/', 'morphemes_stack', morph_fixed_gold_sents, morph_gold_extra_preds)
ms_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,optimizer,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,False,True,True,70,yap_ft_sg,100,76.237085,False,0.0001,adam,...,77.202643,False,False,2,False,morpheme,True,False,True,0.1
1,False,True,True,70,yap_ft_sg,100,69.166667,False,0.0001,adam,...,71.861472,False,False,2,False,morpheme,True,False,True,0.1
2,False,True,True,70,yap_ft_sg,100,66.298952,False,0.0001,adam,...,68.14059,False,False,2,False,morpheme,True,False,True,0.1
3,False,True,True,70,pretrained_token_ft,100,74.89083,False,0.0001,adam,...,76.137625,False,False,2,False,morpheme,True,False,True,0.1
4,False,True,True,70,pretrained_token_ft,100,67.43941,False,0.0001,adam,...,70.953437,False,False,2,False,morpheme,True,False,True,0.1


In [78]:
ms2_rdf = evaluate_run('results/new_outputs_stack/', 'morphemes_stack2', morph_fixed_gold_sents, morph_gold_extra_preds)
ms2_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,optimizer,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,False,True,True,70,yap_ft_sg,100,66.743383,False,0.0001,adam,...,71.871128,False,False,2,True,morpheme,True,False,True,0.1
1,False,True,True,70,yap_ft_sg,100,57.237569,False,0.0001,adam,...,63.636364,False,False,2,True,morpheme,True,False,True,0.1
2,False,True,True,70,yap_ft_sg,100,58.162668,False,0.0001,adam,...,63.881748,False,False,2,True,morpheme,True,False,True,0.1
3,False,True,True,70,pretrained_token_ft,100,66.333888,False,0.0001,adam,...,68.577982,False,False,2,True,morpheme,True,False,True,0.1
4,False,True,True,70,pretrained_token_ft,100,58.359957,False,0.0001,adam,...,62.131519,False,False,2,True,morpheme,True,False,True,0.1


In [79]:
ts_rdf = evaluate_run('results/new_outputs_stack/', 'tokens_stack', tok_gold_sents, tok_gold_extra_preds, unit='token',
                     orig_gold_mentions=tok_gold_dev_mentions, orig_o_gold_sents=tok_gold_dev_sents,)
ts_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,optimizer,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,False,True,True,70,token_ft_sg,100,71.973094,False,0.0001,adam,...,75.263775,False,False,2,False,token,True,False,True,0.1
1,False,True,True,70,token_ft_sg,100,73.134328,False,0.0001,adam,...,78.132118,False,False,2,False,token,True,False,True,0.1
2,False,True,True,70,token_ft_sg,100,71.973094,False,0.0001,adam,...,75.263775,False,False,2,False,token,True,False,True,0.1
3,False,True,True,70,pretrained_token_ft,100,68.298109,False,0.0001,adam,...,70.818916,False,False,2,False,token,True,False,True,0.1
4,False,True,True,70,pretrained_token_ft,100,71.382637,False,0.0001,adam,...,76.728111,False,False,2,False,token,True,False,True,0.1


In [80]:
ts3_rdf = evaluate_run('results/new_outputs_stack/', 'tokens_stack3', tok_gold_sents, tok_gold_extra_preds, unit='token',
                     orig_gold_mentions=tok_gold_dev_mentions, orig_o_gold_sents=tok_gold_dev_sents,)
ts3_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,optimizer,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,False,True,True,70,yap_w2v_sg,100,65.769456,False,0.0001,adam,...,72.236504,False,False,2,False,token,True,False,True,0.1
1,False,True,True,70,yap_w2v_sg,100,64.228571,False,0.0001,adam,...,74.734043,False,False,2,False,token,True,False,True,0.1
2,False,True,True,70,yap_w2v_sg,100,65.769456,False,0.0001,adam,...,72.236504,False,False,2,False,token,True,False,True,0.1
3,False,True,True,70,yap_w2v_cbow,100,62.405578,False,0.0001,adam,...,67.974684,False,False,2,False,token,True,False,True,0.1
4,False,True,True,70,yap_w2v_cbow,100,61.936937,False,0.0001,adam,...,70.694087,False,False,2,False,token,True,False,True,0.1


In [81]:
ms3_rdf = evaluate_run('results/new_outputs_stack/', 'morphemes_stack3', morph_fixed_gold_sents, morph_gold_extra_preds)
ms3_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,optimizer,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,False,True,True,70,yap_w2v_sg,100,76.0,False,0.0001,adam,...,78.711162,False,False,2,False,morpheme,True,False,True,0.1
1,False,True,True,70,yap_w2v_sg,100,67.168999,False,0.0001,adam,...,72.389791,False,False,2,False,morpheme,True,False,True,0.1
2,False,True,True,70,yap_w2v_sg,100,64.665912,False,0.0001,adam,...,68.383234,False,False,2,False,morpheme,True,False,True,0.1
3,False,True,True,70,yap_w2v_cbow,100,76.999448,False,0.0001,adam,...,79.138322,False,False,2,False,morpheme,True,False,True,0.1
4,False,True,True,70,yap_w2v_cbow,100,69.639066,False,0.0001,adam,...,73.873874,False,False,2,False,morpheme,True,False,True,0.1


In [84]:
tat1_rdf = evaluate_run('results/new_outputs_alt_tok/', 'tokens_alt_tok1', tok_gold_sents, tok_gold_extra_preds, unit='token',
                     orig_gold_mentions=tok_gold_dev_mentions, orig_o_gold_sents=tok_gold_dev_sents,)
tat1_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,optimizer,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,False,True,True,70,alt_tok_token_ft_sg,100,73.270614,False,0.0001,adam,...,75.570776,False,False,2,False,token,True,False,True,0.1
1,False,True,True,70,alt_tok_token_ft_sg,100,73.673036,False,0.0001,adam,...,78.329571,False,False,2,False,token,True,False,True,0.1
2,False,True,True,70,alt_tok_token_ft_sg,100,73.270614,False,0.0001,adam,...,75.570776,False,False,2,False,token,True,False,True,0.1
3,False,True,True,70,alt_tok_token_ft_sg,100,76.027763,False,0.0001,adam,...,75.583864,False,False,2,False,token,True,True,True,0.1
4,False,True,True,70,alt_tok_token_ft_sg,100,69.871795,False,0.0001,adam,...,74.828375,False,False,2,False,token,True,True,True,0.1


In [87]:
mat1_rdf = evaluate_run('results/new_outputs_stack/', 'morphemes_alt_tok1', morph_fixed_gold_sents, morph_gold_extra_preds)
mat1_rdf.head()

Unnamed: 0,add_random_embedding,crf,early_stopping,embed_dim,embedding_matrix,epochs,f1,input_dropout,min_delta,optimizer,...,recall,stack_cross,stack_double,stack_lstm,trainable,unit,use_char,use_pos,use_word,validation_split
0,False,True,True,70,alt_tok_yap_ft_sg,100,74.46458,False,0.0001,adam,...,76.179775,False,False,2,False,morpheme,True,False,True,0.1
1,False,True,True,70,alt_tok_yap_ft_sg,100,68.862912,False,0.0001,adam,...,73.137698,False,False,2,False,morpheme,True,False,True,0.1
2,False,True,True,70,alt_tok_yap_ft_sg,100,63.631316,False,0.0001,adam,...,65.862069,False,False,2,False,morpheme,True,False,True,0.1
3,False,True,True,70,alt_tok_yap_ft_sg,100,74.423592,False,0.0001,adam,...,74.304069,False,False,2,False,morpheme,True,True,True,0.1
4,False,True,True,70,alt_tok_yap_ft_sg,100,66.02358,False,0.0001,adam,...,70.804598,False,False,2,False,morpheme,True,True,True,0.1


### All results

In [88]:
stack_rdf = pd.concat([ms_rdf,ms2_rdf, ts_rdf, ms3_rdf, ts3_rdf])
stack_rdf.fillna('None').groupby(['pred_set', 'use_pos', 'embedding_matrix','unit']).f1.max().unstack(level=[1,3]).round(1)

Unnamed: 0_level_0,use_pos,False,False,True,True
Unnamed: 0_level_1,unit,morpheme,token,morpheme,token
pred_set,embedding_matrix,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
gold_dev,pretrained_token_ft,77.1,71.4,78.3,78.2
gold_dev,token_ft_cbow,75.4,72.1,76.7,76.1
gold_dev,token_ft_sg,76.9,73.1,79.4,78.6
gold_dev,token_glove,76.8,69.2,78.2,75.6
gold_dev,token_w2v_cbow,76.5,69.2,78.3,76.2
gold_dev,token_w2v_sg,75.5,70.0,78.2,75.5
gold_dev,yap_ft_cbow,75.5,63.8,77.8,73.3
gold_dev,yap_ft_sg,76.7,67.6,80.4,72.6
gold_dev,yap_glove,75.1,59.2,77.0,70.4
gold_dev,yap_w2v_cbow,75.7,61.9,78.7,72.8


In [89]:
at_rdf = pd.concat([mat1_rdf, tat1_rdf])
at_rdf.fillna('None').groupby(['pred_set', 'use_pos', 'embedding_matrix','unit']).f1.max().unstack(level=[1,3]).round(1)

Unnamed: 0_level_0,use_pos,False,False,True,True
Unnamed: 0_level_1,unit,token,morpheme,token,morpheme
pred_set,embedding_matrix,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
gold_dev,alt_tok_token_ft_sg,73.7,,77.8,
gold_dev,alt_tok_yap_ft_sg,,76.7,,80.1
gold_test,alt_tok_token_ft_sg,73.3,,76.0,
gold_test,alt_tok_yap_ft_sg,,74.5,,74.4
yap_dev,alt_tok_token_ft_sg,73.7,,69.9,
yap_dev,alt_tok_yap_ft_sg,,68.9,,66.0
yap_test,alt_tok_token_ft_sg,73.3,,69.0,
yap_test,alt_tok_yap_ft_sg,,63.6,,62.5


In [83]:
stack_rdf.groupby(['unit', 'trainable']).f1.mean()

unit      trainable
morpheme  False        70.105272
          True         64.364846
token     False        67.741901
          True         65.853718
Name: f1, dtype: float64

In [99]:
stack_rdf['embedding_unit'] = stack_rdf.embedding_matrix.apply(lambda x: 'morpheme' if 'yap' in x else 'token')
stack_rdf['embedding_method'] = stack_rdf.embedding_matrix.apply(lambda x: 'fasttext' if 'ft' in x else 'glove' if 'glove' in x else 'w2v')
stack_rdf['embedding_arch'] = stack_rdf.embedding_matrix.apply(lambda x: 'skipgram' if 'sg' in x or 'pre' in x else 'cbow' if 'cbow' in x else 'glove')

In [141]:
eval_rdf = stack_rdf[(~stack_rdf.trainable) & (stack_rdf.pred_set.str.contains('dev'))]

In [142]:
eval_rdf.groupby(['unit', 'embedding_unit']).size()

unit      embedding_unit
morpheme  morpheme          20
          token             24
token     morpheme          20
          token             24
dtype: int64

In [143]:
eval_rdf.embedding_unit.value_counts()

token       48
morpheme    40
Name: embedding_unit, dtype: int64

In [154]:
f2rdf[(f2rdf.use_char) & (f2rdf.pred_set.str.contains('dev')) & (f2rdf.embedding_matrix.isna())].fillna('None').groupby(['pred_set', 'use_pos','unit']).f1.max().unstack().round(2)

Unnamed: 0_level_0,unit,morpheme_fixed2,tokens_fixed2
pred_set,use_pos,Unnamed: 2_level_1,Unnamed: 3_level_1
gold_dev,False,55.25,42.96
gold_dev,True,63.22,55.43
yap_dev,False,48.66,42.96
yap_dev,True,51.57,49.04


In [148]:
(eval_rdf[eval_rdf.embedding_matrix.str.startswith('pre')].groupby(['pred_set', 'use_pos', 'unit']).f1.max()
 .unstack().round(2)
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

Unnamed: 0_level_0,unit,morpheme,token,dif
pred_set,use_pos,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gold_dev,False,77.09,71.38,-0.079994
gold_dev,True,78.26,78.15,-0.001408
yap_dev,False,67.44,71.38,0.055198
yap_dev,True,64.76,70.83,0.085698


In [None]:
(eval_rdf[eval_rdf.embedding_matrix.str.contains('ft_sg')].groupby(['pred_set', 'use_pos', 'unit']).f1.max()
 .unstack().round(2)
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

In [None]:
(eval_rdf.groupby(['pred_set', 'unit', 'embedding_unit']).f1.mean()
 .unstack()
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

In [129]:
(eval_rdf.groupby(['pred_set', 'unit', 'embedding_unit']).f1.mean()
 .unstack()
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

Unnamed: 0_level_0,embedding_unit,morpheme,token,dif
pred_set,unit,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gold_dev,morpheme,77.219462,77.195515,-0.00031
gold_dev,token,67.734834,73.560269,0.079193
yap_dev,morpheme,66.644934,66.413443,-0.003486
yap_dev,token,63.14588,69.081864,0.085927


In [130]:
(eval_rdf.groupby(['pred_set', 'embedding_method', 'unit', 'use_pos', 'embedding_unit']).f1.mean()
 .unstack()
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,embedding_unit,morpheme,token,dif
pred_set,embedding_method,unit,use_pos,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
gold_dev,fasttext,morpheme,False,76.09451,76.14255,0.000631
gold_dev,fasttext,morpheme,True,79.133922,78.069325,-0.013637
gold_dev,fasttext,token,False,65.677478,72.624365,0.095655
gold_dev,fasttext,token,True,72.954947,77.345768,0.056769
gold_dev,glove,morpheme,False,75.130073,76.825397,0.022067
gold_dev,glove,morpheme,True,77.003122,78.170478,0.014933
gold_dev,glove,token,False,59.174312,69.198312,0.144859
gold_dev,glove,token,True,70.378151,75.555556,0.068524
gold_dev,w2v,morpheme,False,75.888995,76.006809,0.00155
gold_dev,w2v,morpheme,True,78.913286,78.260953,-0.008335


In [113]:
(eval_rdf.groupby(['embedding_arch', 'unit', 'embedding_unit']).f1.mean()
 .unstack()
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

Unnamed: 0_level_0,embedding_unit,morpheme,token,dif
embedding_arch,unit,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cbow,morpheme,70.130788,69.324566,-0.01163
cbow,token,64.237293,70.242549,0.085493
glove,morpheme,69.138293,69.642756,0.007244
glove,token,62.400317,67.881583,0.080747
skipgram,morpheme,71.097974,70.370638,-0.010336
skipgram,token,65.819567,71.489096,0.079306


In [114]:
(eval_rdf.groupby(['pred_set', 'unit']).f1.mean()
 .unstack()
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

unit,morpheme,token,dif
pred_set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gold_dev,77.207489,70.647552,-0.092854
gold_test,74.323672,68.389136,-0.086776
yap_dev,66.529189,66.113872,-0.006282
yap_test,62.191243,64.393003,0.034193


In [115]:
(eval_rdf.groupby(['pred_set', 'embedding_unit', 'use_pos', 'unit']).f1.mean()
 .unstack()
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

Unnamed: 0_level_0,Unnamed: 1_level_0,unit,morpheme,token,dif
pred_set,embedding_unit,use_pos,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
gold_dev,morpheme,False,75.819417,63.338955,-0.197042
gold_dev,morpheme,True,78.619507,72.130714,-0.089959
gold_dev,token,False,76.224823,70.719841,-0.077842
gold_dev,token,True,78.166207,76.400698,-0.023109
gold_test,morpheme,False,74.804252,63.529509,-0.177473
gold_test,morpheme,True,74.738524,67.474683,-0.107653
gold_test,token,False,74.24609,69.67591,-0.065592
gold_test,token,True,73.505824,72.876443,-0.008636
yap_dev,morpheme,False,68.389507,63.338955,-0.079738
yap_dev,morpheme,True,64.900362,62.952805,-0.030937


In [117]:
(eval_rdf[eval_rdf.embedding_unit==eval_rdf.unit].groupby(['pred_set', 'use_pos', 'unit']).f1.mean()
 .unstack()
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

Unnamed: 0_level_0,unit,morpheme,token,dif
pred_set,use_pos,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gold_dev,False,75.819417,70.719841,-0.07211
gold_dev,True,78.619507,76.400698,-0.029042
gold_test,False,74.804252,69.67591,-0.073603
gold_test,True,74.738524,72.876443,-0.025551
yap_dev,False,68.389507,70.719841,0.032952
yap_dev,True,64.900362,67.443888,0.037713
yap_test,False,64.552624,69.67591,0.07353
yap_test,True,60.729113,64.639263,0.060492


In [118]:
(eval_rdf[eval_rdf.embedding_unit==eval_rdf.unit].groupby(['pred_set', 'use_pos', 'unit']).f1.max()
 .unstack()
 .assign(dif = lambda x: 1 - x.morpheme/x.token))

Unnamed: 0_level_0,unit,morpheme,token,dif
pred_set,use_pos,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gold_dev,False,76.72956,73.134328,-0.049159
gold_dev,True,80.421053,78.609062,-0.023051
gold_test,False,76.999448,71.973094,-0.069837
gold_test,True,75.832893,74.906766,-0.012364
yap_dev,False,69.639066,73.134328,0.047792
yap_dev,True,67.318132,70.461869,0.044616
yap_test,False,67.042254,71.973094,0.068509
yap_test,True,61.098655,67.164179,0.090309
