## Evaluate PropBankPreannotator on UD_Estonian-EDT gold standard semantic role annotations

In [1]:
import os, os.path

from tqdm import tqdm
from IPython.display import HTML, display

from conv_utils import load_estnltk_texts_from_jsonlines
from frame_eval_utils import get_gold_frame_verbs
from frame_eval_utils import eval_propbank_preannotator_on_sentence_conll
from frame_eval_utils import summarize_eval_accuracies
from frame_eval_utils import sorted_frame_missing_verb_lemmas

Assume that the UD_Estonian-EDT corpus has previously been converted into JSON Lines (`.jl`) format, one file per corpus split:

In [2]:
# Directory with the converted corpus files (one .jl file per corpus split).
in_dir = 'UD_Estonian-EDT-dev-json'
# Use isdir rather than exists, so that a stray regular file with this name
# is reported here instead of failing later inside os.listdir.
assert os.path.isdir(in_dir), f'(!) Missing input dir {in_dir}'
# os.listdir returns entries in arbitrary order; sort them so that the
# displayed listing is deterministic across platforms and runs.
input_files = sorted(f for f in os.listdir(in_dir) if f.endswith('.jl'))
input_files

['et_edt-ud-dev.jl', 'et_edt-ud-test.jl', 'et_edt-ud-train.jl']

In [3]:
from estnltk.taggers.miscellaneous.propbank.preannotator import PropBankPreannotator

# One shared pre-annotator instance, reused by every evaluation cell below.
# The syntax layer name given here ('conll_syntax') is the same layer that the
# evaluation loops later hand to the evaluator as the gold reference.
# NOTE(review): the exact effect of the boolean switches is defined by
# PropBankPreannotator itself -- consult its documentation before changing them.
propbank_annotator = PropBankPreannotator(
    input_syntax_layer='conll_syntax',
    discard_overlapped_frames=True,
    add_arg_descriptions=True,
    add_arg_feats=True,
    add_verb_class=True,
    debug_output=False,
)

In [4]:
input_file = 'et_edt-ud-dev.jl'
display(HTML(f'<h2>Evaluating on {input_file}</h2>'))

# Materialise all sentences of the dev split in memory
eval_sentences = list(
    tqdm(
        load_estnltk_texts_from_jsonlines(os.path.join(in_dir, input_file)),
        ascii=True,
    )
)

# Collect the verbs of the gold frames (only their count is reported here)
gold_frame_verbs = get_gold_frame_verbs(eval_sentences, discard_frames_wo_args=True)
print('Total verbs in gold frames: ', len(gold_frame_verbs))

# Tag each sentence and compare the automatic frames against the gold layer.
# eval_results is handed to every evaluator call and summarized afterwards,
# i.e. the evaluator is expected to accumulate its counts into this dict.
print('Evaluating PropBankPreannotator ... ')
eval_results = {}
for sent_text in tqdm(eval_sentences, ascii=True):
    propbank_annotator.tag(sent_text)
    assert propbank_annotator.output_layer in sent_text.relation_layers
    eval_propbank_preannotator_on_sentence_conll(
        sent_text['conll_syntax'],
        sent_text[propbank_annotator.output_layer],
        eval_results,
        verbose=False,
    )

# Turn the accumulated counts into two tables and render them
frame_results, arg_results = summarize_eval_accuracies(eval_results, return_dataframes=True)
display(HTML('<h3>Frame detection performance</h3>'), frame_results)
display(HTML('<h3>Argument detection performance</h3>'), arg_results)

# Optional diagnostics, disabled by default (uncomment to inspect):
#display(sorted_frame_missing_verb_lemmas(eval_results))
#display(eval_results['frame_missing_sentences'])

Loading Text objects from 'UD_Estonian-EDT-dev-json\\et_edt-ud-dev.jl' ...


3122it [00:06, 466.71it/s]
100%|############################################################################################################################################################| 3122/3122 [00:00<00:00, 1566154.42it/s]


Total verbs in gold frames:  416
Evaluating PropBankPreannotator ... 


100%|###############################################################################################################################################################| 3122/3122 [00:02<00:00, 1339.61it/s]


Unnamed: 0,full frame match accuracy,sense match accuracy,evoking verb match accuracy,redundant senses %,redundant frames %,missing frames %,fully matching,sense matching,evoking verb matching,redundant senses,redundant frames,missing frames,total gold frames,total auto frames,discarded gold frames,discarded gold args
0,58.2,78.0,89.86,20.43%,7.05%,10.14%,1905,2553,2941,719,248,332,3273,3520,0,15


Unnamed: 0,rows,ARG_OVERALL,ARG0,ARG1,ARG2,ARG3,ARG4,ARG5
0,accuracy,85.83,97.59,93.27,66.18,28.64,53.66,15.46
1,matching,3555.0,1134.0,2008.0,315.0,61.0,22.0,15.0
2,missing,587.0,28.0,145.0,161.0,152.0,19.0,82.0
3,redundant,272.0,2.0,59.0,109.0,63.0,39.0,0.0
4,total gold,4142.0,1162.0,2153.0,476.0,213.0,41.0,97.0


In [5]:
input_file = 'et_edt-ud-test.jl'
display(HTML(f'<h2>Evaluating on {input_file}</h2>'))

# Load evaluation sentences from the test part of the corpus
eval_sentences = list(
    tqdm(
        load_estnltk_texts_from_jsonlines(os.path.join(in_dir, input_file)),
        ascii=True,
    )
)

# Extract verbs in gold frames (only their count is reported here)
gold_frame_verbs = get_gold_frame_verbs(eval_sentences, discard_frames_wo_args=True)
print('Total verbs in gold frames: ', len(gold_frame_verbs))

# Evaluate: tag each sentence and compare automatic frames against the gold layer.
# eval_results is handed to every evaluator call and summarized afterwards,
# i.e. the evaluator is expected to accumulate its counts into this dict.
print('Evaluating PropBankPreannotator ... ')
eval_results = {}
for sent_text in tqdm(eval_sentences, ascii=True):
    propbank_annotator.tag(sent_text)
    # Fail with a readable message if the tagger did not attach its output layer
    assert propbank_annotator.output_layer in sent_text.relation_layers, \
        f'(!) Missing relation layer {propbank_annotator.output_layer!r} after tagging'
    eval_propbank_preannotator_on_sentence_conll(
        sent_text['conll_syntax'],
        sent_text[propbank_annotator.output_layer],
        eval_results,
        verbose=False,
    )

# Summarize results
frame_results, arg_results = summarize_eval_accuracies(eval_results, return_dataframes=True)
display(HTML('<h3>Frame detection performance</h3>'))
display(frame_results)
display(HTML('<h3>Argument detection performance</h3>'))
display(arg_results)

Loading Text objects from 'UD_Estonian-EDT-dev-json\\et_edt-ud-test.jl' ...


3207it [00:07, 437.64it/s]
100%|############################################################################################################################################################| 3207/3207 [00:00<00:00, 1003815.89it/s]


Total verbs in gold frames:  401
Evaluating PropBankPreannotator ... 


100%|###############################################################################################################################################################| 3207/3207 [00:02<00:00, 1393.01it/s]


Unnamed: 0,full frame match accuracy,sense match accuracy,evoking verb match accuracy,redundant senses %,redundant frames %,missing frames %,fully matching,sense matching,evoking verb matching,redundant senses,redundant frames,missing frames,total gold frames,total auto frames,discarded gold frames,discarded gold args
0,58.02,77.55,89.47,19.55%,6.70%,10.53%,2083,2784,3212,738,253,378,3590,3775,0,20


Unnamed: 0,rows,ARG_OVERALL,ARG0,ARG1,ARG2,ARG3,ARG4,ARG5
0,accuracy,86.68,97.19,94.82,67.19,35.84,26.67,15.83
1,matching,4003.0,1244.0,2268.0,383.0,81.0,8.0,19.0
2,missing,615.0,36.0,124.0,187.0,145.0,22.0,101.0
3,redundant,249.0,3.0,48.0,143.0,45.0,10.0,0.0
4,total gold,4618.0,1280.0,2392.0,570.0,226.0,30.0,120.0


In [6]:
input_file = 'et_edt-ud-train.jl'
display(HTML(f'<h2>Evaluating on {input_file}</h2>'))

# Load evaluation sentences from the train part of the corpus
eval_sentences = list(
    tqdm(
        load_estnltk_texts_from_jsonlines(os.path.join(in_dir, input_file)),
        ascii=True,
    )
)

# Extract verbs in gold frames (only their count is reported here)
gold_frame_verbs = get_gold_frame_verbs(eval_sentences, discard_frames_wo_args=True)
print('Total verbs in gold frames: ', len(gold_frame_verbs))

# Evaluate: tag each sentence and compare automatic frames against the gold layer.
# eval_results is handed to every evaluator call and summarized afterwards,
# i.e. the evaluator is expected to accumulate its counts into this dict.
print('Evaluating PropBankPreannotator ... ')
eval_results = {}
for sent_text in tqdm(eval_sentences, ascii=True):
    propbank_annotator.tag(sent_text)
    # Fail with a readable message if the tagger did not attach its output layer
    assert propbank_annotator.output_layer in sent_text.relation_layers, \
        f'(!) Missing relation layer {propbank_annotator.output_layer!r} after tagging'
    eval_propbank_preannotator_on_sentence_conll(
        sent_text['conll_syntax'],
        sent_text[propbank_annotator.output_layer],
        eval_results,
        verbose=False,
    )

# Summarize results
frame_results, arg_results = summarize_eval_accuracies(eval_results, return_dataframes=True)
display(HTML('<h3>Frame detection performance</h3>'))
display(frame_results)
display(HTML('<h3>Argument detection performance</h3>'))
display(arg_results)

Loading Text objects from 'UD_Estonian-EDT-dev-json\\et_edt-ud-train.jl' ...


24601it [00:51, 481.52it/s] 
100%|##########################################################################################################################################################| 24601/24601 [00:00<00:00, 1010479.20it/s]


Total verbs in gold frames:  480
Evaluating PropBankPreannotator ... 


100%|#############################################################################################################################################################| 24601/24601 [00:20<00:00, 1173.75it/s]


Unnamed: 0,full frame match accuracy,sense match accuracy,evoking verb match accuracy,redundant senses %,redundant frames %,missing frames %,fully matching,sense matching,evoking verb matching,redundant senses,redundant frames,missing frames,total gold frames,total auto frames,discarded gold frames,discarded gold args
0,57.76,77.62,89.15,19.92%,7.09%,10.85%,15268,20516,23564,5600,1994,2868,26432,28110,0,106


Unnamed: 0,rows,ARG_OVERALL,ARG0,ARG1,ARG2,ARG3,ARG4,ARG5
0,accuracy,85.67,96.92,94.17,62.51,30.58,29.39,15.69
1,matching,29003.0,9594.0,16291.0,2393.0,508.0,72.0,145.0
2,missing,4853.0,305.0,1008.0,1435.0,1153.0,173.0,779.0
3,redundant,1845.0,20.0,438.0,964.0,308.0,111.0,4.0
4,total gold,33856.0,9899.0,17299.0,3828.0,1661.0,245.0,924.0
