## Evaluate PropBankPreannotator on UD_Estonian-EDT gold standard semantic role annotations

In [1]:
import os, os.path

from tqdm import tqdm
from IPython.display import HTML, display

from conv_utils import load_estnltk_texts_from_jsonlines
from frame_eval_utils import get_gold_frame_verbs
from frame_eval_utils import eval_propbank_preannotator_on_sentence_conll
from frame_eval_utils import summarize_eval_accuracies

We assume that the UD_Estonian-EDT corpus has previously been converted into JSON Lines (`.jl`) format, with one file per corpus split:

In [2]:
# Directory that holds the converted corpus files (*.jl)
in_dir = 'UD_Estonian-EDT-dev-json'
assert os.path.exists(in_dir), f'(!) Missing input dir {in_dir}'
# os.listdir() yields entries in arbitrary order; sort them so that the
# displayed listing is the same on every platform and on every re-run.
input_files = sorted(f for f in os.listdir(in_dir) if f.endswith('.jl'))
input_files

['et_edt-ud-dev.jl', 'et_edt-ud-test.jl', 'et_edt-ud-train.jl']

In [3]:
from estnltk.taggers.miscellaneous.propbank.preannotator import PropBankPreannotator

# The pre-annotator under evaluation. It takes its syntax input from the
# 'conll_syntax' layer; the argument-description, argument-feature and
# verb-class flags are enabled and debug output is disabled.
propbank_annotator = PropBankPreannotator(
    input_syntax_layer='conll_syntax',
    add_arg_descriptions=True,
    add_arg_feats=True,
    add_verb_class=True,
    debug_output=False,
)

In [4]:
input_file = 'et_edt-ud-dev.jl'
display(HTML(f'<h2>Evaluating on {input_file}</h2>'))

# Read all sentences of the dev split into a list (tqdm shows loading progress)
eval_sentences = list(
    tqdm(load_estnltk_texts_from_jsonlines(os.path.join(in_dir, input_file)), ascii=True)
)

# Collect the verbs of the gold frames; discard_frames_wo_args=True is passed
# so that frames without arguments are left out.
gold_frame_verbs = get_gold_frame_verbs(eval_sentences, discard_frames_wo_args=True)
print('Total verbs in gold frames: ', len(gold_frame_verbs))

# Tag every sentence and accumulate the per-sentence evaluation in eval_results
print('Evaluating PropBankPreannotator ... ')
eval_results = {}
for sent_text in tqdm(eval_sentences, ascii=True):
    propbank_annotator.tag(sent_text)
    # Tagging must have produced the annotator's output relation layer
    assert propbank_annotator.output_layer in sent_text.relation_layers
    eval_propbank_preannotator_on_sentence_conll(
        sent_text['conll_syntax'],
        sent_text[propbank_annotator.output_layer],
        eval_results,
        verbose=False,
    )

# Convert the accumulated results into two tables and render them
frame_results, arg_results = summarize_eval_accuracies(eval_results, return_dataframes=True)
display(HTML('<h3>Frame detection performance</h3>'))
display(frame_results)
display(HTML('<h3>Argument detection performance</h3>'))
display(arg_results)

Loading Text objects from 'UD_Estonian-EDT-dev-json\\et_edt-ud-dev.jl' ...


3122it [00:06, 510.21it/s]
100%|###################################################################################################################################| 3122/3122 [00:00<00:00, 1497040.94it/s]


Total verbs in gold frames:  416
Evaluating PropBankPreannotator ... 


100%|######################################################################################################################################| 3122/3122 [00:01<00:00, 1631.52it/s]


Unnamed: 0,full frame match accuracy,sense match accuracy,evoking verb match accuracy,redundant senses %,redundant frames %,missing frames %,fully matching,sense matching,evoking verb matching,redundant senses,redundant frames,missing frames,total gold frames,total auto frames,discarded gold frames,discarded gold args
0,58.54,79.04,89.86,28.78%,6.53%,10.14%,1916,2587,2941,1151,261,332,3273,3999,0,15


Unnamed: 0,rows,ARG_OVERALL,ARG0,ARG1,ARG2,ARG3,ARG4,ARG5
0,accuracy,85.22,97.26,93.72,64.15,27.98,42.86,14.16
1,matching,3593.0,1138.0,2045.0,315.0,61.0,18.0,16.0
2,missing,623.0,32.0,137.0,176.0,157.0,24.0,97.0
3,redundant,235.0,2.0,59.0,109.0,42.0,23.0,0.0
4,total gold,4216.0,1170.0,2182.0,491.0,218.0,42.0,113.0


In [5]:
input_file = 'et_edt-ud-test.jl'
display(HTML(f'<h2>Evaluating on {input_file}</h2>'))

# Load evaluation sentences from the test part of the corpus
# (tqdm shows loading progress)
eval_sentences = list(
    tqdm(load_estnltk_texts_from_jsonlines(os.path.join(in_dir, input_file)), ascii=True)
)

# Collect the verbs of the gold frames; discard_frames_wo_args=True is passed
# so that frames without arguments are left out.
gold_frame_verbs = get_gold_frame_verbs(eval_sentences, discard_frames_wo_args=True)
print('Total verbs in gold frames: ', len(gold_frame_verbs))

# Tag every sentence and accumulate the per-sentence evaluation in eval_results
print('Evaluating PropBankPreannotator ... ')
eval_results = {}
for sent_text in tqdm(eval_sentences, ascii=True):
    propbank_annotator.tag(sent_text)
    # Tagging must have produced the annotator's output relation layer
    assert propbank_annotator.output_layer in sent_text.relation_layers
    eval_propbank_preannotator_on_sentence_conll(
        sent_text['conll_syntax'],
        sent_text[propbank_annotator.output_layer],
        eval_results,
        verbose=False,
    )

# Convert the accumulated results into two tables and render them
frame_results, arg_results = summarize_eval_accuracies(eval_results, return_dataframes=True)
display(HTML('<h3>Frame detection performance</h3>'))
display(frame_results)
display(HTML('<h3>Argument detection performance</h3>'))
display(arg_results)

Loading Text objects from 'UD_Estonian-EDT-dev-json\\et_edt-ud-test.jl' ...


3207it [00:06, 487.31it/s]
100%|###################################################################################################################################| 3207/3207 [00:00<00:00, 1036856.00it/s]


Total verbs in gold frames:  401
Evaluating PropBankPreannotator ... 


100%|######################################################################################################################################| 3207/3207 [00:02<00:00, 1357.48it/s]


Unnamed: 0,full frame match accuracy,sense match accuracy,evoking verb match accuracy,redundant senses %,redundant frames %,missing frames %,fully matching,sense matching,evoking verb matching,redundant senses,redundant frames,missing frames,total gold frames,total auto frames,discarded gold frames,discarded gold args
0,58.16,79.05,89.47,27.76%,6.30%,10.53%,2088,2838,3212,1195,271,378,3590,4304,0,20


Unnamed: 0,rows,ARG_OVERALL,ARG0,ARG1,ARG2,ARG3,ARG4,ARG5
0,accuracy,85.87,97.08,94.47,65.37,35.37,22.86,13.97
1,matching,4064.0,1264.0,2307.0,385.0,81.0,8.0,19.0
2,missing,669.0,38.0,135.0,204.0,148.0,27.0,117.0
3,redundant,249.0,3.0,48.0,143.0,45.0,10.0,0.0
4,total gold,4733.0,1302.0,2442.0,589.0,229.0,35.0,136.0


In [6]:
input_file = 'et_edt-ud-train.jl'
display(HTML(f'<h2>Evaluating on {input_file}</h2>'))

# Load evaluation sentences from the train part of the corpus
# (tqdm shows loading progress)
eval_sentences = list(
    tqdm(load_estnltk_texts_from_jsonlines(os.path.join(in_dir, input_file)), ascii=True)
)

# Collect the verbs of the gold frames; discard_frames_wo_args=True is passed
# so that frames without arguments are left out.
gold_frame_verbs = get_gold_frame_verbs(eval_sentences, discard_frames_wo_args=True)
print('Total verbs in gold frames: ', len(gold_frame_verbs))

# Tag every sentence and accumulate the per-sentence evaluation in eval_results
print('Evaluating PropBankPreannotator ... ')
eval_results = {}
for sent_text in tqdm(eval_sentences, ascii=True):
    propbank_annotator.tag(sent_text)
    # Tagging must have produced the annotator's output relation layer
    assert propbank_annotator.output_layer in sent_text.relation_layers
    eval_propbank_preannotator_on_sentence_conll(
        sent_text['conll_syntax'],
        sent_text[propbank_annotator.output_layer],
        eval_results,
        verbose=False,
    )

# Convert the accumulated results into two tables and render them
frame_results, arg_results = summarize_eval_accuracies(eval_results, return_dataframes=True)
display(HTML('<h3>Frame detection performance</h3>'))
display(frame_results)
display(HTML('<h3>Argument detection performance</h3>'))
display(arg_results)

Loading Text objects from 'UD_Estonian-EDT-dev-json\\et_edt-ud-train.jl' ...


24601it [00:46, 524.10it/s] 
100%|#################################################################################################################################| 24601/24601 [00:00<00:00, 1897847.54it/s]


Total verbs in gold frames:  480
Evaluating PropBankPreannotator ... 


100%|####################################################################################################################################| 24601/24601 [00:18<00:00, 1339.65it/s]


Unnamed: 0,full frame match accuracy,sense match accuracy,evoking verb match accuracy,redundant senses %,redundant frames %,missing frames %,fully matching,sense matching,evoking verb matching,redundant senses,redundant frames,missing frames,total gold frames,total auto frames,discarded gold frames,discarded gold args
0,57.99,79.14,89.15,28.03%,6.54%,10.85%,15328,20917,23564,8959,2091,2868,26432,31967,0,106


Unnamed: 0,rows,ARG_OVERALL,ARG0,ARG1,ARG2,ARG3,ARG4,ARG5
0,accuracy,84.89,96.74,94.03,60.53,29.62,27.49,14.51
1,matching,29488.0,9755.0,16606.0,2395.0,508.0,69.0,155.0
2,missing,5247.0,329.0,1054.0,1562.0,1207.0,182.0,913.0
3,redundant,1809.0,20.0,438.0,967.0,287.0,93.0,4.0
4,total gold,34735.0,10084.0,17660.0,3957.0,1715.0,251.0,1068.0
