In [1]:
import csv
import pandas as pd

In [2]:
from estnltk import Layer
from estnltk.taggers import Tagger
from collections import defaultdict
import copy

In [3]:
def get_ner(ner_layer, word_layer, span):
        nertag = None
        if len(ner_layer) > 0:
            word = word_layer.get(span)
            for n in ner_layer:
                for part in n:
                    if part==word:
                        nertag=n.nertag
        if nertag:
            return nertag
        return 'OTHER'
    
def get_POS(word_layer, span):
    infinite_verb_forms = ['da', 'des', 'ma', 'maks', 'mas', 'mast', 'mata', 'nud', 'tav', 'tud', 'v']
    # if POS is ambiguous, only unique tags are kept, e.g. ['V', 'A', 'A'] -> ['V', 'A']
    pos_list = []
    word = word_layer.get(span)
    for i in range(len(word.morph_analysis['partofspeech'])):
        if word.morph_analysis['partofspeech'][i] == 'V':
            if word.morph_analysis['form'][i] in infinite_verb_forms:
                pos_list.append('V_inf')
            elif word.form[i] == 'neg':
                pos_list.append('V_neg')
            else:
                pos_list.append('V_fin')
        else:
            pos_list.append(word.morph_analysis['partofspeech'][i])
    
    if len(pos_list) > 1:
        char_unique = [char for indx, char in enumerate(pos_list) if char not in pos_list[:indx]]
        if len(char_unique) < 2:
            return char_unique[0]
        return '|'.join(char_unique)
    return pos_list[0]

In [13]:
class PhrasePatternConsistencyTagger(Tagger):
    """Tags phrase words that are syntactically wrong or have wrong part-of-speech.""" 
    conf_param = ['rules_file', 'ruleset_map']
    
    def __init__(self, rules_file: str,
                       output_layer='pattern_consistency',
                       morph_analysis_layer='morph_analysis',
                       words_layer='words',
                       syntax_layer='stanza_syntax',
                       ner_layer='ner'):
        
        self.input_layers = [morph_analysis_layer, words_layer, syntax_layer, ner_layer]
        self.output_layer = output_layer
        self.output_attributes = ['phrase', 'syntax', 'pos', 'ner', 'error_source', 'error_mask', 'correction']
        self.rules_file = rules_file
        
        ruleset_map = defaultdict(list)
        
        with open(rules_file, encoding='UTF-8') as csv_file:
            reader = csv.DictReader(csv_file)
            for row in reader:
                info = [row['POS_pattern'], row['NER_pattern'], row['mistake_type'], row['error_mask'], row['correction']]
                ruleset_map[row['tree']].append(info)
                
        self.ruleset_map = ruleset_map

    def _make_layer_template(self):
        # Create new detached layer debased on the configuration
        layer = Layer(name=self.output_layer,
                      text_object=None,
                      attributes=self.output_attributes,
                      enveloping=self.input_layers[1],
                      ambiguous=True)
        return layer
    
    # to be continued...
        
    def _make_layer(self, text, layers, status):
        # Create new layer based on the configuration
        layer = self._make_layer_template()
        # Assign the Text object
        layer.text_object = text
        
        for i in range(len(layers[self.input_layers[2]])): # Iterate over 'stanza_syntax' layer
            pattern_spans = []
            tree = []
            ids = []
            pos = []
            ner = []

            pattern_spans.append(layers[self.input_layers[2]][i])
            ids.append([layers[self.input_layers[2]][i]['id'], layers[self.input_layers[2]][i]['head']])
                
            for j in range(i + 1, len(layers[self.input_layers[2]])):
                for k in range(len(pattern_spans)):
                    if layers[self.input_layers[2]][j] in pattern_spans[k]['children'] or pattern_spans[k] in layers[self.input_layers[2]][j]['children'] or layers[self.input_layers[2]][j]['parent_span'] != None and layers[self.input_layers[2]][j]['parent_span'] == pattern_spans[k]['parent_span']:
                        pattern_spans.append(layers[self.input_layers[2]][j])
                        ids.append([layers[self.input_layers[2]][j]['id'], layers[self.input_layers[2]][j]['head']])
        
                ids_for_pattern = copy.deepcopy(ids)
                for k in range(len(ids_for_pattern)):
                    temp = ids_for_pattern[k][0]
                    ids_for_pattern[k][0] = k+1
                    for l in range(len(ids)):
                        if ids[l][1] == temp:
                            ids_for_pattern[l][1] = ids_for_pattern[k][0]
            
                word_ids = [word_id[0] for word_id in ids_for_pattern]
                for k in range(len(ids_for_pattern)):
                    if ids_for_pattern[k][0] == ids_for_pattern[k][1]:
                        ids_for_pattern[k][1] = 0
                    elif ids_for_pattern[k][1] not in word_ids:
                        ids_for_pattern[k][1] = 0
            
                for k in range(len(pattern_spans)):
                    deprel = pattern_spans[k].deprel
                    if ids_for_pattern[k][1] == 0 and deprel != 'root':
                        deprel = 'root'
                    tree.append([str(ids_for_pattern[k][0]), str(ids_for_pattern[k][1]), deprel])
                    # POS-tag is taken from morph_analysis layer
                    pos.append(get_POS(layers[self.input_layers[1]], pattern_spans[k]))
                    # nertag is taken from ner layer
                    ner.append(get_ner(layers[self.input_layers[-1]], layers[self.input_layers[1]], pattern_spans[k]))                     
                    
                pattern = [" ".join(word_info) for word_info in tree]
                # check if tree pattern exists in ruleset map
                if ",".join(pattern) in self.ruleset_map.keys():
                    pos_pattern = "-".join(pos)
                    ner_pattern = "-".join(ner)
                    # check if POS-sequence exists in ruleset map with given tree pattern
                    for el in self.ruleset_map[",".join(pattern)]:
                        if el[0] == pos_pattern and el[1] == ner_pattern:
                            # add annotations
                            for idx, span in enumerate(pattern_spans):
                                layer.add_annotation([span.base_span], 
                                                 phrase=" ".join([span.text for span in pattern_spans]),
                                                 syntax=pattern[idx],
                                                 pos=pos[idx],
                                                 ner=ner[idx],
                                                 error_source=el[2], 
                                                 error_mask=el[3].split('-')[idx],
                                                 correction=el[4]) # correction is currently "-"                  

                if j == len(layers[self.input_layers[2]]) - 1:
                    pattern_spans = []
                    tree = []
                    ids = []
                    pos = []
                    ner = []
                    break 
        
        return layer

In [14]:
consistency_tagger = PhrasePatternConsistencyTagger(rules_file='indicator_patterns_ner_tree_pos_updated.csv')
consistency_tagger

name,output layer,output attributes,input layers
PhrasePatternConsistencyTagger,pattern_consistency,"('phrase', 'syntax', 'pos', 'ner', 'error_source', 'error_mask', 'correction')","('morph_analysis', 'words', 'stanza_syntax', 'ner')"

0,1
rules_file,indicator_patterns_ner_tree_pos_updated.csv
ruleset_map,"defaultdict(<class 'list'>, {'string': [['string', 'string', 'string', 'string', ..., type: <class 'collections.defaultdict'>, length: 7"


In [15]:
test_df = pd.read_pickle('phrase_examples_filtered_sub10000/atomic_phrases.pkl')

In [16]:
test_500 = []

for idx, row in test_df.iterrows():
    if row['phrase_length'] > 1:
        test_500.append(row['phrase'])
    if len(test_500) == 500:
        break

In [17]:
for phrase in test_500:
    consistency_tagger.tag(phrase)

In [19]:
n_found = 0
for phrase in test_500:
    try:
        if len(phrase.pattern_consistency) > 0:
            for word in phrase.pattern_consistency:
                if word.error_mask[0] == '1':
                    display(phrase)
                    display(phrase.pattern_consistency)
                    n_found+=1
    except:
        break
print(n_found)

text
Defender-18 : Sokrates korras sellega ?

0,1
document_creation_time,2024-03-10T00:43
document_id,705250
phrase_start_end,"(0, 38)"
phrase_type,root_phrase
sentence_id,20676731
sentence_startend,"(65236, 65274)"
subcorpus,jututoavestlus

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,8
compound_tokens,"type, normalized",,tokens,False,1
words,normalized_form,,,True,6
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,6
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,6
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Sokrates'],Sokrates korras,1 2 nmod,H,OTHER,ner,1,-
['korras'],Sokrates korras,2 0 root,S,OTHER,ner,0,-


text
Tartu ülikooli

0,1
document_creation_time,2024-03-10T00:43
document_id,487500
phrase_start_end,"(0, 14)"
phrase_type,nmod_phrase
sentence_id,11085184
sentence_startend,"(227, 375)"
subcorpus,aja_pm

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Tartu'],Tartu ülikooli,1 2 nmod,H,OTHER,ner,1,-
['ülikooli'],Tartu ülikooli,2 0 root,S,OTHER,ner,0,-


text
TÜ Kliinikumi

0,1
document_creation_time,2024-03-10T00:43
document_id,705025
phrase_start_end,"(58, 71)"
phrase_type,nmod_phrase
sentence_id,18952209
sentence_startend,"(33309, 33490)"
subcorpus,tea

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['TÜ'],TÜ Kliinikumi,1 2 nmod,Y,OTHER,ner,1,-
['Kliinikumi'],TÜ Kliinikumi,2 0 root,H,OTHER,ner,1,-


text
TÜ Kliinikumi

0,1
document_creation_time,2024-03-10T00:43
document_id,705025
phrase_start_end,"(58, 71)"
phrase_type,nmod_phrase
sentence_id,18952209
sentence_startend,"(33309, 33490)"
subcorpus,tea

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['TÜ'],TÜ Kliinikumi,1 2 nmod,Y,OTHER,ner,1,-
['Kliinikumi'],TÜ Kliinikumi,2 0 root,H,OTHER,ner,1,-


text
TÜ Kliinikumi 1ühendlabori

0,1
document_creation_time,2024-03-10T00:43
document_id,705025
phrase_start_end,"(58, 84)"
phrase_type,nmod_phrase
sentence_id,18952209
sentence_startend,"(33309, 33490)"
subcorpus,tea

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,3
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,3
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,3
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Kliinikumi'],Kliinikumi 1ühendlabori,1 2 nmod,H,OTHER,ner,1,-
['1ühendlabori'],Kliinikumi 1ühendlabori,2 0 root,S,OTHER,ner,0,-


text
Viru peatuses

0,1
document_creation_time,2024-03-10T00:43
document_id,86904
phrase_start_end,"(58, 71)"
phrase_type,obl_phrase
sentence_id,2039231
sentence_startend,"(3175, 3301)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Viru'],Viru peatuses,1 2 nmod,H,OTHER,ner,1,-
['peatuses'],Viru peatuses,2 0 root,S,OTHER,ner,0,-


text
Riia kesklinnas

0,1
document_creation_time,2024-03-10T00:43
document_id,585094
phrase_start_end,"(0, 15)"
phrase_type,obl_phrase
sentence_id,13238380
sentence_startend,"(7, 77)"
subcorpus,aja_sloleht

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Riia'],Riia kesklinnas,1 2 nmod,H,OTHER,ner,1,-
['kesklinnas'],Riia kesklinnas,2 0 root,S,OTHER,ner,0,-


text
Viru Apteeki

0,1
document_creation_time,2024-03-10T00:43
document_id,97505
phrase_start_end,"(20, 32)"
phrase_type,obl_phrase
sentence_id,2253883
sentence_startend,"(773, 903)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Viru'],Viru Apteeki,1 2 nmod,H,OTHER,ner,1,-
['Apteeki'],Viru Apteeki,2 0 root,S,OTHER,ner,0,-


text
Tartumaa tööhõiveameti

0,1
document_creation_time,2024-03-10T00:43
document_id,418571
phrase_start_end,"(31, 53)"
phrase_type,nmod_phrase
sentence_id,9113530
sentence_startend,"(3106, 3183)"
subcorpus,aja_ee

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Tartumaa'],Tartumaa tööhõiveameti,1 2 nmod,H,OTHER,ner,1,-
['tööhõiveameti'],Tartumaa tööhõiveameti,2 0 root,S,OTHER,ner,0,-


text
Krimmi Eesti Kultuuriselts

0,1
document_creation_time,2024-03-10T00:43
document_id,525323
phrase_start_end,"(102, 128)"
phrase_type,nsubj_phrase
sentence_id,11951022
sentence_startend,"(0, 180)"
subcorpus,aja_sloleht

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,3
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,3
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,3
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Eesti'],Eesti Kultuuriselts,1 2 nmod,H,OTHER,ner,1,-
['Kultuuriselts'],Eesti Kultuuriselts,2 0 root,S,OTHER,ner,0,-


text
Hansu roll

0,1
document_creation_time,2024-03-10T00:43
document_id,411
phrase_start_end,"(32, 42)"
phrase_type,nsubj_phrase
sentence_id,21445
sentence_startend,"(3436, 3525)"
subcorpus,aja_kr

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Hansu'],Hansu roll,1 2 nmod,H,OTHER,ner,1,-
['roll'],Hansu roll,2 0 root,S,OTHER,ner,0,-


text
Prantsuse Lütseumi

0,1
document_creation_time,2024-03-10T00:44
document_id,476969
phrase_start_end,"(38, 56)"
phrase_type,nmod_phrase
sentence_id,10808502
sentence_startend,"(2167, 2333)"
subcorpus,aja_pm

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Prantsuse'],Prantsuse Lütseumi,1 2 nmod,G,OTHER,ner,1,-
['Lütseumi'],Prantsuse Lütseumi,2 0 root,S,OTHER,ner,0,-


text
Ryderi südame

0,1
document_creation_time,2024-03-10T00:44
document_id,605950
phrase_start_end,"(30, 43)"
phrase_type,obj_phrase
sentence_id,13703867
sentence_startend,"(1133, 1186)"
subcorpus,aja_sloleht

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Ryderi'],Ryderi südame,1 2 nmod,H,OTHER,ner,1,-
['südame'],Ryderi südame,2 0 root,S,OTHER,ner,0,-


text
Viru tänavale

0,1
document_creation_time,2024-03-10T00:44
document_id,431141
phrase_start_end,"(21, 34)"
phrase_type,obl_phrase
sentence_id,9487957
sentence_startend,"(2598, 2762)"
subcorpus,aja_pm

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Viru'],Viru tänavale,1 2 nmod,H,OTHER,ner,1,-
['tänavale'],Viru tänavale,2 0 root,S,OTHER,ner,0,-


text
Kivi töös

0,1
document_creation_time,2024-03-10T00:44
document_id,112335
phrase_start_end,"(42, 51)"
phrase_type,obl_phrase
sentence_id,2516220
sentence_startend,"(540, 598)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Kivi'],Kivi töös,1 2 nmod,H,OTHER,ner,1,-
['töös'],Kivi töös,2 0 root,S,OTHER,ner,0,-


text
Rahva Usaldus

0,1
document_creation_time,2024-03-10T00:44
document_id,50055
phrase_start_end,"(13, 26)"
phrase_type,nmod_phrase
sentence_id,1301691
sentence_startend,"(12, 170)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Rahva'],Rahva Usaldus,1 2 nmod,H,OTHER,ner,1,-
['Usaldus'],Rahva Usaldus,2 0 root,S,OTHER,ner,0,-


text
Lasnamäe linnaosavanema

0,1
document_creation_time,2024-03-10T00:44
document_id,50055
phrase_start_end,"(103, 126)"
phrase_type,nmod_phrase
sentence_id,1301691
sentence_startend,"(12, 170)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Lasnamäe'],Lasnamäe linnaosavanema,1 2 nmod,H,OTHER,ner,1,-
['linnaosavanema'],Lasnamäe linnaosavanema,2 0 root,S,OTHER,ner,0,-


text
Almásy meeled

0,1
document_creation_time,2024-03-10T00:44
document_id,200476
phrase_start_end,"(40, 53)"
phrase_type,obj_phrase
sentence_id,3875912
sentence_startend,"(2404, 2550)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Almásy'],Almásy meeled,1 2 nmod,H,OTHER,ner,1,-
['meeled'],Almásy meeled,2 0 root,S,OTHER,ner,0,-


text
Ilvese kinnitusel

0,1
document_creation_time,2024-03-10T00:44
document_id,5898
phrase_start_end,"(0, 17)"
phrase_type,obl_phrase
sentence_id,419625
sentence_startend,"(673, 733)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Ilvese'],Ilvese kinnitusel,1 2 nmod,H,OTHER,ner,1,-
['kinnitusel'],Ilvese kinnitusel,2 0 root,S,OTHER,ner,0,-


text
Eesti Meremuuseum

0,1
document_creation_time,2024-03-10T00:44
document_id,556469
phrase_start_end,"(0, 17)"
phrase_type,nsubj_phrase
sentence_id,12643943
sentence_startend,"(179, 296)"
subcorpus,aja_sloleht

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Eesti'],Eesti Meremuuseum,1 2 nmod,H,OTHER,ner,1,-
['Meremuuseum'],Eesti Meremuuseum,2 0 root,S,OTHER,ner,0,-


text
Tallinna reisisadama

0,1
document_creation_time,2024-03-10T00:44
document_id,556469
phrase_start_end,"(73, 93)"
phrase_type,nmod_phrase
sentence_id,12643943
sentence_startend,"(179, 296)"
subcorpus,aja_sloleht

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Tallinna'],Tallinna reisisadama,1 2 nmod,H,OTHER,ner,1,-
['reisisadama'],Tallinna reisisadama,2 0 root,S,OTHER,ner,0,-


text
Kuu aega tagasi

0,1
document_creation_time,2024-03-10T00:44
document_id,417672
phrase_start_end,"(0, 15)"
phrase_type,obl_phrase
sentence_id,9086283
sentence_startend,"(1880, 2084)"
subcorpus,aja_pm

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,3
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,3
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,1
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,3
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Kuu'],Kuu aega,1 2 nmod,H,OTHER,ner,1,-
['aega'],Kuu aega,2 0 root,S,OTHER,ner,0,-


text
Tallinna haigekassal

0,1
document_creation_time,2024-03-10T00:44
document_id,417672
phrase_start_end,"(95, 115)"
phrase_type,obl_phrase
sentence_id,9086283
sentence_startend,"(1880, 2084)"
subcorpus,aja_pm

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Tallinna'],Tallinna haigekassal,1 2 nmod,H,OTHER,ner,1,-
['haigekassal'],Tallinna haigekassal,2 0 root,S,OTHER,ner,0,-


text
Hoiupanga Lõuna esinduse

0,1
document_creation_time,2024-03-10T00:44
document_id,413713
phrase_start_end,"(84, 108)"
phrase_type,nmod_phrase
sentence_id,8965863
sentence_startend,"(623, 760)"
subcorpus,aja_pm

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,3
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,3
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,3
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Lõuna'],Lõuna esinduse,1 2 nmod,H,OTHER,ner,1,-
['esinduse'],Lõuna esinduse,2 0 root,S,OTHER,ner,0,-


text
Tartu Triatloni

0,1
document_creation_time,2024-03-10T00:44
document_id,441354
phrase_start_end,"(75, 90)"
phrase_type,nmod_phrase
sentence_id,9798923
sentence_startend,"(1280, 1384)"
subcorpus,aja_pm

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Tartu'],Tartu Triatloni,1 2 nmod,H,OTHER,ner,1,-
['Triatloni'],Tartu Triatloni,2 0 root,S,OTHER,ner,0,-


text
soome ajakirjanik

0,1
document_creation_time,2024-03-10T00:44
document_id,96905
phrase_start_end,"(21, 38)"
phrase_type,nsubj_phrase
sentence_id,2241879
sentence_startend,"(876, 1078)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['soome'],soome ajakirjanik,1 2 amod,G,OTHER,ner,1,-
['ajakirjanik'],soome ajakirjanik,2 0 root,S,OTHER,ner,0,-


text
Mustamäe haiglas

0,1
document_creation_time,2024-03-10T00:44
document_id,322939
phrase_start_end,"(0, 16)"
phrase_type,nmod_phrase
sentence_id,6048777
sentence_startend,"(992, 1161)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Mustamäe'],Mustamäe haiglas,1 2 nmod,H,OTHER,ner,1,-
['haiglas'],Mustamäe haiglas,2 0 root,S,OTHER,ner,0,-


text
Eesti Energias

0,1
document_creation_time,2024-03-10T00:44
document_id,322939
phrase_start_end,"(61, 75)"
phrase_type,nmod_phrase
sentence_id,6048777
sentence_startend,"(992, 1161)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Eesti'],Eesti Energias,1 2 nmod,H,OTHER,ner,1,-
['Energias'],Eesti Energias,2 0 root,S,OTHER,ner,0,-


text
Kirijenko kandidatuuri

0,1
document_creation_time,2024-03-10T00:44
document_id,585399
phrase_start_end,"(81, 103)"
phrase_type,nmod_phrase
sentence_id,13244870
sentence_startend,"(6635, 6757)"
subcorpus,aja_sloleht

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Kirijenko'],Kirijenko kandidatuuri,1 2 nmod,H,OTHER,ner,1,-
['kandidatuuri'],Kirijenko kandidatuuri,2 0 root,S,OTHER,ner,0,-


text
Kuulsa paari austajate

0,1
document_creation_time,2024-03-10T00:44
document_id,8965
phrase_start_end,"(0, 22)"
phrase_type,nmod_phrase
sentence_id,487148
sentence_startend,"(744, 917)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,3
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,3
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,3
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Kuulsa'],Kuulsa paari,1 2 nmod,H,OTHER,ner,1,-
['paari'],Kuulsa paari,2 0 root,N,OTHER,ner,0,-


text
Mõõdukate pingi

0,1
document_creation_time,2024-03-10T00:44
document_id,552792
phrase_start_end,"(5, 20)"
phrase_type,nmod_phrase
sentence_id,12561595
sentence_startend,"(710, 892)"
subcorpus,aja_sloleht

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Mõõdukate'],Mõõdukate pingi,1 2 nmod,H,OTHER,ner,1,-
['pingi'],Mõõdukate pingi,2 0 root,S,OTHER,ner,0,-


text
Eesti Päevaleht

0,1
document_creation_time,2024-03-10T00:44
document_id,366335
phrase_start_end,"(0, 15)"
phrase_type,nsubj_phrase
sentence_id,6965563
sentence_startend,"(512, 552)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Eesti'],Eesti Päevaleht,1 2 nmod,H,OTHER,ner,1,-
['Päevaleht'],Eesti Päevaleht,2 0 root,H,OTHER,ner,1,-


text
Eesti Päevaleht

0,1
document_creation_time,2024-03-10T00:44
document_id,366335
phrase_start_end,"(0, 15)"
phrase_type,nsubj_phrase
sentence_id,6965563
sentence_startend,"(512, 552)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Eesti'],Eesti Päevaleht,1 2 nmod,H,OTHER,ner,1,-
['Päevaleht'],Eesti Päevaleht,2 0 root,H,OTHER,ner,1,-


text
Tartu-Põlva-Luhamaa maanteel

0,1
document_creation_time,2024-03-10T00:44
document_id,443909
phrase_start_end,"(22, 50)"
phrase_type,obl_phrase
sentence_id,9872823
sentence_startend,"(456, 564)"
subcorpus,aja_pm

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,6
compound_tokens,"type, normalized",,tokens,False,1
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Tartu-Põlva-Luhamaa'],Tartu-Põlva-Luhamaa maanteel,1 2 nmod,H,OTHER,ner,1,-
['maanteel'],Tartu-Põlva-Luhamaa maanteel,2 0 root,S,OTHER,ner,0,-


text
Harju maakohus

0,1
document_creation_time,2024-03-10T00:44
document_id,434491
phrase_start_end,"(27, 41)"
phrase_type,nsubj_phrase
sentence_id,9589488
sentence_startend,"(1266, 1452)"
subcorpus,aja_pm

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Harju'],Harju maakohus,1 2 nmod,H,OTHER,ner,1,-
['maakohus'],Harju maakohus,2 0 root,S,OTHER,ner,0,-


text
414 Inglise õiguses

0,1
document_creation_time,2024-03-10T00:44
document_id,363680
phrase_start_end,"(0, 19)"
phrase_type,obl_phrase
sentence_id,6857811
sentence_startend,"(172928, 173019)"
subcorpus,tea

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,3
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,3
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,3
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Inglise'],Inglise õiguses,1 2 nmod,H,OTHER,ner,1,-
['õiguses'],Inglise õiguses,2 0 root,S,OTHER,ner,0,-


text
ühe väga hea vene kirjanikuga

0,1
document_creation_time,2024-03-10T00:44
document_id,388017
phrase_start_end,"(13, 42)"
phrase_type,obl_phrase
sentence_id,7931955
sentence_startend,"(1150, 1298)"
subcorpus,ilu

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,5
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,5
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,5
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,5
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['vene'],vene kirjanikuga,1 2 amod,G,OTHER,ner,1,-
['kirjanikuga'],vene kirjanikuga,2 0 root,S,OTHER,ner,0,-


text
Moskva-meelse Najibullah

0,1
document_creation_time,2024-03-10T00:44
document_id,284764
phrase_start_end,"(21, 45)"
phrase_type,nsubj_phrase
sentence_id,5368905
sentence_startend,"(955, 1075)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,4
compound_tokens,"type, normalized",,tokens,False,1
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Moskva-meelse'],Moskva-meelse Najibullah,1 2 amod,A,OTHER,ner,0,-
['Najibullah'],Moskva-meelse Najibullah,2 0 root,H,OTHER,ner,1,-


text
Mauretaania valitsusliikmed

0,1
document_creation_time,2024-03-10T00:44
document_id,554072
phrase_start_end,"(2, 29)"
phrase_type,nsubj_phrase
sentence_id,12591443
sentence_startend,"(1360, 1468)"
subcorpus,aja_sloleht

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Mauretaania'],Mauretaania valitsusliikmed,1 2 nmod,H,OTHER,ner,1,-
['valitsusliikmed'],Mauretaania valitsusliikmed,2 0 root,S,OTHER,ner,0,-


text
Euroopa Komisjoni

0,1
document_creation_time,2024-03-10T00:45
document_id,99035
phrase_start_end,"(85, 102)"
phrase_type,nmod_phrase
sentence_id,2280999
sentence_startend,"(560, 708)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Euroopa'],Euroopa Komisjoni,1 2 nmod,H,OTHER,ner,1,-
['Komisjoni'],Euroopa Komisjoni,2 0 root,H,OTHER,ner,1,-


text
Euroopa Komisjoni

0,1
document_creation_time,2024-03-10T00:45
document_id,99035
phrase_start_end,"(85, 102)"
phrase_type,nmod_phrase
sentence_id,2280999
sentence_startend,"(560, 708)"
subcorpus,aja_EPL

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,2
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,2
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,2
timexes,"tid, type, value, temporal_function, anchor_time_id, mod, quant, freq, begin_point, end_point, part_of_interval",,,False,0
ner,nertag,,words,False,0
stanza_syntax,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",morph_analysis,,False,2
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2


layer name,attributes,parent,enveloping,ambiguous,span count
pattern_consistency,"phrase, syntax, pos, ner, error_source, error_mask, correction",,words,True,2

text,phrase,syntax,pos,ner,error_source,error_mask,correction
['Euroopa'],Euroopa Komisjoni,1 2 nmod,H,OTHER,ner,1,-
['Komisjoni'],Euroopa Komisjoni,2 0 root,H,OTHER,ner,1,-


41
