# Syntax preprocessing taggers and rewriters

|tagger|rewriters|source attributes|target attributes|values|
|------|---------|-----------------|-----------------|------|
||PunctuationTypeRewriter|partofspeech, root|punctuation_type|```None```, 'Fst', 'Com', 'Col', ... |
||MorphToSyntaxMorphRewriter|partofspeech, form|partofspeech, form||
|PronounTypeTagger|PronounTypeRewriter|root, ending, clitic, partofspeech|pronoun_type| ```None```, ('det',), ('pers ps3',), ('pos', 'det', 'refl'), ... |
||RemoveDuplicateAnalysesRewriter|root, ending, clitic, partofspeech, form|||
||RemoveAdpositionAnalysesRewriter|partofspeech, form|||
||LetterCaseRewriter|word_text|cap|```None```, 'cap'|
|FiniteFormTagger|MorphToSyntaxMorphRewriter, FiniteFormRewriter|partofspeech, form|fin|```None```, ```True```, ```False```|
|VerbExtensionSuffixTagger|VerbExtensionSuffixRewriter|root|verb_extension_suffix|```None```,'tud','nud','mine','nu','tu','v','tav','mata','ja'|
|SubcatTagger|MorphToSyntaxMorphRewriter, SubcatRewriter|root, partofspeech, form|subcat|```None```, 'Intr', 'Part', 'gen', ...|
|MorphExtendedTagger|PunctuationTypeRewriter, MorphToSyntaxMorphRewriter, PronounTypeRewriter, RemoveDuplicateAnalysesRewriter, RemoveAdpositionAnalysesRewriter, LetterCaseRewriter, FiniteFormRewriter, VerbExtensionSuffixRewriter, SubcatRewriter|root, ending, clitic, partofspeech, form|partofspeech, form, punctuation_type, pronoun_type, cap, fin, verb_extension_suffix, subcat ||

In [1]:
from estnltk.text import words_sentences

## PronounTypeTagger

In [2]:
from estnltk.taggers import PronounTypeTagger

# Build the example text first, then run PronounTypeTagger over it.
text = words_sentences('Kumb, sina või mina?')
tagger = PronounTypeTagger()
tagger.tag(text)
# Bare expression: the notebook displays the tagged result for this cell.
text.pronoun_type

SL[SL[Span(Kumb, {'ending': '0', 'pronoun_type': ('rel',), 'root': 'kumb', 'clitic': '', 'form': 'sg n', 'lemma': 'kumb', 'root_tokens': ['kumb'], 'partofspeech': 'P'})],
SL[Span(,, {'ending': '', 'pronoun_type': None, 'root': ',', 'clitic': '', 'form': '', 'lemma': ',', 'root_tokens': [','], 'partofspeech': 'Z'})],
SL[Span(sina, {'ending': '0', 'pronoun_type': ('pers ps2',), 'root': 'sina', 'clitic': '', 'form': 'sg n', 'lemma': 'sina', 'root_tokens': ['sina'], 'partofspeech': 'P'})],
SL[Span(või, {'ending': '0', 'pronoun_type': None, 'root': 'või', 'clitic': '', 'form': '', 'lemma': 'või', 'root_tokens': ['või'], 'partofspeech': 'J'})],
SL[Span(mina, {'ending': '0', 'pronoun_type': None, 'root': 'mina', 'clitic': '', 'form': 'sg n', 'lemma': 'mina', 'root_tokens': ['mina'], 'partofspeech': 'S'})],
SL[Span(?, {'ending': '', 'pronoun_type': None, 'root': '?', 'clitic': '', 'form': '', 'lemma': '?', 'root_tokens': ['?'], 'partofspeech': 'Z'})]]

## FiniteFormTagger

In [3]:
from estnltk.taggers import FiniteFormTagger

# Relative path to the rules file handed to FiniteFormTagger through its
# ``fs_to_synt_rules_file`` keyword argument.
fs_to_synt_rules_file = '../estnltk/rewriting/syntax_preprocessing/rules_files/tmorftrtabel.txt'

tagger = FiniteFormTagger(fs_to_synt_rules_file=fs_to_synt_rules_file)
text = words_sentences('laulma hüpelnud tantsija')
tagger.tag(text)
# Bare expression: the notebook displays the tagged result for this cell.
text.finite_form

SL[SL[Span(laulma, {'ending': 'ma', 'root': 'laul', 'clitic': '', 'form': 'mod sup ps ill', 'fin': False, 'lemma': 'laulma', 'root_tokens': ['laul'], 'partofspeech': 'V'}),
Span(laulma, {'ending': 'ma', 'root': 'laul', 'clitic': '', 'form': 'aux sup ps ill', 'fin': False, 'lemma': 'laulma', 'root_tokens': ['laul'], 'partofspeech': 'V'}),
Span(laulma, {'ending': 'ma', 'root': 'laul', 'clitic': '', 'form': 'main sup ps ill', 'fin': False, 'lemma': 'laulma', 'root_tokens': ['laul'], 'partofspeech': 'V'})],
SL[Span(hüpelnud, {'ending': 'nud', 'root': 'hüple', 'clitic': '', 'form': 'mod indic impf ps neg', 'fin': True, 'lemma': 'hüplema', 'root_tokens': ['hüple'], 'partofspeech': 'V'}),
Span(hüpelnud, {'ending': 'nud', 'root': 'hüple', 'clitic': '', 'form': 'mod partic past ps', 'fin': False, 'lemma': 'hüplema', 'root_tokens': ['hüple'], 'partofspeech': 'V'}),
Span(hüpelnud, {'ending': 'nud', 'root': 'hüple', 'clitic': '', 'form': 'aux indic impf ps neg', 'fin': True, 'lemma': 'hüplema', 'r

## VerbExtensionSuffixTagger

In [4]:
from estnltk.taggers import VerbExtensionSuffixTagger

# Build the example text first, then run VerbExtensionSuffixTagger over it.
text = words_sentences('Laulev hüpelnud tantsija')
tagger = VerbExtensionSuffixTagger()
tagger.tag(text)
# Bare expression: the notebook displays the tagged result for this cell.
text.verb_extension_suffix

SL[SL[Span(Laulev, {'ending': '0', 'root': 'laulev', 'clitic': '', 'form': 'sg n', 'verb_extension_suffix': None, 'lemma': 'laulev', 'root_tokens': ['laulev'], 'partofspeech': 'A'})],
SL[Span(hüpelnud, {'ending': 'nud', 'root': 'hüple', 'clitic': '', 'form': 'nud', 'verb_extension_suffix': None, 'lemma': 'hüplema', 'root_tokens': ['hüple'], 'partofspeech': 'V'}),
Span(hüpelnud, {'ending': '0', 'root': 'hüpel=nud', 'clitic': '', 'form': '', 'verb_extension_suffix': 'nud', 'lemma': 'hüpelnud', 'root_tokens': ['hüpelnud'], 'partofspeech': 'A'}),
Span(hüpelnud, {'ending': '0', 'root': 'hüpel=nud', 'clitic': '', 'form': 'sg n', 'verb_extension_suffix': 'nud', 'lemma': 'hüpelnud', 'root_tokens': ['hüpelnud'], 'partofspeech': 'A'}),
Span(hüpelnud, {'ending': 'd', 'root': 'hüpel=nud', 'clitic': '', 'form': 'pl n', 'verb_extension_suffix': 'nud', 'lemma': 'hüpelnud', 'root_tokens': ['hüpelnud'], 'partofspeech': 'A'})],
SL[Span(tantsija, {'ending': '0', 'root': 'tantsija', 'clitic': '', 'form': 

## SubcatTagger

In [5]:
from estnltk.taggers import SubcatTagger

# Relative paths to the two rules files SubcatTagger is configured with;
# each variable is named after the keyword argument it is passed to.
fs_to_synt_rules_file = '../estnltk/rewriting/syntax_preprocessing/rules_files/tmorftrtabel.txt'
subcat_file = '../estnltk/rewriting/syntax_preprocessing/rules_files/abileksikon06utf.lx'

tagger = SubcatTagger(fs_to_synt_rules_file=fs_to_synt_rules_file,
                      subcat_rules_file=subcat_file)
text = words_sentences('Järel juurduma')
tagger.tag(text)
# Bare expression: the notebook displays the tagged result for this cell.
text.subcat

SL[SL[Span(Järel, {'ending': '0', 'root': 'järel', 'clitic': '', 'form': 'post', 'subcat': ['gen'], 'lemma': 'järel', 'root_tokens': ['järel'], 'partofspeech': 'K'}),
Span(Järel, {'ending': '0', 'root': 'järel', 'clitic': '', 'form': 'pre', 'subcat': None, 'lemma': 'järel', 'root_tokens': ['järel'], 'partofspeech': 'K'})],
SL[Span(juurduma, {'ending': 'ma', 'root': 'juurdu', 'clitic': '', 'form': 'mod sup ps ill', 'subcat': ['Intr'], 'lemma': 'juurduma', 'root_tokens': ['juurdu'], 'partofspeech': 'V'}),
Span(juurduma, {'ending': 'ma', 'root': 'juurdu', 'clitic': '', 'form': 'aux sup ps ill', 'subcat': ['Intr'], 'lemma': 'juurduma', 'root_tokens': ['juurdu'], 'partofspeech': 'V'}),
Span(juurduma, {'ending': 'ma', 'root': 'juurdu', 'clitic': '', 'form': 'main sup ps ill', 'subcat': ['Intr'], 'lemma': 'juurduma', 'root_tokens': ['juurdu'], 'partofspeech': 'V'})]]

## MorphExtendedTagger

In [6]:
from estnltk.taggers import MorphExtendedTagger

# Relative paths to the rules files MorphExtendedTagger is configured with.
fs_to_synt_rules_file = '../estnltk/rewriting/syntax_preprocessing/rules_files/tmorftrtabel.txt'
subcat_file = '../estnltk/rewriting/syntax_preprocessing/rules_files/abileksikon06utf.lx'
subcat_extra_file = '../estnltk/rewriting/syntax_preprocessing/rules_files/abileksikon_extra.lx'

tagger = MorphExtendedTagger(fs_to_synt_rules_file=fs_to_synt_rules_file,
                             allow_to_remove_all=False,
                             subcat_rules_file=subcat_file,
                             subcat_rules_extra_file=subcat_extra_file)
text = words_sentences('Ta on rääkinud!')
tagger.tag(text)
# Bare expression: the notebook displays the tagged result for this cell.
text.morph_extended

SL[SL[Span(Ta, {'ending': '0', 'pronoun_type': ('pers ps3',), 'root': 'tema', 'clitic': '', 'verb_extension_suffix': None, 'subcat': None, 'fin': None, 'partofspeech': 'P', 'punctuation_type': None, 'form': 'sg nom', 'word_text': 'Ta', 'letter_case': 'cap'})],
SL[Span(on, {'ending': '0', 'pronoun_type': None, 'root': 'ole', 'clitic': '', 'verb_extension_suffix': None, 'subcat': ['Intr'], 'fin': True, 'partofspeech': 'V', 'punctuation_type': None, 'form': 'mod indic pres ps3 sg ps af', 'word_text': 'on', 'letter_case': None}),
Span(on, {'ending': '0', 'pronoun_type': None, 'root': 'ole', 'clitic': '', 'verb_extension_suffix': None, 'subcat': ['Intr'], 'fin': True, 'partofspeech': 'V', 'punctuation_type': None, 'form': 'aux indic pres ps3 sg ps af', 'word_text': 'on', 'letter_case': None}),
Span(on, {'ending': '0', 'pronoun_type': None, 'root': 'ole', 'clitic': '', 'verb_extension_suffix': None, 'subcat': ['Intr'], 'fin': True, 'partofspeech': 'V', 'punctuation_type': None, 'form': 'main