# Syntax preprocessing taggers and rewriters

|tagger|rewriters|source attributes|target attributes|values|
|------|---------|-----------------|-----------------|------|
||PunctuationTypeRewriter|partofspeech, root|punctuation_type|```None```, 'Fst', 'Com', 'Col', ... |
||MorphToSyntaxMorphRewriter|partofspeech, form|partofspeech, form||
|PronounTypeTagger|PronounTypeRewriter|root, ending, clitic|pronoun_type| ```None```, ('det',), ('pers ps3',), ('pos', 'det', 'refl'), ... |
||RemoveDuplicateAnalysesRewriter||||
||LetterCaseRewriter|word_text|letter_case|```None```, 'cap'|
|FiniteFormTagger|FiniteFormRewriter|partofspeech, form|fin|```None```, 'fin'|
|VerbExtensionSuffixTagger|VerbExtensionSuffixRewriter|root|verb_extension_suffix|```None```,'tud','nud','mine','nu','tu','v','tav','mata','ja'|
|SubcatTagger|SubcatRewriter|root, partofspeech, form|subcat|```None```, 'Intr', 'Part', 'gen', ...|
|MorphExtendedTagger|PunctuationTypeRewriter, MorphToSyntaxMorphRewriter, PronounTypeRewriter, FiniteFormRewriter, VerbExtensionSuffixRewriter, SubcatRewriter|root, ending, clitic, partofspeech, form|partofspeech, form, punctuation_type, pronoun_type, letter_case, fin, verb_extension_suffix, subcat ||

In [1]:
from estnltk.text import words_sentences

## MorphExtendedTagger

In [2]:
from estnltk.taggers import MorphExtendedTagger

# Rule files passed to the tagger below (relative paths into the estnltk source tree).
fsToSyntFulesFile = '../estnltk/rewriting/syntax_preprocessing/files/tmorftrtabel.txt'
subcatFile = '../estnltk/rewriting/syntax_preprocessing/files/abileksikon06utf.lx'

# Prepare the sample sentence and configure the tagger with both rule files.
text = words_sentences('Täna on 15. detsember.')
tagger = MorphExtendedTagger(
    fs_to_synt_rules_file=fsToSyntFulesFile,
    subcat_rules_file=subcatFile,
    allow_to_remove_all=False,
)

# Run the tagger on the text; the bare expression on the last line is the
# value displayed as the cell output.
tagger.tag(text)
text.morph_extended

zmpdcfps

SL[SL[Span(Täna, {'fin': None, 'root': 'täna', 'pronoun_type': None, 'punctuation_type': None, 'form': '', 'clitic': '', 'partofspeech': 'D', 'subcat': None, 'word_text': 'Täna', 'letter_case': 'cap', 'verb_extension_suffix': None, 'ending': '0'})],
SL[Span(on, {'fin': '<FinV>', 'root': 'ole', 'pronoun_type': None, 'punctuation_type': None, 'form': 'mod indic pres ps3 sg ps af', 'clitic': '', 'partofspeech': 'V', 'subcat': ['Intr'], 'word_text': 'on', 'letter_case': None, 'verb_extension_suffix': None, 'ending': '0'}),
Span(on, {'fin': '<FinV>', 'root': 'ole', 'pronoun_type': None, 'punctuation_type': None, 'form': 'aux indic pres ps3 sg ps af', 'clitic': '', 'partofspeech': 'V', 'subcat': ['Intr'], 'word_text': 'on', 'letter_case': None, 'verb_extension_suffix': None, 'ending': '0'}),
Span(on, {'fin': '<FinV>', 'root': 'ole', 'pronoun_type': None, 'punctuation_type': None, 'form': 'main indic pres ps3 sg ps af', 'clitic': '', 'partofspeech': 'V', 'subcat': ['Intr'], 'word_text': 'on',

## PronounTypeTagger

In [3]:
from estnltk.taggers import PronounTypeTagger

# Prepare the sample sentence and a tagger constructed with no arguments.
text = words_sentences('Kumb, sina või mina?')
tagger = PronounTypeTagger()

# Run the tagger on the text; the bare expression on the last line is the
# value displayed as the cell output.
tagger.tag(text)
text.morph_extended

SL[SL[Span(Kumb, {'root': 'kumb', 'pronoun_type': ('rel',), 'form': 'sg n', 'clitic': '', 'root_tokens': ['kumb'], 'lemma': 'kumb', 'partofspeech': 'P', 'ending': '0'})],
SL[Span(,, {'root': ',', 'pronoun_type': None, 'form': '', 'clitic': '', 'root_tokens': [','], 'lemma': ',', 'partofspeech': 'Z', 'ending': ''})],
SL[Span(sina, {'root': 'sina', 'pronoun_type': ('pers ps2',), 'form': 'sg n', 'clitic': '', 'root_tokens': ['sina'], 'lemma': 'sina', 'partofspeech': 'P', 'ending': '0'})],
SL[Span(või, {'root': 'või', 'pronoun_type': None, 'form': '', 'clitic': '', 'root_tokens': ['või'], 'lemma': 'või', 'partofspeech': 'J', 'ending': '0'})],
SL[Span(mina, {'root': 'mina', 'pronoun_type': None, 'form': 'sg n', 'clitic': '', 'root_tokens': ['mina'], 'lemma': 'mina', 'partofspeech': 'S', 'ending': '0'})],
SL[Span(?, {'root': '?', 'pronoun_type': None, 'form': '', 'clitic': '', 'root_tokens': ['?'], 'lemma': '?', 'partofspeech': 'Z', 'ending': ''})]]