# Syntax preprocessing taggers, rewriters and exporter

|tagger|rewriters|source attributes|target attributes|values|
|------|---------|-----------------|-----------------|------|
||PunctuationTypeRewriter|partofspeech, root|punctuation_type|```None```, 'Fst', 'Com', 'Col', ... |
||MorphToSyntaxMorphRewriter|partofspeech, form|partofspeech, form||
|PronounTypeTagger|PronounTypeRewriter|root, ending, clitic, partofspeech|pronoun_type| ```None```, ('det',), ('pers ps3',), ('pos', 'det', 'refl'), ... |
||RemoveDuplicateAnalysesRewriter|root, ending, clitic, partofspeech, form|||
||RemoveAdpositionAnalysesRewriter|partofspeech, form|||
||LetterCaseRewriter|word_text|letter_case|```None```, 'cap'|
|FiniteFormTagger|MorphToSyntaxMorphRewriter, FiniteFormRewriter|partofspeech, form|fin|```None```, ```True```, ```False```|
|VerbExtensionSuffixTagger|VerbExtensionSuffixRewriter|root|verb_extension_suffix|```None```,'tud','nud','mine','nu','tu','v','tav','mata','ja'|
|SubcatTagger|MorphToSyntaxMorphRewriter, SubcatRewriter|root, partofspeech, form|subcat|```None```, 'Intr', 'Part', 'gen', ...|
|MorphExtendedTagger|PunctuationTypeRewriter, MorphToSyntaxMorphRewriter, PronounTypeRewriter, RemoveDuplicateAnalysesRewriter, RemoveAdpositionAnalysesRewriter, LetterCaseRewriter, FiniteFormRewriter, VerbExtensionSuffixRewriter, SubcatRewriter|root, ending, clitic, partofspeech, form|partofspeech, form, punctuation_type, pronoun_type, letter_case, fin, verb_extension_suffix, subcat ||

In [1]:
from estnltk import Text

## PronounTypeTagger

In [2]:
from estnltk.taggers import PronounTypeTagger

# Prepare the input first. tag_layer() is called without arguments, so the
# tagger below works on whatever layers the library adds by default.
text = Text('Kumb, sina või mina?').tag_layer()

# Default-configured tagger; tagging is what makes text['pronoun_type']
# available for the lookup on the last line.
tagger = PronounTypeTagger()
tagger.tag(text)

# Last expression of the cell, so the notebook renders the layer.
text['pronoun_type']

text,lemma,root,root_tokens,ending,clitic,form,partofspeech,pronoun_type
Kumb,kumb,kumb,"(kumb,)",0.0,,sg n,P,[rel]
",",",",",","(,,)",,,,Z,
sina,sina,sina,"(sina,)",0.0,,sg n,P,[ps2]
või,või,või,"(või,)",0.0,,,J,
mina,mina,mina,"(mina,)",0.0,,sg n,S,
?,?,?,"(?,)",,,,Z,


## FiniteFormTagger

In [3]:
from estnltk.taggers import FiniteFormTagger

# Rules file handed to FiniteFormTagger. The variable is named after the
# keyword argument it feeds (the former name misspelled "Rules" as "Fules").
# NOTE: the path is relative, so it only resolves when the notebook is run
# from a directory that sits next to the estnltk package folder.
fs_to_synt_rules_file = '../estnltk/rewriting/syntax_preprocessing/rules_files/tmorftrtabel.txt'

tagger = FiniteFormTagger(fs_to_synt_rules_file=fs_to_synt_rules_file)
text = Text('laulma hüpelnud tantsija').tag_layer()
tagger.tag(text)

# Last expression of the cell, so the notebook renders the layer.
text['finite_form']

text,lemma,root,root_tokens,ending,clitic,form,partofspeech,fin
laulma,laulma,laul,"(laul,)",ma,,mod sup ps ill,V,False
,laulma,laul,"(laul,)",ma,,aux sup ps ill,V,False
,laulma,laul,"(laul,)",ma,,main sup ps ill,V,False
hüpelnud,hüplema,hüple,"(hüple,)",nud,,mod indic impf ps neg,V,True
,hüplema,hüple,"(hüple,)",nud,,mod partic past ps,V,False
,hüplema,hüple,"(hüple,)",nud,,aux indic impf ps neg,V,True
,hüplema,hüple,"(hüple,)",nud,,aux partic past ps,V,False
,hüplema,hüple,"(hüple,)",nud,,main indic impf ps neg,V,True
,hüplema,hüple,"(hüple,)",nud,,main partic past ps,V,False
,hüpelnud,hüpel=nud,"(hüpelnud,)",0,,pos,A,


## VerbExtensionSuffixTagger

In [4]:
from estnltk.taggers import VerbExtensionSuffixTagger

# Prepare the input first. tag_layer() is called without arguments, so the
# tagger below works on whatever layers the library adds by default.
text = Text('Laulev hüpelnud tantsija').tag_layer()

# Default-configured tagger; tagging is what makes
# text['verb_extension_suffix'] available for the lookup on the last line.
tagger = VerbExtensionSuffixTagger()
tagger.tag(text)

# Last expression of the cell, so the notebook renders the layer.
text['verb_extension_suffix']

text,lemma,root,root_tokens,ending,clitic,form,partofspeech,verb_extension_suffix
Laulev,laulev,laulev,"(laulev,)",0,,sg n,A,[]
hüpelnud,hüplema,hüple,"(hüple,)",nud,,nud,V,[]
,hüpelnud,hüpel=nud,"(hüpelnud,)",0,,,A,[nud]
,hüpelnud,hüpel=nud,"(hüpelnud,)",0,,sg n,A,[nud]
,hüpelnud,hüpel=nud,"(hüpelnud,)",d,,pl n,A,[nud]
tantsija,tantsija,tantsija,"(tantsija,)",0,,sg n,S,[]


## SubcatTagger

In [5]:
from estnltk.taggers import SubcatTagger

# Both rule files live in the same directory; spell the prefix once so the two
# paths cannot drift apart. The resulting strings are identical to the
# previously hard-coded literals.
# NOTE: the path is relative, so it only resolves when the notebook is run
# from a directory that sits next to the estnltk package folder.
rules_dir = '../estnltk/rewriting/syntax_preprocessing/rules_files'

# Variables are named after the keyword arguments they feed.
fs_to_synt_rules_file = rules_dir + '/tmorftrtabel.txt'
subcat_rules_file = rules_dir + '/abileksikon06utf.lx'

tagger = SubcatTagger(fs_to_synt_rules_file=fs_to_synt_rules_file,
                      subcat_rules_file=subcat_rules_file)
text = Text('Järel juurduma').tag_layer()
tagger.tag(text)

# Last expression of the cell, so the notebook renders the layer.
text['subcat']

text,lemma,root,root_tokens,ending,clitic,form,partofspeech,subcat
Järel,järel,järel,"(järel,)",0,,post,K,[gen]
,järel,järel,"(järel,)",0,,pre,K,
juurduma,juurduma,juurdu,"(juurdu,)",ma,,mod sup ps ill,V,[Intr]
,juurduma,juurdu,"(juurdu,)",ma,,aux sup ps ill,V,[Intr]
,juurduma,juurdu,"(juurdu,)",ma,,main sup ps ill,V,[Intr]


## MorphExtendedTagger

In [6]:
from estnltk.taggers import MorphExtendedTagger

# Both rule files live in the same directory; spell the prefix once so the two
# paths cannot drift apart. The resulting strings are identical to the
# previously hard-coded literals.
# NOTE: the path is relative, so it only resolves when the notebook is run
# from a directory that sits next to the estnltk package folder.
rules_dir = '../estnltk/rewriting/syntax_preprocessing/rules_files'

# Variables are named after the keyword arguments they feed.
fs_to_synt_rules_file = rules_dir + '/tmorftrtabel.txt'
subcat_rules_file = rules_dir + '/abileksikon06utf.lx'

# NOTE(review): allow_to_remove_all=False presumably stops the analysis-removing
# rewriters from leaving a word with no analyses at all; confirm against the
# tagger's documentation.
tagger = MorphExtendedTagger(fs_to_synt_rules_file=fs_to_synt_rules_file,
                             allow_to_remove_all=False,
                             subcat_rules_file=subcat_rules_file)
text = Text('Ta on rääkinud!').tag_layer()
tagger.tag(text)

# Last expression of the cell, so the notebook renders the layer.
text['morph_extended']

text,word_text,lemma,root,ending,clitic,partofspeech,form,punctuation_type,pronoun_type,letter_case,fin,verb_extension_suffix,subcat
Ta,Ta,tema,tema,0,,P,sg nom,,[ps3],cap,,[],
on,on,olema,ole,0,,V,mod indic pres ps3 sg ps af,,,,True,[],[Intr]
,on,olema,ole,0,,V,aux indic pres ps3 sg ps af,,,,True,[],[Intr]
,on,olema,ole,0,,V,main indic pres ps3 sg ps af,,,,True,[],[Intr]
,on,olema,ole,0,,V,mod indic pres ps3 pl ps af,,,,True,[],[Intr]
,on,olema,ole,0,,V,aux indic pres ps3 pl ps af,,,,True,[],[Intr]
,on,olema,ole,0,,V,main indic pres ps3 pl ps af,,,,True,[],[Intr]
rääkinud,rääkinud,rääkima,rääki,nud,,V,mod indic impf ps neg,,,,True,[],"[Part-P, El]"
,rääkinud,rääkima,rääki,nud,,V,mod partic past ps,,,,False,[],"[Part-P, El]"
,rääkinud,rääkima,rääki,nud,,V,aux indic impf ps neg,,,,True,[],"[Part-P, El]"


# CG3 exporter

In [7]:
from estnltk.converters.CG3_exporter import export_CG3
# Unlike the cells above, no tagger is instantiated by hand: the text is
# analysed in a single call, selected by name.
text = Text('Lähme! Ta on rääkinud.')
# Must run before the export below.
# NOTE(review): presumably this builds the layers export_CG3 reads; confirm.
text.analyse('syntax_preprocessing')
# Last expression of the cell: the notebook shows the returned value, a list
# of strings holding sentence markers, word forms and indented analysis lines.
export_CG3(text)

['"<s>"',
 '"<Lähme>"',
 '    "mine" Lme V mod indic pres ps1 pl ps af cap <FinV>',
 '    "mine" Lme V aux indic pres ps1 pl ps af cap <FinV>',
 '    "mine" Lme V main indic pres ps1 pl ps af cap <FinV>',
 '"<!>"',
 '    "!" Z Exc',
 '"</s>"',
 '"<s>"',
 '"<Ta>"',
 '    "tema" L0 P pers ps3 sg nom cap',
 '"<on>"',
 '    "ole" L0 V mod indic pres ps3 sg ps af <FinV> <Intr>',
 '    "ole" L0 V aux indic pres ps3 sg ps af <FinV> <Intr>',
 '    "ole" L0 V main indic pres ps3 sg ps af <FinV> <Intr>',
 '    "ole" L0 V mod indic pres ps3 pl ps af <FinV> <Intr>',
 '    "ole" L0 V aux indic pres ps3 pl ps af <FinV> <Intr>',
 '    "ole" L0 V main indic pres ps3 pl ps af <FinV> <Intr>',
 '"<rääkinud>"',
 '    "rääki" Lnud V mod indic impf ps neg <FinV> <Part-P> <El>',
 '    "rääki" Lnud V mod partic past ps <Part-P> <El>',
 '    "rääki" Lnud V aux indic impf ps neg <FinV> <Part-P> <El>',
 '    "rääki" Lnud V aux partic past ps <Part-P> <El>',
 '    "rääki" Lnud V main indic impf ps neg <FinV> <Par