# Morphological analysis with premorph and postmorph

In [1]:
from estnltk.text import Text

By default premorph and postmorph are executed.

In [2]:
# Call tag_layer() with no arguments; according to this notebook, that default
# run executes both premorph and postmorph.
# NOTE(review): the sample here is '3daks', while the later cells analyse
# '3ndaks' — confirm which spelling is intended.
t = Text('tu-li 3daks')
t = t.tag_layer()
t.morf_analysis

SL[SL[Span(tu-li, {'root_tokens': ['tule'], 'lemma': 'tulema', 'form': 's', 'clitic': '', 'root': 'tule', 'partofspeech': 'V', 'ending': 'i'})],
SL[Span(3daks, {'root_tokens': ['3.'], 'lemma': '3.', 'form': 'sg tr', 'clitic': '', 'root': '3.', 'partofspeech': 'O', 'ending': 'ks'})]]

This is equivalent to the following explicit pipeline:

In [3]:
from estnltk.taggers.premorph.premorf import WordNormalizingTagger
from estnltk.taggers.morf import VabamorfTagger
from estnltk.rewriting.postmorph.vabamorf_corrector import VabamorfCorrectionRewriter

# Spell the pipeline out by hand: words -> premorph -> morphological analysis
# with a postmorph rewriter.
vabamorf_corrector = VabamorfCorrectionRewriter(replace=True)

t = Text('tu-li 3ndaks')
t.tag_layer(['words'])

# Premorph step. Presumably this adds the 'normalized' layer that the
# VabamorfTagger below is pointed at — TODO confirm.
premorph_tagger = WordNormalizingTagger()
premorph_tagger.tag(t)

# Morphological analysis configured with premorf_layer='normalized' and the
# corrector passed in as the postmorph rewriter.
morph_tagger = VabamorfTagger(
    premorf_layer='normalized',
    postmorph_rewriter=vabamorf_corrector,
)
morph_tagger.tag(t)

t.morf_analysis

SL[SL[Span(tu-li, {'root_tokens': ['tule'], 'lemma': 'tulema', 'form': 's', 'clitic': '', 'root': 'tule', 'partofspeech': 'V', 'ending': 'i'})],
SL[Span(3ndaks, {'root_tokens': ['3.'], 'lemma': '3.', 'form': 'sg tr', 'clitic': '', 'root': '3.', 'partofspeech': 'O', 'ending': 'ks'})]]

Modify the code above to customize premorph or postmorph.

For example, turn off premorph and postmorph:

In [4]:
# Same input as the explicit pipeline, but both hooks are switched off:
# premorf_layer=None and postmorph_rewriter=None.
t = Text('tu-li 3ndaks')
t.tag_layer(['words'])

plain_tagger = VabamorfTagger(premorf_layer=None, postmorph_rewriter=None)
plain_tagger.tag(t)

t.morf_analysis

SL[SL[Span(tu-li, {'root_tokens': ['tu', 'li'], 'lemma': 'tu-li', 'form': '?', 'clitic': '', 'root': 'tu-li', 'partofspeech': 'Y', 'ending': '0'})],
SL[Span(3ndaks, {'root_tokens': ['3s'], 'lemma': '3s', 'form': 'sg tr', 'clitic': '', 'root': '3=s', 'partofspeech': 'O', 'ending': 'ks'})]]

## Use the `morf_analysis` layer to create a `corrected_morph` layer

1. Create a text object.
2. Tag the `normalized` layer (and also the `words` layer).
3. Tag the `morf_analysis` layer with `VabamorfTagger`, keeping premorph on and postmorph off (`postmorph_rewriter=None`).
4. Create a layer `_morph` that contains the data from the layers `morf_analysis` and `normalized`.
5. Rewrite the `_morph` layer with `VabamorfCorrectionRewriter` and get the `corrected_morph` layer as a result.
6. Attach the `corrected_morph` layer to the text object.

Now `text.corrected_morph` contains the same analyses as `t.morf_analysis` in the examples above where both premorph and postmorph are executed.

In [5]:
from estnltk.text import Span, Layer
from estnltk.rewriting.postmorph.vabamorf_corrector import VabamorfCorrectionRewriter

text = Text('tu-li 3ndaks')
text.tag_layer(['normalized'])

# Analyse with premorph only (postmorph_rewriter=None); the correction step is
# applied by hand further down.
uncorrected_tagger = VabamorfTagger(premorf_layer='normalized', postmorph_rewriter=None)
uncorrected_tagger.tag(text)

# Attributes copied from each morphological analysis, plus one extra attribute
# carrying the word form taken from the word span.
morph_attributes = ['form', 'root_tokens', 'clitic', 'partofspeech', 'ending', 'root', 'lemma']
attributes = morph_attributes + ['word_normal']

# NOTE(review): the intermediate layer lives in the variable `_morph` but is
# constructed with name='words' (the same value as its parent). Left as-is
# because it is unclear whether rewrite() or the later attachment depends on
# that name — confirm before renaming.
_morph = Layer(name='words', parent='words', ambiguous=True, attributes=attributes)

# One span per analysis of every word: copy the morphological attributes over
# and record the word form alongside them.
for word, analyses in zip(text.words, text.morf_analysis):
    for analysis in analyses:
        span = _morph.add_span(Span(parent=word))
        for attr in morph_attributes:
            setattr(span, attr, getattr(analysis, attr))
        # Use the word's `normal` value when it is truthy, otherwise its raw text.
        span.word_normal = word.normal or word.text


# NOTE(review): constructed without replace=True, unlike the earlier explicit
# pipeline cell — confirm the default gives the intended behaviour.
postmorph_rewriter = VabamorfCorrectionRewriter()

# Feed all attributes (including 'word_normal') to the rewriter and keep only
# the morphological attributes in the resulting layer.
corrected_morph = _morph.rewrite(
    source_attributes=attributes,
    target_attributes=morph_attributes,
    rules=postmorph_rewriter,
    name='corrected_morph',
    ambiguous=True,
)

text['corrected_morph'] = corrected_morph
text.corrected_morph

SL[SL[Span(tu-li, {'root_tokens': ['tule'], 'lemma': 'tulema', 'form': 's', 'clitic': '', 'root': 'tule', 'partofspeech': 'V', 'ending': 'i'})],
SL[Span(3ndaks, {'root_tokens': ['3.'], 'lemma': '3.', 'form': 'sg tr', 'clitic': '', 'root': '3.', 'partofspeech': 'O', 'ending': 'ks'})]]