# PhraseListTagger
## Example 1

In [1]:
from estnltk import Text
text = Text('Patsient lasi jalga, sest vasaku jala suure varba pika \
            painutajalihase kõõluse rebend ajas tal kopsu üle maksa.')
text.analyse('morphology')

text
"Patsient lasi jalga, sest vasaku jala suure varba pika painutajalihase kõõluse rebend ajas tal kopsu üle maksa."

layer name,attributes,parent,enveloping,ambiguous,span count
words,normalized_form,,,False,19
morph_analysis,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,19


In [2]:
from estnltk.finite_grammar import PhraseListTagger

phrase_list = [
               ('jalg'),
               ('vasak', 'jalg',),
               ('parem', 'jalg',),
               ('kops',),
               ('vasak', 'kops'),
               ('parem', 'kops'),
               ('kõõlus',),
               ('lihas',),
               ('maks',),
               ('neer',),
               ('parem', 'neer'),
               ('vasak', 'neer'),
               ('varvas',),
               ('suur', 'varvas'),
              ]

def decorator(text, span, phrase):
    return {'match': phrase, 'dfsd':None}

def consistency_checker(text, span, phrase):
    return True

tagger = PhraseListTagger(layer_name='body_parts',
                          input_layer='morph_analysis',
                          input_attribute='lemma',
                          phrase_list=phrase_list,
                          attributes=('match',),
                          decorator=decorator,
                          consistency_checker=consistency_checker,
                          conflict_resolving_strategy= 'MAX')
tagger

name,layer,attributes,depends_on
PhraseListTagger,body_parts,"(match,)",[morph_analysis]

0,1
conflict_resolving_strategy,MAX
decorator,<function decorator at 0x7fbafb7a8ea0>
phrase_list,14 phrases
input_layer,morph_analysis
consistency_checker,<function consistency_checker at 0x7fbafb7a8b70>
attributes,"(match,)"
input_attribute,lemma


In [3]:
tagger.tag(text)

text
"Patsient lasi jalga, sest vasaku jala suure varba pika painutajalihase kõõluse rebend ajas tal kopsu üle maksa."

layer name,attributes,parent,enveloping,ambiguous,span count
words,normalized_form,,,False,19
morph_analysis,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,19
body_parts,match,,morph_analysis,False,4


In [4]:
text['body_parts']

layer name,attributes,parent,enveloping,ambiguous,span count
body_parts,match,,morph_analysis,False,4

text,match
vasaku jala,"(vasak, jalg)"
suure varba,"(suur, varvas)"
kõõluse,"(kõõlus,)"
kopsu,"(kops,)"


Note that 'maksa' is not tagged. This is because Vabamorf thinks its lemma is 'maksma'.

## Example 2: postprocessing
Suppose that we want to postprocess the layer before adding it to the text object. Here we add an attribute 'latin_term' to the layer.

In [5]:
text = Text('Vasaku jala suure varba pika painutajalihase kõõluse rebend ajas patsiendi kopsu üle maksa.')
text.analyse('morphology')
latin_dict = {('suur', 'varvas'):'hallux', ('kõõlus',):'tendo', ('kops',):'pulmo'}

layer = tagger.tag(text, return_layer=True)
layer.attributes = ('match','latin_term')
for s in layer:
    s.latin_term = latin_dict[s.match]
text['body_parts'] = layer
text['body_parts']

layer name,attributes,parent,enveloping,ambiguous,span count
body_parts,"match, latin_term",,morph_analysis,False,3

text,match,latin_term
suure varba,"(suur, varvas)",hallux
kõõluse,"(kõõlus,)",tendo
kopsu,"(kops,)",pulmo


## Example 3

In [6]:
text = Text('Naeris naeris.')
text.analyse('morphology')

text
Naeris naeris.

layer name,attributes,parent,enveloping,ambiguous,span count
words,normalized_form,,,False,3
morph_analysis,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3


Create one more layer of morphological analysis.

In [7]:
from estnltk.taggers import VabamorfTagger
tagger = VabamorfTagger(layer_name='morph_analysis_no_disambiguate',
                        disambiguate=False)
tagger.tag(text)

text
Naeris naeris.

layer name,attributes,parent,enveloping,ambiguous,span count
words,normalized_form,,,False,3
morph_analysis,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3
morph_analysis_no_disambiguate,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3


In [8]:
from estnltk.resolve_layer_dag import make_resolver

def decorator(text, span, phrase):
    return {'match': phrase}

tagger = PhraseListTagger('test',
                          'morph_analysis_no_disambiguate',
                          'lemma',
                          [
                            ('naerma', 'naerma'),
                            ('naerma', 'naeris'),
                            ('naeris', 'naerma'),
                            ('naeris', 'naeris'),
                          ],
                          attributes=('match',),
                          decorator=decorator,
                          conflict_resolving_strategy='ALL')
tagger.tag(text)
text['test']

layer name,attributes,parent,enveloping,ambiguous,span count
test,match,,morph_analysis_no_disambiguate,False,4

text,match
Naeris naeris,"(naerma, naerma)"
Naeris naeris,"(naerma, naeris)"
Naeris naeris,"(naeris, naerma)"
Naeris naeris,"(naeris, naeris)"


# Millised peaksid olema decorator'i, consistency_checker'i argumendid?

Näiteks olgu meil mitmene morph_analysis kiht ja matchime lemmasid ('naeris', 'naerma'). Kui anname ette mitmese spanni (seitsmene spanlist + neljane spanlist), siis ei ole üheselt selge milliseid lemmasid õnnestus matchida. Kui anname ette konkreetsed spanid, mis matchisid, näiteks kuuenda ja esimese, siis pole kindel, et parem poleks olnud ette anda hoopis seitsmendat ja esimest.

In [9]:
text.morph_analysis_no_disambiguate

layer name,attributes,parent,enveloping,ambiguous,span count
morph_analysis_no_disambiguate,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3

text,start,end,lemma,root,root_tokens,ending,clitic,form,partofspeech
Naeris,0,6,Naer,Naer,"(Naer,)",s,,sg in,H
,0,6,Naeri,Naeri,"(Naeri,)",s,,sg in,H
,0,6,Naeris,Naeris,"(Naeris,)",0,,sg n,H
,0,6,Naeris,Naeris,"(Naeris,)",s,,sg in,H
,0,6,naerma,naer,"(naer,)",is,,s,V
,0,6,naeris,naeris,"(naeris,)",0,,sg n,S
,0,6,naeris,naeris,"(naeris,)",s,,sg in,S
naeris,7,13,naerma,naer,"(naer,)",is,,s,V
,7,13,naeris,naeris,"(naeris,)",0,,sg n,S
,7,13,naeris,naeris,"(naeris,)",s,,sg in,S
