In [1]:
from estnltk import Text
from estnltk.layer import Layer
from estnltk.spans import Span

## Minimal tagger example

In [2]:
from estnltk.taggers import TaggerNew

class MinimalTagger(TaggerNew):
    description = 'Minimal tagger example'
    attributes = ()
    depends_on = ()
    layer_name = 'minimal'
    conf_param = ()
    
    def __init__(self):
        pass

    def make_layer(self, raw_text, layers, status):
        return Layer(self.layer_name)

minimal_tagger = MinimalTagger()
minimal_tagger

name,layer,attributes,depends_on
MinimalTagger,minimal,(),()


In [3]:
text = Text('tere')
minimal_tagger.tag(text)

text
tere

layer name,attributes,parent,enveloping,ambiguous,span count
minimal,,,,False,0


In [4]:
text.minimal

layer name,attributes,parent,enveloping,ambiguous,span count
minimal,,,,False,0

text,start,end


## Example 2

In [5]:
import regex as re

class NumberTagger(TaggerNew):
    description = 'Tags numbers'
    attributes = ()
    depends_on = ()
    layer_name = 'numbers'
    conf_param = ['regex']
    
    def __init__(self):
        self.regex = re.compile('-?\d+')

    def make_layer(self, raw_text, layers, status):
        layer = Layer(self.layer_name)
        for m in self.regex.finditer(raw_text):
            span = Span(m.start(), m.end())
            layer.add_span(span)
        return layer

number_tagger = NumberTagger()
number_tagger

name,layer,attributes,depends_on
NumberTagger,numbers,(),()

0,1
regex,"regex.Regex('-?\\d+', flags=regex.V0)"


In [6]:
text = Text('-123,45')
number_tagger(text)
text.numbers

layer name,attributes,parent,enveloping,ambiguous,span count
numbers,,,,False,2

text,start,end
-123,0,4
45,5,7


In [7]:
from copy import deepcopy

class EvaluatingTagger(TaggerNew):
    description = 'Evaluating tagger'
    attributes = ['value']
    conf_param = []
    
    def __init__(self, layer_name='numbers', depends_on=['numbers']):
        self.layer_name = layer_name
        self.depends_on = depends_on

    def change_layer(self, raw_text, layers, status):
        layer = layers[self.depends_on[0]]
        layer.attributes += tuple(self.attributes)
        for span in layers[self.depends_on[0]]:
            span.value = int(span.text)
    
    def make_layer(self, raw_text, layers, status):
        layer = list(layers.values())[0]
        layer = deepcopy(layer)
        layer._bound = False
        self.change_layer(raw_text, {layer.name: layer}, status)
        layer.name = self.layer_name
        return layer

evaluating_tagger = EvaluatingTagger('number_values')
evaluating_tagger

name,layer,attributes,depends_on
EvaluatingTagger,number_values,['value'],['numbers']


In [8]:
evaluating_tagger.change(text)
text.numbers

layer name,attributes,parent,enveloping,ambiguous,span count
numbers,value,,,False,2

text,start,end,value
-123,0,4,-123
45,5,7,45


In [9]:
evaluating_tagger.tag(text)

text
-12345

layer name,attributes,parent,enveloping,ambiguous,span count
number_values,"value, value",,,False,2
numbers,value,,,False,2
