In [1]:
from estnltk import Text
from estnltk.layer import Layer
from estnltk.spans import Span

## Minimal tagger example

In [2]:
from estnltk.taggers import TaggerNew

class MinimalTagger(TaggerNew):
    """Minimal example tagger that produces an empty layer called ``minimal``."""
    description = 'Minimal tagger example'
    conf_param = ()

    def __init__(self):
        """Declare the tagger's layer metadata: no inputs, no attributes."""
        self.output_attributes = ()
        self.input_layers = ()
        self.output_layer = 'minimal'

    def make_layer(self, raw_text, layers, status):
        """Build and return a new layer named after ``self.output_layer``.

        ``raw_text``, ``layers`` and ``status`` are accepted but not used;
        no spans are added to the returned layer.
        """
        empty_layer = Layer(name=self.output_layer)
        return empty_layer

# Instantiate the tagger; the bare expression on the last line displays its summary table.
minimal_tagger = MinimalTagger()
minimal_tagger

name,output layer,output attributes,input layers
MinimalTagger,minimal,(),()


In [3]:
# Create a Text object and let the tagger attach its (empty) 'minimal' layer to it.
text = Text('tere')
minimal_tagger.tag(text)

text
tere

layer name,attributes,parent,enveloping,ambiguous,span count
minimal,,,,False,0


In [4]:
# The new layer is accessible as an attribute of the text; it contains no spans.
text.minimal

layer name,attributes,parent,enveloping,ambiguous,span count
minimal,,,,False,0

text,start,end


## Example 2: tagging numbers with a regular expression

In [5]:
import regex as re


class NumberTagger(TaggerNew):
    """Tagger that marks every optionally negative run of digits in the raw text.

    The layer metadata is declared at class level; the compiled pattern is
    stored in the ``regex`` configuration parameter.
    """
    description = 'Tags numbers'
    output_attributes = ()
    input_layers = ()
    output_layer = 'numbers'
    conf_param = ['regex']

    def __init__(self):
        """Compile the number pattern used by ``make_layer``."""
        # Raw string: '\d' in a plain string literal is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        self.regex = re.compile(r'-?\d+')

    def make_layer(self, raw_text, layers, status):
        """Return a layer with one span per regex match in ``raw_text``.

        ``layers`` and ``status`` are accepted but not used.
        """
        layer = Layer(name=self.output_layer)
        for m in self.regex.finditer(raw_text):
            # Span boundaries are the character offsets of the match.
            layer.add_span(Span(m.start(), m.end()))
        return layer

# Instantiate the tagger; its displayed summary also lists the 'regex' configuration parameter.
number_tagger = NumberTagger()
number_tagger

name,output layer,output attributes,input layers
NumberTagger,numbers,(),()

0,1
regex,"regex.Regex('-?\\d+', flags=regex.V0)"


In [6]:
# The tagger instance is called directly on the text here.
# The comma is not matched by the pattern, so '-123' and '45' become two separate spans.
text = Text('-123,45')
number_tagger(text)
text.numbers

layer name,attributes,parent,enveloping,ambiguous,span count
numbers,,,,False,2

text,start,end
-123,0,4
45,5,7


# Retagger
A retagger changes an existing layer. The retagger below adds a `value` attribute to the `numbers` layer.

In [7]:
from copy import deepcopy
from estnltk.taggers import Retagger


class EvaluatingRetagger(Retagger):
    """Retagger that adds an integer ``value`` attribute to the spans of its input layer."""
    description = 'Evaluating retagger'
    conf_param = []

    def __init__(self, output_layer='numbers', input_layers=['numbers']):
        """Store the layer names and declare the ``value`` output attribute.

        Parameters
        ----------
        output_layer : str
            Name reported as the retagger's output layer.
        input_layers : sequence of str
            Layer names; ``change_layer`` modifies the first one.
        """
        self.output_layer = output_layer
        # Copy the argument so that the shared default list (or a caller's
        # list) is never aliased by this instance.
        self.input_layers = list(input_layers)
        self.output_attributes = ['value']

    def change_layer(self, raw_text, layers, status):
        """Add ``value`` to the first input layer and fill it in for every span.

        The layer is modified in place; ``raw_text`` and ``status`` are not used.
        """
        layer = layers[self.input_layers[0]]
        # Only register attributes the layer does not have yet, so that
        # retagging the same text twice does not duplicate 'value'.
        missing = tuple(attr for attr in self.output_attributes
                        if attr not in layer.attributes)
        if missing:
            layer.attributes += missing
        for span in layer:
            span.value = int(span.text)


# NOTE(review): 'number_values' only sets output_layer; change_layer modifies the layer
# named by input_layers[0] ('numbers'), as the outputs of the following cells show.
# Confirm whether a separate output layer name is intended here.
evaluating_tagger = EvaluatingRetagger('number_values')
evaluating_tagger

name,output layer,output attributes,input layers
EvaluatingRetagger,number_values,['value'],['numbers']


In [8]:
# Apply the retagger to the text tagged above; its existing 'numbers' layer gains the 'value' attribute.
evaluating_tagger.retag(text)

text
-12345

layer name,attributes,parent,enveloping,ambiguous,span count
numbers,value,,,False,2


In [9]:
# Inspect the retagged layer: each span now carries the integer value of its text.
text.numbers

layer name,attributes,parent,enveloping,ambiguous,span count
numbers,value,,,False,2

text,start,end,value
-123,0,4,-123
45,5,7,45
