# Disambiguator (a system level tagger)

In [1]:
from estnltk import Text, Layer
from estnltk.taggers import Disambiguator

## Disambiguate a simple ambiguous layer

In [2]:
text = Text('Tere, maailm!')

# One row per annotation: (start, end, attr_1, attr_2).
# Several rows share a span on purpose, which is what makes the layer
# ambiguous. The (0, 4) span is deliberately revisited after (4, 5).
annotation_rows = [
    ( 0,  4,  1,  2),
    ( 0,  4,  3,  4),
    ( 0,  4,  5,  6),
    ( 4,  5,  7,  8),
    ( 4,  5,  9, 10),
    ( 0,  4, 11, 12),
    ( 6, 12, 13, 14),
    ( 6, 12, 15, 16),
    (12, 13, 17, 18),
]

layer_1 = Layer(
    name='simple_ambiguous',
    attributes=['attr_1', 'attr_2'],
    ambiguous=True,
    text_object=text,
)
for start, end, first_value, second_value in annotation_rows:
    layer_1.add_annotation((start, end), attr_1=first_value, attr_2=second_value)
text.add_layer(layer_1)

layer_1

layer name,attributes,parent,enveloping,ambiguous,span count
simple_ambiguous,"attr_1, attr_2",,,True,4

text,attr_1,attr_2
Tere,1,2
,3,4
,5,6
,11,12
",",7,8
,9,10
maailm,13,14
,15,16
!,17,18


In [3]:
def decorator(ambiguous_span, raw_text):
    """Build the single output annotation for one ambiguous span.

    Args:
        ambiguous_span: object whose ``annotations`` attribute is iterable
            and whose items each expose a numeric ``attr_1``.
        raw_text: accepted as the second argument but not used here.

    Returns:
        A dict with one key, ``'attr_1'``, holding the sum of ``attr_1``
        over all annotations of the span (0 when there are none).
    """
    total = sum(item.attr_1 for item in ambiguous_span.annotations)
    return {'attr_1': total}

# Configure a Disambiguator that reads the 'simple_ambiguous' layer and
# writes a layer named 'simple' whose only attribute, 'attr_1', is filled
# from the dict returned by `decorator`.
tagger_1 = Disambiguator(
    input_layer='simple_ambiguous',
    output_layer='simple',
    output_attributes=['attr_1'],
    decorator=decorator,
)
tagger_1

name,output layer,output attributes,input layers
Disambiguator,simple,"('attr_1',)","('simple_ambiguous',)"

0,1
decorator,<function __main__.decorator>


In [4]:
# Run the disambiguator on the text, then display the resulting layer.
# It is reached as `text.simple` because tagger_1 was configured with
# output_layer='simple'.
tagger_1.tag(text)
text.simple

layer name,attributes,parent,enveloping,ambiguous,span count
simple,attr_1,,,False,4

text,attr_1
Tere,20
",",16
maailm,28
!,17


## Disambiguate an enveloping ambiguous layer

In [5]:
# Ambiguous layer whose spans envelop spans of the 'simple_ambiguous' layer.
layer_2 = Layer(
    name='enveloping_ambiguous',
    attributes=['attr_3'],
    enveloping='simple_ambiguous',
    ambiguous=True,
)

# The first two 'simple_ambiguous' spans get two annotations on the same
# enveloping span.
spans = text.simple_ambiguous[0:2]
for attr_3_value in (30, 31):
    layer_2.add_annotation(spans, attr_3=attr_3_value)

# The last two spans get a single annotation.
spans = text.simple_ambiguous[2:4]
layer_2.add_annotation(spans, attr_3=32)

text.add_layer(layer_2)

layer_2

layer name,attributes,parent,enveloping,ambiguous,span count
enveloping_ambiguous,attr_3,,simple_ambiguous,True,2

text,attr_3
"['Tere', ',']",30
,31
"['maailm', '!']",32


In [6]:
# NOTE: this reuses the name `decorator` and so shadows the earlier
# definition in this notebook. tagger_1 keeps the function object it was
# given when it was constructed.
def decorator(ambiguous_span, raw_text):
    """Build the single output annotation for one ambiguous span.

    Args:
        ambiguous_span: any object supporting ``len()``.
        raw_text: accepted as the second argument but not used here.

    Returns:
        A dict with one key, ``'attr_1'``, holding ``len(ambiguous_span)``.
    """
    span_length = len(ambiguous_span)
    return {'attr_1': span_length}

# Configure a Disambiguator that reads the 'enveloping_ambiguous' layer and
# writes a layer named 'enveloping' whose only attribute, 'attr_1', is
# filled from the dict returned by `decorator`.
tagger_2 = Disambiguator(
    input_layer='enveloping_ambiguous',
    output_layer='enveloping',
    output_attributes=['attr_1'],
    decorator=decorator,
)
tagger_2

name,output layer,output attributes,input layers
Disambiguator,enveloping,"('attr_1',)","('enveloping_ambiguous',)"

0,1
decorator,<function __main__.decorator>


In [7]:
# Run the disambiguator on the text, then display the resulting layer.
# It is reached as `text.enveloping` because tagger_2 was configured with
# output_layer='enveloping'.
tagger_2.tag(text)
text.enveloping

layer name,attributes,parent,enveloping,ambiguous,span count
enveloping,attr_1,,simple_ambiguous,False,2

text,attr_1
"['Tere', ',']",2
"['maailm', '!']",2
