# GapsTagger

In [1]:
from estnltk import Text
from estnltk.layer import Layer
from estnltk.spans import Span
from estnltk.spans import SpanList
from estnltk.taggers.gaps_tagging.gaps_tagger import GapsTagger

In [2]:
text = Text('Üks kaks kolm neli viis kuus seitse.')

# First input layer: character spans over 'kaks', 'kolm' and 'kuus'.
layer_1 = Layer('test_1')
for start, end in [(4, 8), (9, 13), (24, 28)]:
    layer_1.add_span(Span(start, end))
text['test_1'] = layer_1

# Second input layer: character spans over 'kaks', 'kolm neli' and the final '.'.
layer_2 = Layer('test_2')
for start, end in [(4, 8), (9, 18), (35, 36)]:
    layer_2.add_span(Span(start, end))
text['test_2'] = layer_2

## Example 1

In [3]:
# Tag the stretches of text that are covered by neither input layer,
# without trimming or decorating them, and display the resulting layer.
gaps_tagger = GapsTagger(layer_name='simple_gaps',
                         input_layers=['test_1', 'test_2'])
gaps_tagger.tag(text)
text.simple_gaps

layer name,attributes,parent,enveloping,ambiguous,span count
simple_gaps,,,,False,4

text,start,end
Üks,0,4
,8,9
viis,18,24
seitse,28,35


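The diagram below shows the input layers (`test_1`, `test_2`) together with the gap layers produced in Examples 1 and 2: `simple_gaps` (untrimmed) and `gaps` (trimmed).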

    text:           'Üks kaks kolm neli viis kuus seitse.'
    test_1:             'kaks'kolm'         'kuus'      
    test_2:             'kaks'kolm neli'               '.'
    simple_gaps:    'Üks '  ' '       ' viis '  ' seitse'
    gaps:           'Üks'              'viis'    'seitse'

## Example 2

In [4]:
def trim(t: str) -> str:
    """Return ``t`` with leading and trailing whitespace removed."""
    stripped = t.strip()
    return stripped

def decorator(text: str):
    """Return a dict holding the length of ``text`` under the key 'gap_length'."""
    gap_length = len(text)
    return {'gap_length': gap_length}

# Configure a tagger that passes each gap through `trim` and fills the
# 'gap_length' attribute from the dict returned by `decorator`;
# the bare name on the last line displays the tagger's configuration.
gaps_tagger = GapsTagger(
    layer_name='gaps',
    input_layers=['test_1', 'test_2'],
    trim=trim,
    decorator=decorator,
    attributes=['gap_length'],
)
gaps_tagger

name,layer,attributes,depends_on
GapsTagger,gaps,"(gap_length,)","[test_1, test_2]"

0,1
decorator function,<function decorator at 0x7fea2e6027b8>
trim function,<function trim at 0x7fea2e6029d8>


In [5]:
# Run the configured tagger on `text`, then display the resulting 'gaps' layer.
gaps_tagger.tag(text)
text.gaps

layer name,attributes,parent,enveloping,ambiguous,span count
gaps,gap_length,,,False,3

text,start,end,gap_length
Üks,0,3,3
viis,19,23,4
seitse,29,35,6


## Example 3: enveloping layers

When the input layers are enveloping layers, the resulting gaps layer is an unambiguous enveloping layer. All input layers must envelop the same layer; the input layers themselves may be ambiguous or unambiguous.

In [6]:
# Start from a fresh Text and tag the 'words' layer, which the test layers
# in this example envelop. (SpanList is imported in the notebook's first cell.)
text = Text('Üks kaks kolm neli viis kuus seitse.')
text.tag_layer(['words'])

# test_3: an unambiguous layer enveloping 'words'.
layer = Layer('test_3', enveloping='words')

# Each enveloping span is a SpanList whose `spans` is a slice of word spans:
# words[0:2] -> 'Üks kaks', words[3:4] -> 'neli'.
for word_slice in (text.words[0:2], text.words[3:4]):
    spl = SpanList()
    spl.spans = word_slice
    layer.add_span(spl)

# Attach the layer to the text and display it.
text['test_3'] = layer
text.test_3

layer name,attributes,parent,enveloping,ambiguous,span count
test_3,,,words,False,2

text,start,end
Üks kaks,0,8
neli,14,18


In [7]:
# test_4: an ambiguous layer enveloping 'words'; the same word slice
# (words[3:5]) is added to it twice.
layer = Layer('test_4', enveloping='words', ambiguous=True)

for word_slice in (text.words[3:5], text.words[3:5]):
    spl = SpanList()
    spl.spans = word_slice
    layer.add_span(spl)

text['test_4'] = layer
text.test_4.spans

In [8]:
def decorator(spans):
    """Return a dict holding ``len(spans)`` under the key 'gap_word_count'."""
    word_count = len(spans)
    return {'gap_word_count': word_count}

# Configure a tagger over the two enveloping input layers; `decorator`
# supplies the 'gap_word_count' attribute. No `trim` argument is passed here.
# The bare name on the last line displays the tagger's configuration.
gaps_tagger = GapsTagger(
    layer_name='gaps',
    input_layers=['test_3', 'test_4'],
    decorator=decorator,
    attributes=['gap_word_count'],
)
gaps_tagger

name,layer,attributes,depends_on
GapsTagger,gaps,"(gap_word_count,)","[test_3, test_4]"

0,1
decorator function,<function decorator at 0x7fea2e602620>
trim function,<function trim at 0x7fea2e602510>


In [9]:
# Run the tagger on the enveloping input layers, then display the resulting 'gaps' layer.
gaps_tagger.tag(text)
text.gaps

layer name,attributes,parent,enveloping,ambiguous,span count
gaps,gap_word_count,,words,False,2

text,start,end,gap_word_count
kolm,9,13,1
kuus seitse.,24,36,3
