# GapsTagger

In [1]:
from estnltk import Text
from estnltk.layer import Layer
from estnltk.spans import Span
from estnltk.taggers.gaps_tagging.gaps_tagger import GapsTagger

In [2]:
text = Text('Üks kaks kolm neli viis kuus seitse.')

# Input layer test_1 covers 'kaks', 'kolm' and 'kuus'.
layer_1 = Layer('test_1')
for boundaries in [(4, 8), (9, 13), (24, 28)]:
    layer_1.add_span(Span(*boundaries))
text['test_1'] = layer_1

# Input layer test_2 covers 'kaks', 'kolm neli' and the final '.'.
layer_2 = Layer('test_2')
for boundaries in [(4, 8), (9, 18), (35, 36)]:
    layer_2.add_span(Span(*boundaries))
text['test_2'] = layer_2

## Example 1

In [3]:
# Example 1: tag every stretch of text that no span of test_1 or test_2
# covers. No trim function is given, so the gaps keep their surrounding
# whitespace and the single space between 'kaks' and 'kolm' is reported too.
gaps_tagger = GapsTagger(layer_name='simple_gaps',
                         input_layers=['test_1', 'test_2'])
gaps_tagger.tag(text)
text.simple_gaps

layer name,attributes,parent,enveloping,ambiguous,span count
simple_gaps,,,,False,4

text,start,end
Üks,0,4
,8,9
viis,18,24
seitse,28,35


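The diagram below shows how the input layers and the resulting gap layers line up with the text. `simple_gaps` is the output of Example 1 (gaps keep their surrounding whitespace), and `gaps` is the output of Example 2 (gaps are trimmed, and the whitespace-only gap no longer appears).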

    text:           'Üks kaks kolm neli viis kuus seitse.'
    test_1:             'kaks'kolm'         'kuus'      
    test_2:             'kaks'kolm neli'               '.'
    simple_gaps:    'Üks '  ' '       ' viis '  ' seitse'
    gaps:           'Üks'              'viis'    'seitse'

## Example 2

In [4]:
def trim(text: str, s: int, e: int):
    """Shrink the span ``[s, e)`` of ``text`` to exclude surrounding whitespace.

    Parameters:
        text: the full raw text.
        s: start index of the span.
        e: end index (exclusive) of the span.

    Returns:
        A ``(start, end)`` tuple delimiting the slice with leading and
        trailing whitespace removed. If the slice is empty or consists of
        whitespace only, the empty span ``(s, s)`` is returned.
    """
    gap = text[s:e]
    stripped = gap.strip()
    # A whitespace-only gap collapses to an empty span anchored at s.
    leading = len(gap) - len(gap.lstrip()) if stripped else 0
    new_start = s + leading
    return (new_start, new_start + len(stripped))

def decorator(text: str):
    """Build the attribute dict for one gap.

    Parameters:
        text: the text of the gap.

    Returns:
        A dict with the single key ``'gap_length'`` holding ``len(text)``.
    """
    attributes = dict(gap_length=len(text))
    return attributes

# Example 2: same input layers as Example 1, but each gap is passed through
# `trim` to cut off surrounding whitespace, and `decorator` supplies the
# `gap_length` attribute for every resulting span.
gaps_tagger = GapsTagger(
    layer_name='gaps',
    input_layers=['test_1', 'test_2'],
    trim=trim,
    decorator=decorator,
    attributes=['gap_length'],
)
gaps_tagger.tag(text)
text.gaps

layer name,attributes,parent,enveloping,ambiguous,span count
gaps,gap_length,,,False,3

text,start,end,gap_length
Üks,0,3,3
viis,19,23,4
seitse,29,35,6


In [5]:
# Show the tagger's own representation: output layer, attributes, input
# layers, and the decorator/trim functions it was configured with.
gaps_tagger

name,layer,attributes,depends_on
GapsTagger,gaps,"(gap_length,)","[test_1, test_2]"

0,1
decorator function,<function decorator at 0x7fedb7e29268>
trim function,<function trim at 0x7fedb7e292f0>


In [6]:
from estnltk.taggers.merge_tagging.merge_tagger import MergeTagger

# Combine the spans of test_1 and test_2 into a single layer named 'merged'.
# Per the displayed result, spans present in both inputs appear twice
# (3 + 3 input spans give 6 merged spans).
merge_tagger = MergeTagger('merged', input_layers=['test_1', 'test_2'])
merge_tagger.tag(text)

In [7]:
# Show the Text object: the raw text plus a summary of every layer attached so far.
text

text
Üks kaks kolm neli viis kuus seitse.

layer name,attributes,parent,enveloping,ambiguous,span count
simple_gaps,,,,False,4
gaps,gap_length,,,False,3
test_1,,,,False,3
merged,,,,False,6
test_2,,,,False,3


In [8]:
# Show the merged layer: the spans of both input layers, ordered by position.
text.merged

layer name,attributes,parent,enveloping,ambiguous,span count
merged,,,,False,6

text,start,end
kaks,4,8
kaks,4,8
kolm,9,13
kolm neli,9,18
kuus,24,28
.,35,36


In [9]:
# Show input layer test_1: still the three spans added during setup.
text.test_1

layer name,attributes,parent,enveloping,ambiguous,span count
test_1,,,,False,3

text,start,end
kaks,4,8
kolm,9,13
kuus,24,28


In [10]:
# Show input layer test_2: still the three spans added during setup.
text.test_2

layer name,attributes,parent,enveloping,ambiguous,span count
test_2,,,,False,3

text,start,end
kaks,4,8
kolm neli,9,18
.,35,36
