# Diff `measurements_old` and `measurements` layers

Connect to the database.

In [1]:
from estnltk.storage import PostgresStorage

storage = PostgresStorage(dbname='egcut_epi',
                          pgpass_file='~/.pgpass',
                          schema="grammarextractor",
                          role='egcut_epi_grammarextractor_create')

collection = storage.get_collection('psa_measurements_v22')

INFO:storage.py:99: connecting to host: '10.6.6.29', port: '5432', dbname: 'egcut_epi', user: 'dages'
INFO:storage.py:113: schema: 'grammarextractor', temporary: False, role: 'egcut_epi_grammarextractor_create'


In [2]:
texts = []
for idx, thing in enumerate(collection.select(layers = ['measurements'])):
    texts.append(thing)
    print(idx)
    break

KeyError: 6

Since `measurements` layer envelopes `measurement_tokens` layer but `measurements_old` layer does not, first flatten `measurements` layer into `measurements_flat` layer. As these layers have different attribute names, mapping of attribute names is also needed.

In [5]:
from estnltk.taggers import FlattenTagger

In [6]:
from estnltk.taggers import FlattenTagger

flatten_tagger = FlattenTagger(input_layer='measurements',
                               output_layer='measurements_flat',
                               #output_attributes=['name', 'key', 'value', 'unit', 'DATE'],
                               output_attributes=['name', 'value'],
                               attribute_mapping=(('REGEX_TYPE', 'name'), 
                                                  ('VALUE','value'), 
                                                  ('UNIT', 'unit')
                                                  )
                               )

collection.create_layer(tagger=flatten_tagger, overwrite=True, progressbar='notebook')

INFO:collection.py:856: collection: 'psa_measurements_v22'
INFO:collection.py:877: nothing to overwrite, preparing to create a new layer: 'measurements_flat'


HBox(children=(IntProgress(value=0, max=11577), HTML(value='')))

KeyError: 6

Create layer of differences.

In [5]:
from estnltk.layer_operations import drop_annotations
from estnltk.storage.postgres import RowMapperRecord

In [6]:
def row_mapper_1(row):

    text_id, text = row[0], row[1]
    
    drop_annotations(layer=text.measurements_old,
                 attribute='name',
                 values={'TIMEX', 'PRAHT', 'PÕLETIK', 'EESNÄÄRME_BIOPSIA', 
                        'VABA_PSA_PROTSENT', 'PSA_ARVUDETA', 'PSA_KÕRGE', 'PSA_OK', 'VABA_PSA_ARVUDETA',
                         'NEUTROFIILID', 'LBBB', 'STEMI', 'NSTEMI', 'INFARKTI_KUUPÄEV', 
                         'KORONAARANGIOPLASTIKA', 'KORONAARANGIOGRAAFIA', 'BASOFIILID%', 
                        'EOSINOFIILID%', 'NEUTROFIILID%', 'MONOTSÜÜDID%', 'LÜMFOTSÜÜDID%', 'MPV%', 'PDW%',
                        'SUITS_JAH', 'SUITS_EI', 'TRUS', 'DRE', 'DRE_KAHELDAV', 'HINNAKOOD'},
                 function=None  # default: None
                 )
    for idx1, thing in enumerate(text.measurements_old):

        for idx2, value in enumerate(thing.value):
            if value:
                text.measurements_old[idx1][idx2].value = value.replace(',', '.').replace(' ', '').strip('.')
                
        for idx3, name in enumerate(thing.name):
            if name == 'VERESUHKUR':
                text.measurements_old[idx1][idx3].name = 'GLÜKOOS'
            elif name == 'BMI':
                text.measurements_old[idx1][idx3].name = 'KMI'
            elif name == 'FS-CRP':
                text.measurements_old[idx1][idx3].name = 'CRP'    
            else:
                text.measurements_old[idx1][idx3].name = name

    layer = text.measurements_old
    layer.name = 'measurements_old_new'

    return [RowMapperRecord(layer=layer, meta=None)]

In [None]:
def fun():
    for idx, thing in enumerate(collection.select(layers = ['measurements_old'])):

        if len(thing[1].measurements_old) == 0:
            continue
        for span in thing[1].measurements_old:

            for annotation in span.annotations:
                if annotation.value != None:
                    if ',' in annotation.value:
                        #print(annotation.value)
                        return thing


In [8]:
collection.create_layer('measurements_old_new',
                                 data_iterator=collection.select(layers = ['measurements_old']),
                                 row_mapper=row_mapper_1, overwrite = True, progressbar='notebook')

INFO:collection.py:856: collection: 'psa_measurements_v22'
INFO:collection.py:877: nothing to overwrite, preparing to create a new layer: 'measurements_old_new'
INFO:collection.py:942: layer created: 'measurements_old_new'


In [9]:
flatten_tagger = FlattenTagger(input_layer='measurements_old_new',
                               output_layer='measurements_old_flat',
                               #output_attributes=['name', 'key', 'value', 'unit', 'DATE'],
                               output_attributes=['name', 'value', 'unit'],
                               #attribute_mapping=(('REGEX_TYPE', 'name'), 
                               #                   ('VALUE','value'), 
                               #                   )
                               )

collection.create_layer(tagger=flatten_tagger, overwrite=True, progressbar='notebook')

INFO:collection.py:856: collection: 'psa_measurements_v22'
INFO:collection.py:861: overwriting output layer: 'measurements_old_flat'
INFO:collection.py:1051: layer deleted: 'measurements_old_flat'


HBox(children=(IntProgress(value=0, max=11577), HTML(value='')))


INFO:collection.py:942: layer created: 'measurements_old_flat'


In [13]:
from collections import OrderedDict
from estnltk.taggers import DiffTagger

diff_tagger = DiffTagger(layer_a='measurements_old_new',
                         layer_b='measurements_flat',
                         output_layer='measurements_diff_old_flat',
                         output_attributes=['name',  'value', 'unit']
                        )

In [12]:
meta = OrderedDict([('unchanged_annotations', 'int'),
                    ('missing_annotations', 'int'),
                    ('extra_annotations', 'int'),
                    ('unchanged_spans', 'int'),
                    ('modified_spans', 'int'),
                    ('missing_spans', 'int'),
                    ('extra_spans', 'int'),
                    ('conflicts', 'int'),
                    ('overlapped', 'int'),
                    ('prolonged', 'int'),
                    ('shortened', 'int')])

collection.create_layer(tagger=diff_tagger,
                                 overwrite=True,
                                 meta=meta,
                                 progressbar='notebook'
                                 )

INFO:collection.py:856: collection: 'psa_measurements_v22'
INFO:collection.py:877: nothing to overwrite, preparing to create a new layer: 'measurements_diff_old_flat'


PgCollectionException: there is no 'measurements_flat' layer in the collection 'psa_measurements_v22'

## Statistics
Tabel of diff layer meta data.

In [2]:
from estnltk.taggers.standard_taggers.diff_tagger import DiffSampler


sampler = DiffSampler(collection=collection, layer='measurements_diff_old_flat')

In [3]:
sampler.layer_meta.loc[:10]

Unnamed: 0,text_id,unchanged_annotations,missing_annotations,extra_annotations,unchanged_spans,modified_spans,missing_spans,extra_spans,conflicts,overlapped,prolonged,shortened,id
0,1,0,0,0,0,0,0,0,0,0,0,0,1
1,2,0,0,0,0,0,0,0,0,0,0,0,2
2,3,0,0,0,0,0,0,0,0,0,0,0,3
3,4,2,1,1,2,0,1,1,1,0,0,1,4
4,5,0,0,0,0,0,0,0,0,0,0,0,5
5,6,0,1,1,0,0,1,1,1,0,0,1,6
6,7,0,0,0,0,0,0,0,0,0,0,0,7
7,8,0,0,0,0,0,0,0,0,0,0,0,8
8,9,0,0,0,0,0,0,0,0,0,0,0,9
9,10,1,0,0,1,0,0,0,0,0,0,0,10


In [4]:
sampler.layer_meta.sum()

text_id                  2239578201
unchanged_annotations          3482
missing_annotations           31803
extra_annotations             30425
unchanged_spans                3482
modified_spans                  153
missing_spans                 31650
extra_spans                   30272
conflicts                     26461
overlapped                       43
prolonged                        74
shortened                     26344
id                       2239578201
dtype: int64

## Random samples
The following iterations give different kind of random samlples without replacement. The domain names of samples (`'modified_spans'`, `missing_spans'` etc) correspond to the summary table columns.

In [5]:
for text_id, span_index, span in sampler.sample_spans(k=100, domain='modified_spans'):
    print('text_id: {}, span index: {}'.format(text_id, span_index))
    span.display()

text_id: 971, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 15,36,42,modified,measurements_old_flat,FS-CRP,15
,36,42,modified,measurements_flat2,CRP,15


text_id: 1047, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP-0,190,195,modified,measurements_old_flat,FS-CRP,0
,190,195,modified,measurements_flat2,CRP,0


text_id: 2952, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 5,0",39,47,modified,measurements_old_flat,FS-CRP,5.0
,39,47,modified,measurements_flat2,CRP,5.0


text_id: 3230, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkus 12,10,19,modified,measurements_old_flat,MUU_PIKKUS,12
,10,19,modified,measurements_flat2,PIKKUS,12


text_id: 4724, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 1,44",96,104,modified,measurements_old_flat,FS-CRP,1.44
,96,104,modified,measurements_flat2,CRP,1.44


text_id: 5222, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-0,5",27,34,modified,measurements_old_flat,FS-CRP,0.5
,27,34,modified,measurements_flat2,CRP,0.5


text_id: 6424, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 20,117,123,modified,measurements_old_flat,FS-CRP,20
,117,123,modified,measurements_flat2,CRP,20


text_id: 6781, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 12,55,62,modified,measurements_old_flat,FS-CRP,12
,55,62,modified,measurements_flat2,CRP,12


text_id: 8519, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 88,21,27,modified,measurements_old_flat,FS-CRP,88
,21,27,modified,measurements_flat2,CRP,88


text_id: 8663, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 1,17",141,149,modified,measurements_old_flat,FS-CRP,1.17
,141,149,modified,measurements_flat2,CRP,1.17


text_id: 8979, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkus 10,61,70,modified,measurements_old_flat,MUU_PIKKUS,10
,61,70,modified,measurements_flat2,PIKKUS,10


text_id: 8985, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkus 10,18,27,modified,measurements_old_flat,MUU_PIKKUS,10
,18,27,modified,measurements_flat2,PIKKUS,10


text_id: 8995, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkus 15,10,19,modified,measurements_old_flat,MUU_PIKKUS,15
,10,19,modified,measurements_flat2,PIKKUS,15


text_id: 9001, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkus 5,10,18,modified,measurements_old_flat,MUU_PIKKUS,5
,10,18,modified,measurements_flat2,PIKKUS,5


text_id: 11046, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-1,8",73,80,modified,measurements_old_flat,FS-CRP,1.8
,73,80,modified,measurements_flat2,CRP,1.8


text_id: 11461, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-0,9",28,35,modified,measurements_old_flat,FS-CRP,0.9
,28,35,modified,measurements_flat2,CRP,0.9


text_id: 12235, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 7,4",101,108,modified,measurements_old_flat,FS-CRP,7.4
,101,108,modified,measurements_flat2,CRP,7.4


text_id: 12593, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 1,79",31,40,modified,measurements_old_flat,FS-CRP,1.79
,31,40,modified,measurements_flat2,CRP,1.79


text_id: 14140, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 06.01,7,16,modified,measurements_old_flat,FS-CRP,6.01
,7,16,modified,measurements_flat2,CRP,6.01


text_id: 14479, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 14,3",67,75,modified,measurements_old_flat,FS-CRP,14.3
,67,75,modified,measurements_flat2,CRP,14.3


text_id: 15475, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 1,6",0,7,modified,measurements_old_flat,FS-CRP,1.6
,0,7,modified,measurements_flat2,CRP,1.6


text_id: 17363, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 25,16,23,modified,measurements_old_flat,FS-CRP,25
,16,23,modified,measurements_flat2,CRP,25


text_id: 19724, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-0,7",82,89,modified,measurements_old_flat,FS-CRP,0.7
,82,89,modified,measurements_flat2,CRP,0.7


text_id: 21102, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 0,93",84,92,modified,measurements_old_flat,FS-CRP,0.93
,84,92,modified,measurements_flat2,CRP,0.93


text_id: 22374, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-2,8",117,124,modified,measurements_old_flat,FS-CRP,2.8
,117,124,modified,measurements_flat2,CRP,2.8


text_id: 25215, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-3,3",22,29,modified,measurements_old_flat,FS-CRP,3.3
,22,29,modified,measurements_flat2,CRP,3.3


text_id: 25912, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-12,5",45,53,modified,measurements_old_flat,FS-CRP,12.5
,45,53,modified,measurements_flat2,CRP,12.5


text_id: 26665, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 0,88",68,77,modified,measurements_old_flat,FS-CRP,0.88
,68,77,modified,measurements_flat2,CRP,0.88


text_id: 26780, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 1,68",98,107,modified,measurements_old_flat,FS-CRP,1.68
,98,107,modified,measurements_flat2,CRP,1.68


text_id: 27536, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 25,211,217,modified,measurements_old_flat,FS-CRP,25
,211,217,modified,measurements_flat2,CRP,25


text_id: 28826, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-0,5",77,84,modified,measurements_old_flat,FS-CRP,0.5
,77,84,modified,measurements_flat2,CRP,0.5


text_id: 28907, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP- 22,1",11,20,modified,measurements_old_flat,FS-CRP,22.1
,11,20,modified,measurements_flat2,CRP,22.1


text_id: 29037, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP-2,149,154,modified,measurements_old_flat,FS-CRP,2
,149,154,modified,measurements_flat2,CRP,2


text_id: 30217, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-1,4",35,42,modified,measurements_old_flat,FS-CRP,1.4
,35,42,modified,measurements_flat2,CRP,1.4


text_id: 32170, span index: 0


text,start,end,span_status,input_layer_name,name,value
MCH 32.2,311,322,modified,measurements_old_flat,MCH,3.0
,311,322,modified,measurements_flat2,MCH,32.2


text_id: 35006, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP -2,110,116,modified,measurements_old_flat,FS-CRP,2
,110,116,modified,measurements_flat2,CRP,2


text_id: 35848, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 6,5",140,148,modified,measurements_old_flat,FS-CRP,6.5
,140,148,modified,measurements_flat2,CRP,6.5


text_id: 38595, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-1,7",0,7,modified,measurements_old_flat,FS-CRP,1.7
,0,7,modified,measurements_flat2,CRP,1.7


text_id: 41910, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 17,6",47,56,modified,measurements_old_flat,FS-CRP,17.6
,47,56,modified,measurements_flat2,CRP,17.6


text_id: 42117, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 9,8",75,82,modified,measurements_old_flat,FS-CRP,9.8
,75,82,modified,measurements_flat2,CRP,9.8


text_id: 42556, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-0,6",74,81,modified,measurements_old_flat,FS-CRP,0.6
,74,81,modified,measurements_flat2,CRP,0.6


text_id: 44024, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 14,3",92,100,modified,measurements_old_flat,FS-CRP,14.3
,92,100,modified,measurements_flat2,CRP,14.3


text_id: 45429, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 1,69",37,46,modified,measurements_old_flat,FS-CRP,1.69
,37,46,modified,measurements_flat2,CRP,1.69


text_id: 46320, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkus 20,11,20,modified,measurements_old_flat,MUU_PIKKUS,20
,11,20,modified,measurements_flat2,PIKKUS,20


text_id: 47268, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-7,3",43,50,modified,measurements_old_flat,FS-CRP,7.3
,43,50,modified,measurements_flat2,CRP,7.3


text_id: 48924, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 15,36,42,modified,measurements_old_flat,FS-CRP,15
,36,42,modified,measurements_flat2,CRP,15


text_id: 49791, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP-1,120,125,modified,measurements_old_flat,FS-CRP,1
,120,125,modified,measurements_flat2,CRP,1


text_id: 50259, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 5,0",12,20,modified,measurements_old_flat,FS-CRP,5.0
,12,20,modified,measurements_flat2,CRP,5.0


text_id: 51540, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 20,117,123,modified,measurements_old_flat,FS-CRP,20
,117,123,modified,measurements_flat2,CRP,20


text_id: 51824, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 16,9",38,46,modified,measurements_old_flat,FS-CRP,16.9
,38,46,modified,measurements_flat2,CRP,16.9


text_id: 57990, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP -2,110,116,modified,measurements_old_flat,FS-CRP,2
,110,116,modified,measurements_flat2,CRP,2


text_id: 59719, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 7.2,59,68,modified,measurements_old_flat,FS-CRP,7.2
,59,68,modified,measurements_flat2,CRP,7.2


text_id: 60898, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP-2,3",49,56,modified,measurements_old_flat,FS-CRP,2.3
,49,56,modified,measurements_flat2,CRP,2.3


text_id: 62294, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP-4,7,12,modified,measurements_old_flat,FS-CRP,4
,7,12,modified,measurements_flat2,CRP,4


text_id: 62733, span index: 0


text,start,end,span_status,input_layer_name,name,value
CRP 27,0,6,modified,measurements_old_flat,FS-CRP,27
,0,6,modified,measurements_flat2,CRP,27


text_id: 64447, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 4,1",66,74,modified,measurements_old_flat,FS-CRP,4.1
,66,74,modified,measurements_flat2,CRP,4.1


text_id: 65336, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 0,01",13,22,modified,measurements_old_flat,FS-CRP,0.01
,13,22,modified,measurements_flat2,CRP,0.01


text_id: 65457, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 5,23",60,68,modified,measurements_old_flat,FS-CRP,5.23
,60,68,modified,measurements_flat2,CRP,5.23


text_id: 66107, span index: 0


text,start,end,span_status,input_layer_name,name,value
"CRP 0,12",78,86,modified,measurements_old_flat,FS-CRP,0.12
,78,86,modified,measurements_flat2,CRP,0.12


In [5]:
texts = []
for idx, thing in enumerate(collection.select(layers = ['measurements_old_new', 'measurements_flat2'])):
    texts.append(thing)

In [78]:
examples = []

In [79]:
examples.append([texts[48][1].text, 0, 20, texts[48][1].measurements_flat2.value[0][0]])

In [80]:
examples.append([texts[865][1].text, 0, 6, texts[865][1].measurements_old_new.value[0][0]])

In [81]:
examples.append([texts[3715][1].text, 0, 11, texts[3715][1].measurements_old_new.value[0][0]])

In [82]:
examples.append([texts[5697][1].text, 0, 23, texts[5697][1].measurements_old_new.value[0][0]])

In [83]:
examples.append([texts[8913][1].text, 581, 640, texts[8913][1].measurements_old_new.value[0][0]])

In [90]:
examples.append([texts[10211][1].text, 445, 454, texts[10211][1].measurements_old_new.value[1][0]])

In [91]:
examples.append([texts[10795][1].text, 0, 26, 7.87])

In [92]:
examples.append([texts[11797][1].text, 0, 8, 3.89])

In [93]:
examples.append([texts[12184][1].text, 0, 26, 1.93])

In [94]:
examples.append([texts[14024][1].text, 9, 22, texts[14024][1].measurements_flat2.value[0][0]])

In [98]:
examples.append([texts[16101][1].text, 130, 168, texts[16101][1].measurements_old_new.value[0][0]])

In [99]:
examples.append([texts[16255][1].text, 13, 28, 0.005])

In [100]:
examples.append([texts[21664][1].text, 0, 17, 0.003])

In [101]:
examples.append([texts[22501][1].text, 'NA', 'NA', 'NA'])

In [103]:
examples.append([texts[25211][1].text, 27, 56, texts[25211][1].measurements_old_new.value[0][0]])

In [104]:
examples.append([texts[33042][1].text, 28, 33, 8])

In [105]:
examples.append([texts[39973][1].text, 119, 130, texts[39973][1].measurements_old_new.value[2][0]])

In [106]:
examples.append([texts[45612][1].text, 0, 25, 1.16])

In [107]:
examples.append([texts[61095][1].text, 0, 26, 0.136])

In [111]:
lines = ['PSA 03042012 - 0,83ng/ml perearsti poolt .',
 'PSA 2010. 3ng/ml, PSA 2012. 1,53ng/ml . - Bx va',
 'PSA 20105,99 ja 26.01.2012 uuesti .',
 'PSA 2011 oli 0 , 4 nG7ml .',
 'PSA 201222,25ng/ml',
 'PSA 2 aastajooksuldünaamikata , eriuuring',
 ':psa 16,81! ! ! ! ! ,',
 'Happe-aluse tasakaal 6.0 ( 5.0 .. 8.0 )',
 'loli 25 mgx1 ja Monoprili 10 mg Kolesterool 2011a',
 'Kolesterool 1k aastas .',
 'Kõrgenenud kolesterool 2a ( mõõdetud ). Ei pea dieetist kinni',
 'Kontr Verekol 08.12a Per-le juurde .',
 's vas munajuha kasvaja op , günekol 3a tagssi .',
 '08.11.2010 PSA 13.12.2011 7,2ng/ml PSADT on väike .',
 'Rütmihäire tsüklipikkus 330 msek',
 'Loote pikkus : \xa0 3 mm - vastab\xa0 5 nädalat 6 päeva.',
 'Põhjendus: PALAT 10 # ALAT maksanäitaja',
 'ärme vähk 2007 aastast cT3N0M0PSA 59ng/ml .',
 'PSA 8,5( püsib aastaid selles väärtus',
 'S,P-PSA 4.130( <4.100 µg/L )',
 'PSA 5,2.',
 'Kolesterool oli 7,9 mmol/l 0',
 'kolesterool 6.4.',
 'Kolesterool 5,2 mmol/l - esialgu dieet .',
 'SK 3900 g , SP 51 cm .',
 'Lapse kaal 5,4 kg/82 mg/0,82 ml i/m .',
 'Kehakaal 80,2 kg , KMI 25,9',
 'S,P-NT-proBNP 668 ( <125 pg/mL ) S,P-Albumiin 43 ( 35 .. 52 g/L ) S,P-ALAT 25 ( <33 U/L )',
 'PSA 6,5 ng/ml, eesnäärme maht67cm3',
 'rjeldus : Siinusbradükardia Fr 587min']

In [112]:
examples.append([lines[0], 0, 24, 0.83])
examples.append([lines[1], 0, 16, 3])
examples.append([lines[2], 0, 12, 5.99])
examples.append([lines[3], 0, 18, 0.4])
examples.append([lines[4], 0, 18, 22.25])
examples.append([lines[5], 'NA', 'NA', 'NA'])
examples.append([lines[6], 1, 10, 16.81])
examples.append([lines[7], 'NA', 'NA', 'NA'])
examples.append([lines[8], 'NA', 'NA', 'NA'])
examples.append([lines[9], 'NA', 'NA', 'NA'])
examples.append([lines[10], 'NA', 'NA', 'NA'])
examples.append([lines[11], 'NA', 'NA', 'NA'])
examples.append([lines[12], 'NA', 'NA', 'NA'])
examples.append([lines[13], 11, 34, 7.2])
examples.append([lines[16], 'NA', 'NA', 'NA'])
examples.append([lines[17], 30, 41, 59])
examples.append([lines[18], 0, 7, 8.5])
examples.append([lines[19], 0, 14, 4.13])
examples.append([lines[20], 0, 7, 5.2])
examples.append([lines[21], 0, 26, 7.9])
examples.append([lines[24], 0, 9, 3900])

In [114]:
import csv
with open("examples_for_testing.csv", "w") as fout:
    writer = csv.writer(fout)
    for line in examples:
        writer.writerow(line)

In [97]:
texts[16101][1].measurements_old_new[0]

text,start,end,name,key,value,unit,low,high,systolic,diastolic,pulse,year,month,day,hour,minute,prygi,subject
"fS,fP-Gluc 0 min (pre 75g Gluc PO) 8.0",130,168,GLÜKOOS,"fS,fP-Gluc",8.0,,,,,,,,,,,,,


In [28]:
c = 0
for text_id, span_index, spans in sampler.sample_spans(50000, 'conflicts'):
    if spans[0].value == spans[1].value and spans[0].name == spans[1].name:
        continue
    if spans[0].name[0].strip() == 'VERERÕHK' and spans[1].name[0].strip() == 'VERERÕHK':
        continue
    if spans[0].name[0].strip() == 'PULSS' and spans[1].name[0].strip() == 'PULSS':
        continue    
    print('text_id: {}, span index: {}'.format(text_id, span_index))
    c += 1
    spans[0].display()
    spans[1].display()

text_id: 49, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 19 05,0,10,missing,measurements_old_new,PSA,1905


text,start,end,span_status,input_layer_name,name,value
"PSA 19 05 2011 -1,32",0,20,extra,measurements_flat2,PSA,1.32


text_id: 785, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA - 14.08,21,33,missing,measurements_old_new,PSA,14.08


text,start,end,span_status,input_layer_name,name,value
"PSA - 14.08.2013 - 11,48",22,46,extra,measurements_flat2,PSA,11.48


text_id: 866, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 97,0,6,missing,measurements_old_new,PSA,97


text,start,end,span_status,input_layer_name,name,value
PSA 97.,0,8,extra,measurements_flat2,PSA,


text_id: 1911, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkus 22 mm,4,16,missing,measurements_old_new,MUU_PIKKUS,22


text,start,end,span_status,input_layer_name,name,value
pikkus 22,4,13,extra,measurements_flat2,PIKKUS,22


text_id: 1930, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 31 03,0,10,missing,measurements_old_new,PSA,3103


text,start,end,span_status,input_layer_name,name,value
"PSA 31 03 2015- 2,68",0,21,extra,measurements_flat2,PSA,2.68


text_id: 3716, span index: 0


text,start,end,span_status,input_layer_name,name,value
"PSA - 0.,83",0,11,missing,measurements_old_new,PSA,0..83


text,start,end,span_status,input_layer_name,name,value
PSA - 0,0,7,extra,measurements_flat2,PSA,0


text_id: 4115, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 14 10,19,31,missing,measurements_old_new,PSA,1410


text,start,end,span_status,input_layer_name,name,value
"PSA 14 10 2014-0,197",21,41,extra,measurements_flat2,PSA,0.197


text_id: 4578, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 01 11,2,14,missing,measurements_old_new,PSA,111


text,start,end,span_status,input_layer_name,name,value
"PSA 01 11 2012-3,75",3,23,extra,measurements_flat2,PSA,3.75


text_id: 5110, span index: 0


text,start,end,span_status,input_layer_name,name,value
Kreatiniin 73,0,13,missing,measurements_old_new,KREATINIIN,73


text,start,end,span_status,input_layer_name,name,value
Kreatiniin 73.,0,15,extra,measurements_flat2,KREATINIIN,


text_id: 5132, span index: 1


text,start,end,span_status,input_layer_name,name,value
"pikkus 178,5 cm",14,29,missing,measurements_old_new,MUU_PIKKUS,178.5


text,start,end,span_status,input_layer_name,name,value
"pikkus 178,5",14,26,extra,measurements_flat2,PIKKUS,178.5


text_id: 5698, span index: 0


text,start,end,span_status,input_layer_name,name,value
"PSA 19082014-0,067ng/ml",0,23,missing,measurements_old_new,PSA,0.067


text,start,end,span_status,input_layer_name,name,value
PSA 19082014,0,12,extra,measurements_flat2,PSA,2014


text_id: 6677, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 11 04,0,11,missing,measurements_old_new,PSA,1104


text,start,end,span_status,input_layer_name,name,value
"PSA 11 04 2013- 7,12",0,21,extra,measurements_flat2,PSA,7.12


text_id: 6677, span index: 1


text,start,end,span_status,input_layer_name,name,value
PSA 16 12,29,40,missing,measurements_old_new,PSA,1612


text,start,end,span_status,input_layer_name,name,value
"PSA 16 12 2013-6,88",30,49,extra,measurements_flat2,PSA,6.88


text_id: 7345, span index: 0


text,start,end,span_status,input_layer_name,name,value
\nPSA 14 10,11,22,missing,measurements_old_new,PSA,1410


text,start,end,span_status,input_layer_name,name,value
"PSA 14 10 2011- 6,85",12,32,extra,measurements_flat2,PSA,6.85


text_id: 7688, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 29 11,0,10,missing,measurements_old_new,PSA,2911


text,start,end,span_status,input_layer_name,name,value
"PSA 29 11 2012 -2,44",0,20,extra,measurements_flat2,PSA,2.44


text_id: 7767, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 16 07,0,11,missing,measurements_old_new,PSA,1607


text,start,end,span_status,input_layer_name,name,value
"PSA 16 07 2015-2,9",0,19,extra,measurements_flat2,PSA,2.9


text_id: 7769, span index: 0


text,start,end,span_status,input_layer_name,name,value
Kreatiniin 23,0,13,missing,measurements_old_new,KREATINIIN,23


text,start,end,span_status,input_layer_name,name,value
Kreatiniin 23 09 2014-62,0,24,extra,measurements_flat2,KREATINIIN,62


text_id: 7947, span index: 0


text,start,end,span_status,input_layer_name,name,value
Kasv 100,33,41,missing,measurements_old_new,PIKKUS,100


text,start,end,span_status,input_layer_name,name,value
Kasv 1000,33,42,extra,measurements_flat2,PIKKUS,1000


text_id: 8725, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 05,0,10,missing,measurements_old_new,PSA,1205


text,start,end,span_status,input_layer_name,name,value
"PSA 12 05 2011-0,957",0,20,extra,measurements_flat2,PSA,0.957


text_id: 8725, span index: 1


text,start,end,span_status,input_layer_name,name,value
PSA 3005 2013,28,43,missing,measurements_old_new,PSA,30052013


text,start,end,span_status,input_layer_name,name,value
"PSA 3005 2013 -3,93",29,48,extra,measurements_flat2,PSA,3.93


text_id: 8730, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 15 10,0,10,missing,measurements_old_new,PSA,1510


text,start,end,span_status,input_layer_name,name,value
"PSA 15 10 2014 -3,31",0,20,extra,measurements_flat2,PSA,3.31


text_id: 8913, span index: 0


text,start,end,span_status,input_layer_name,name,value
"\nPSA 19082014-0,067ng/ml",20,44,missing,measurements_old_new,PSA,0.067


text,start,end,span_status,input_layer_name,name,value
PSA 19082014,21,33,extra,measurements_flat2,PSA,2014


text_id: 9358, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 11 06,0,10,missing,measurements_old_new,PSA,1106


text,start,end,span_status,input_layer_name,name,value
"PSA 11 06 2014 -5,06",0,20,extra,measurements_flat2,PSA,5.06


text_id: 9877, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 97,0,6,missing,measurements_old_new,PSA,97


text,start,end,span_status,input_layer_name,name,value
PSA 97.,0,8,extra,measurements_flat2,PSA,


text_id: 10173, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 02 09,0,10,missing,measurements_old_new,PSA,209


text,start,end,span_status,input_layer_name,name,value
"PSA 02 09 2015 - 3,9",0,20,extra,measurements_flat2,PSA,3.9


text_id: 10212, span index: 1


text,start,end,span_status,input_layer_name,name,value
"KMI 25,,6",445,454,missing,measurements_old_new,KMI,25..6


text,start,end,span_status,input_layer_name,name,value
KMI 25,445,451,extra,measurements_flat2,KMI,25


text_id: 10796, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 02.2013,0,12,missing,measurements_old_new,PSA,2.2013


text,start,end,span_status,input_layer_name,name,value
PSA 02,0,6,extra,measurements_flat2,PSA,2


text_id: 11114, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 27 02,39,51,missing,measurements_old_new,PSA,2702


text,start,end,span_status,input_layer_name,name,value
"PSA 27 02 2014 - 0,475",40,63,extra,measurements_flat2,PSA,0.475


text_id: 11363, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 97,0,6,missing,measurements_old_new,PSA,97


text,start,end,span_status,input_layer_name,name,value
PSA 97.,0,8,extra,measurements_flat2,PSA,


text_id: 11798, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 3 89,0,9,missing,measurements_old_new,PSA,389


text,start,end,span_status,input_layer_name,name,value
PSA 3,0,5,extra,measurements_flat2,PSA,3


text_id: 12185, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 17 1,0,10,missing,measurements_old_new,PSA,171


text,start,end,span_status,input_layer_name,name,value
PSA 17,0,7,extra,measurements_flat2,PSA,17


text_id: 12940, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 04 08,22,33,missing,measurements_old_new,PSA,408


text,start,end,span_status,input_layer_name,name,value
"PSA 04 08 2014 5,63",23,42,extra,measurements_flat2,PSA,5.63


text_id: 13187, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 7mm,49,62,missing,measurements_old_new,MUU_PIKKUS,7


text,start,end,span_status,input_layer_name,name,value
pikkusega 7,49,60,extra,measurements_flat2,PIKKUS,7


text_id: 14014, span index: 0


text,start,end,span_status,input_layer_name,name,value
\nPSA 01 04,11,22,missing,measurements_old_new,PSA,104


text,start,end,span_status,input_layer_name,name,value
"PSA 01 04 2013 -6,92",12,33,extra,measurements_flat2,PSA,6.92


text_id: 14025, span index: 0


text,start,end,span_status,input_layer_name,name,value
Kol 2011 4,9,19,missing,measurements_old_new,KOLESTEROOL,20114


text,start,end,span_status,input_layer_name,name,value
"Kol 2011 4, 2",9,22,extra,measurements_flat2,KOLESTEROOL,4.2


text_id: 14062, span index: 0


text,start,end,span_status,input_layer_name,name,value
Testo 14,0,8,missing,measurements_old_new,TESTOSTEROON,14


text,start,end,span_status,input_layer_name,name,value
"Testo 14, 7",0,11,extra,measurements_flat2,TESTOSTEROON,14.7


text_id: 14075, span index: 1


text,start,end,span_status,input_layer_name,name,value
pikkus 185 cm,12,25,missing,measurements_old_new,MUU_PIKKUS,185


text,start,end,span_status,input_layer_name,name,value
pikkus 185,12,22,extra,measurements_flat2,PIKKUS,185


text_id: 14171, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 23 01,0,11,missing,measurements_old_new,PSA,2301


text,start,end,span_status,input_layer_name,name,value
"PSA 23 01 2015 -0,404",0,22,extra,measurements_flat2,PSA,0.404


text_id: 15588, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkus 180\n,9,20,missing,measurements_old_new,MUU_PIKKUS,180


text,start,end,span_status,input_layer_name,name,value
pikkus 180,9,19,extra,measurements_flat2,PIKKUS,180


text_id: 16102, span index: 0


text,start,end,span_status,input_layer_name,name,value
"fS,fP-Gluc 0 min (pre 75g Gluc PO) 8.0",130,168,missing,measurements_old_new,GLÜKOOS,8.0


text,start,end,span_status,input_layer_name,name,value
Gluc 0,136,142,extra,measurements_flat2,GLÜKOOS,0


text_id: 16102, span index: 2


text,start,end,span_status,input_layer_name,name,value
"S,P-Gluc 120 min (post 75g Gluc PO) 9.2",211,250,missing,measurements_old_new,GLÜKOOS,9.2


text,start,end,span_status,input_layer_name,name,value
Gluc 120,215,223,extra,measurements_flat2,GLÜKOOS,120


text_id: 16153, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12.2013,0,12,missing,measurements_old_new,PSA,12.2013


text,start,end,span_status,input_layer_name,name,value
PSA 12,0,6,extra,measurements_flat2,PSA,12


text_id: 16163, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 05,0,10,missing,measurements_old_new,PSA,1205


text,start,end,span_status,input_layer_name,name,value
"PSA 12 05 2011-0,957",0,20,extra,measurements_flat2,PSA,0.957


text_id: 16163, span index: 1


text,start,end,span_status,input_layer_name,name,value
PSA 3005 2013,28,43,missing,measurements_old_new,PSA,30052013


text,start,end,span_status,input_layer_name,name,value
"PSA 3005 2013 -3,93",29,48,extra,measurements_flat2,PSA,3.93


text_id: 16256, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA3 0,13,19,missing,measurements_old_new,PSA,30


text,start,end,span_status,input_layer_name,name,value
PSA3,13,17,extra,measurements_flat2,PSA,3


text_id: 16298, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 24 04,0,10,missing,measurements_old_new,PSA,2404


text,start,end,span_status,input_layer_name,name,value
"PSA 24 04 2015 - 8,1",0,20,extra,measurements_flat2,PSA,8.1


text_id: 16738, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 19mm,32,46,missing,measurements_old_new,MUU_PIKKUS,19


text,start,end,span_status,input_layer_name,name,value
pikkusega 19,32,44,extra,measurements_flat2,PIKKUS,19


text_id: 17007, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA- 2012,0,9,missing,measurements_old_new,PSA,2012


text,start,end,span_status,input_layer_name,name,value
"PSA- 2012- 2,42",0,15,extra,measurements_flat2,PSA,2.42


text_id: 17018, span index: 0


text,start,end,span_status,input_layer_name,name,value
"fS,fP-Gluc 0 min (pre 75g Gluc PO) 6.5",234,272,missing,measurements_old_new,GLÜKOOS,6.5


text,start,end,span_status,input_layer_name,name,value
Gluc 0,240,246,extra,measurements_flat2,GLÜKOOS,0


text_id: 17018, span index: 2


text,start,end,span_status,input_layer_name,name,value
"S,P-Gluc 120 min (post 75g Gluc PO) 10.4",315,355,missing,measurements_old_new,GLÜKOOS,10.4


text,start,end,span_status,input_layer_name,name,value
Gluc 120,319,327,extra,measurements_flat2,GLÜKOOS,120


text_id: 17933, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkus 22mm,44,55,missing,measurements_old_new,MUU_PIKKUS,22


text,start,end,span_status,input_layer_name,name,value
pikkus 22,44,53,extra,measurements_flat2,PIKKUS,22


text_id: 19444, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 28 03,0,11,missing,measurements_old_new,PSA,2803


text,start,end,span_status,input_layer_name,name,value
"PSA 28 03 2012-2,1",0,19,extra,measurements_flat2,PSA,2.1


text_id: 21005, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 09,0,10,missing,measurements_old_new,PSA,1209


text,start,end,span_status,input_layer_name,name,value
"PSA 12 09 2013 - 9,03",0,21,extra,measurements_flat2,PSA,9.03


text_id: 21017, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 09,0,10,missing,measurements_old_new,PSA,1209


text,start,end,span_status,input_layer_name,name,value
"PSA 12 09 2013 - 9,03",0,21,extra,measurements_flat2,PSA,9.03


text_id: 21043, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 19 05,0,10,missing,measurements_old_new,PSA,1905


text,start,end,span_status,input_layer_name,name,value
"PSA 19 05 2011-2,66",0,19,extra,measurements_flat2,PSA,2.66


text_id: 21665, span index: 0


text,start,end,span_status,input_layer_name,name,value
"PSA 0,00326.11.12",0,17,missing,measurements_old_new,PSA,0.003


text,start,end,span_status,input_layer_name,name,value
PSA 0,0,5,extra,measurements_flat2,PSA,0


text_id: 22502, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12.2013,0,11,missing,measurements_old_new,PSA,12.2013


text,start,end,span_status,input_layer_name,name,value
PSA 12,0,6,extra,measurements_flat2,PSA,12


text_id: 23270, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 20.11,0,9,missing,measurements_old_new,PSA,20.11


text,start,end,span_status,input_layer_name,name,value
"PSA 20.11. 2012 -0,972",0,22,extra,measurements_flat2,PSA,0.972


text_id: 24279, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 25 11,0,10,missing,measurements_old_new,PSA,2511


text,start,end,span_status,input_layer_name,name,value
"PSA 25 11 2015 -1,25",0,20,extra,measurements_flat2,PSA,1.25


text_id: 25141, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 31 03,0,10,missing,measurements_old_new,PSA,3103


text,start,end,span_status,input_layer_name,name,value
"PSA 31 03 2015- 2,68",0,21,extra,measurements_flat2,PSA,2.68


text_id: 25212, span index: 0


text,start,end,span_status,input_layer_name,name,value
IPSS 0-2-0-1-1-0-1 = 5 punkti,27,56,missing,measurements_old_new,IPSS,5


text,start,end,span_status,input_layer_name,name,value
IPSS 0,26,33,extra,measurements_flat2,IPSS,0


text_id: 25510, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 30.08,31,41,missing,measurements_old_new,PSA,30.08


text,start,end,span_status,input_layer_name,name,value
"PSA 30.08.2010- 0,98",32,52,extra,measurements_flat2,PSA,0.98


text_id: 26919, span index: 1


text,start,end,span_status,input_layer_name,name,value
pikkus 22 mm,61,73,missing,measurements_old_new,MUU_PIKKUS,22


text,start,end,span_status,input_layer_name,name,value
pikkus 22,61,70,extra,measurements_flat2,PIKKUS,22


text_id: 27008, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 20 052013,0,15,missing,measurements_old_new,PSA,20052013


text,start,end,span_status,input_layer_name,name,value
"PSA 20 052013 -0,259",0,21,extra,measurements_flat2,PSA,0.259


text_id: 27011, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 29 09,0,10,missing,measurements_old_new,PSA,2909


text,start,end,span_status,input_layer_name,name,value
"PSA 29 09 2011 -1,42",0,20,extra,measurements_flat2,PSA,1.42


text_id: 27673, span index: 0


text,start,end,span_status,input_layer_name,name,value
Kasv 100,33,41,missing,measurements_old_new,PIKKUS,100


text,start,end,span_status,input_layer_name,name,value
Kasv 1000,33,42,extra,measurements_flat2,PIKKUS,1000


text_id: 27989, span index: 0


text,start,end,span_status,input_layer_name,name,value
IPSS-1-2-1-2-5--2-1=14p,7,30,missing,measurements_old_new,IPSS,14


text,start,end,span_status,input_layer_name,name,value
IPSS-1,6,13,extra,measurements_flat2,IPSS,1


text_id: 28424, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 04 02,0,11,missing,measurements_old_new,PSA,402


text,start,end,span_status,input_layer_name,name,value
"PSA 04 02 2016-0,868",0,21,extra,measurements_flat2,PSA,0.868


text_id: 29888, span index: 0


text,start,end,span_status,input_layer_name,name,value
IPSS-1-2-1-2-5--2-1=14p,7,30,missing,measurements_old_new,IPSS,14


text,start,end,span_status,input_layer_name,name,value
IPSS-1,6,13,extra,measurements_flat2,IPSS,1


text_id: 29951, span index: 0


text,start,end,span_status,input_layer_name,name,value
IPSS 2+2+1+1+2+1+1/10,13,34,missing,measurements_old_new,IPSS,10


text,start,end,span_status,input_layer_name,name,value
IPSS 2,12,19,extra,measurements_flat2,IPSS,2


text_id: 33043, span index: 0


text,start,end,span_status,input_layer_name,name,value
"PSA 8, 2013",27,40,missing,measurements_old_new,PSA,8.2013


text,start,end,span_status,input_layer_name,name,value
"PSA 8,",28,35,extra,measurements_flat2,PSA,


text_id: 33633, span index: 0


text,start,end,span_status,input_layer_name,name,value
\nPSA 09 07,20,32,missing,measurements_old_new,PSA,907


text,start,end,span_status,input_layer_name,name,value
"PSA 09 07 2014 -8,03",21,43,extra,measurements_flat2,PSA,8.03


text_id: 35265, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 02 11,0,10,missing,measurements_old_new,PSA,211


text,start,end,span_status,input_layer_name,name,value
"PSA 02 11 2015 - 0,34",0,21,extra,measurements_flat2,PSA,0.34


text_id: 36632, span index: 3


text,start,end,span_status,input_layer_name,name,value
FR 63,136,141,missing,measurements_old_new,PULSS,


text,start,end,span_status,input_layer_name,name,value
"eGFR 63,88",134,144,extra,measurements_flat2,name,63.88


text_id: 38214, span index: 1


text,start,end,span_status,input_layer_name,name,value
"PSA 3,4",0,9,missing,measurements_old_new,PSA,3.4


text,start,end,span_status,input_layer_name,name,value
IPSS 20,8,16,extra,measurements_flat2,IPSS,20


text_id: 38376, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 11mm,207,221,missing,measurements_old_new,MUU_PIKKUS,11


text,start,end,span_status,input_layer_name,name,value
pikkusega 11,207,219,extra,measurements_flat2,PIKKUS,11


text_id: 38379, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 8mm,266,279,missing,measurements_old_new,MUU_PIKKUS,8


text,start,end,span_status,input_layer_name,name,value
pikkusega 8,266,277,extra,measurements_flat2,PIKKUS,8


text_id: 38382, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 11mm,267,281,missing,measurements_old_new,MUU_PIKKUS,11


text,start,end,span_status,input_layer_name,name,value
pikkusega 11,267,279,extra,measurements_flat2,PIKKUS,11


text_id: 38385, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 6mm,154,167,missing,measurements_old_new,MUU_PIKKUS,6


text,start,end,span_status,input_layer_name,name,value
pikkusega 6,154,165,extra,measurements_flat2,PIKKUS,6


text_id: 38388, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 11mm,153,167,missing,measurements_old_new,MUU_PIKKUS,11


text,start,end,span_status,input_layer_name,name,value
pikkusega 11,153,165,extra,measurements_flat2,PIKKUS,11


text_id: 38391, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 12mm,157,171,missing,measurements_old_new,MUU_PIKKUS,12


text,start,end,span_status,input_layer_name,name,value
pikkusega 12,157,169,extra,measurements_flat2,PIKKUS,12


text_id: 38394, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 12mm,243,257,missing,measurements_old_new,MUU_PIKKUS,12


text,start,end,span_status,input_layer_name,name,value
pikkusega 12,243,255,extra,measurements_flat2,PIKKUS,12


text_id: 38397, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 12mm,259,273,missing,measurements_old_new,MUU_PIKKUS,12


text,start,end,span_status,input_layer_name,name,value
pikkusega 12,259,271,extra,measurements_flat2,PIKKUS,12


text_id: 38400, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 13mm,293,307,missing,measurements_old_new,MUU_PIKKUS,13


text,start,end,span_status,input_layer_name,name,value
pikkusega 13,293,305,extra,measurements_flat2,PIKKUS,13


text_id: 38403, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 12mm,342,356,missing,measurements_old_new,MUU_PIKKUS,12


text,start,end,span_status,input_layer_name,name,value
pikkusega 12,342,354,extra,measurements_flat2,PIKKUS,12


text_id: 38406, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 13mm,209,223,missing,measurements_old_new,MUU_PIKKUS,13


text,start,end,span_status,input_layer_name,name,value
pikkusega 13,209,221,extra,measurements_flat2,PIKKUS,13


text_id: 38409, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 14mm,241,255,missing,measurements_old_new,MUU_PIKKUS,14


text,start,end,span_status,input_layer_name,name,value
pikkusega 14,241,253,extra,measurements_flat2,PIKKUS,14


text_id: 38430, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 14 mm,32,47,missing,measurements_old_new,MUU_PIKKUS,14


text,start,end,span_status,input_layer_name,name,value
pikkusega 14,32,44,extra,measurements_flat2,PIKKUS,14


text_id: 38536, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 18 02,0,11,missing,measurements_old_new,PSA,1802


text,start,end,span_status,input_layer_name,name,value
"PSA 18 02 2016 -1,59",0,22,extra,measurements_flat2,PSA,1.59


text_id: 39058, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 11mm,98,112,missing,measurements_old_new,MUU_PIKKUS,11


text,start,end,span_status,input_layer_name,name,value
pikkusega 11,98,110,extra,measurements_flat2,PIKKUS,11


text_id: 39061, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 8mm,191,204,missing,measurements_old_new,MUU_PIKKUS,8


text,start,end,span_status,input_layer_name,name,value
pikkusega 8,191,202,extra,measurements_flat2,PIKKUS,8


text_id: 39064, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 11mm,192,206,missing,measurements_old_new,MUU_PIKKUS,11


text,start,end,span_status,input_layer_name,name,value
pikkusega 11,192,204,extra,measurements_flat2,PIKKUS,11


text_id: 39067, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 6mm,109,122,missing,measurements_old_new,MUU_PIKKUS,6


text,start,end,span_status,input_layer_name,name,value
pikkusega 6,109,120,extra,measurements_flat2,PIKKUS,6


text_id: 39070, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 11mm,108,122,missing,measurements_old_new,MUU_PIKKUS,11


text,start,end,span_status,input_layer_name,name,value
pikkusega 11,108,120,extra,measurements_flat2,PIKKUS,11


text_id: 39073, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 12mm,112,126,missing,measurements_old_new,MUU_PIKKUS,12


text,start,end,span_status,input_layer_name,name,value
pikkusega 12,112,124,extra,measurements_flat2,PIKKUS,12


text_id: 39076, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 12mm,183,197,missing,measurements_old_new,MUU_PIKKUS,12


text,start,end,span_status,input_layer_name,name,value
pikkusega 12,183,195,extra,measurements_flat2,PIKKUS,12


text_id: 39079, span index: 0


text,start,end,span_status,input_layer_name,name,value
pikkusega 12mm,184,198,missing,measurements_old_new,MUU_PIKKUS,12


text,start,end,span_status,input_layer_name,name,value
pikkusega 12,184,196,extra,measurements_flat2,PIKKUS,12


text_id: 39974, span index: 2


text,start,end,span_status,input_layer_name,name,value
RBC 5.03 10,119,135,missing,measurements_old_new,ERÜTROTSÜÜDID,5.03


text,start,end,span_status,input_layer_name,name,value
RBC 5,119,127,extra,measurements_flat2,ERÜTROTSÜÜDID,5


text_id: 43197, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 14 05,53,64,missing,measurements_old_new,PSA,1405


text,start,end,span_status,input_layer_name,name,value
"PSA 14 05 2014 -18,21",54,75,extra,measurements_flat2,PSA,18.21


text_id: 43422, span index: 0


text,start,end,span_status,input_layer_name,name,value
"\nPSA 20012015 8,26ng/ml",20,43,missing,measurements_old_new,PSA,8.26


text,start,end,span_status,input_layer_name,name,value
PSA 20012015,21,33,extra,measurements_flat2,PSA,2015


text_id: 44197, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 11,0,11,missing,measurements_old_new,PSA,1211


text,start,end,span_status,input_layer_name,name,value
"PSA 12 11 2013 - 3,73",0,22,extra,measurements_flat2,PSA,3.73


text_id: 45546, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 18 07,0,10,missing,measurements_old_new,PSA,1807


text,start,end,span_status,input_layer_name,name,value
"PSA 18 07 2014 - 4,05",0,21,extra,measurements_flat2,PSA,4.05


text_id: 45613, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 1,0,10,missing,measurements_old_new,PSA,121


text,start,end,span_status,input_layer_name,name,value
PSA 12,0,7,extra,measurements_flat2,PSA,12


text_id: 45617, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 14 03,0,10,missing,measurements_old_new,PSA,1403


text,start,end,span_status,input_layer_name,name,value
"PSA 14 03 2012 -0,06",0,20,extra,measurements_flat2,PSA,0.06


text_id: 46248, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 31 01,0,10,missing,measurements_old_new,PSA,3101


text,start,end,span_status,input_layer_name,name,value
"PSA 31 01 2014-9,13",0,19,extra,measurements_flat2,PSA,9.13


text_id: 47836, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 21 01,0,10,missing,measurements_old_new,PSA,2101


text,start,end,span_status,input_layer_name,name,value
"PSA 21 01 2016-0,710",0,20,extra,measurements_flat2,PSA,0.71


text_id: 48164, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 18 05,0,10,missing,measurements_old_new,PSA,1805


text,start,end,span_status,input_layer_name,name,value
"PSA 18 05 2015 - 2,11",0,21,extra,measurements_flat2,PSA,2.11


text_id: 48499, span index: 0


text,start,end,span_status,input_layer_name,name,value
"PSA 20022015-0,16ng/ml",0,23,missing,measurements_old_new,PSA,0.16


text,start,end,span_status,input_layer_name,name,value
PSA 20022015,0,13,extra,measurements_flat2,PSA,2015


text_id: 48786, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 11,0,10,missing,measurements_old_new,PSA,1211


text,start,end,span_status,input_layer_name,name,value
"PSA 12 11 2012 1,67",0,19,extra,measurements_flat2,PSA,1.67


text_id: 48786, span index: 1


text,start,end,span_status,input_layer_name,name,value
PSA 29 11,27,38,missing,measurements_old_new,PSA,2911


text,start,end,span_status,input_layer_name,name,value
"PSA 29 11 2013 1,71",28,47,extra,measurements_flat2,PSA,1.71


text_id: 50705, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 97,0,6,missing,measurements_old_new,PSA,97


text,start,end,span_status,input_layer_name,name,value
PSA 97.,0,8,extra,measurements_flat2,PSA,


text_id: 50746, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12.2013,0,11,missing,measurements_old_new,PSA,12.2013


text,start,end,span_status,input_layer_name,name,value
PSA 12,0,6,extra,measurements_flat2,PSA,12


text_id: 50902, span index: 0


text,start,end,span_status,input_layer_name,name,value
"PSA 20012016-0,573ng/ml",0,24,missing,measurements_old_new,PSA,0.573


text,start,end,span_status,input_layer_name,name,value
PSA 20012016,0,13,extra,measurements_flat2,PSA,2016


text_id: 53320, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 25 04,0,11,missing,measurements_old_new,PSA,2504


text,start,end,span_status,input_layer_name,name,value
"PSA 25 04 2015-6,82",0,20,extra,measurements_flat2,PSA,6.82


text_id: 53321, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 01 03,0,11,missing,measurements_old_new,PSA,103


text,start,end,span_status,input_layer_name,name,value
"PSA 01 03 2016-8,9",0,19,extra,measurements_flat2,PSA,8.9


text_id: 53413, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 2 30,0,9,missing,measurements_old_new,PSA,230


text,start,end,span_status,input_layer_name,name,value
PSA 2,0,5,extra,measurements_flat2,PSA,2


text_id: 53845, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 07 05,0,11,missing,measurements_old_new,PSA,705


text,start,end,span_status,input_layer_name,name,value
"PSA 07 05 2014 -0,455",0,22,extra,measurements_flat2,PSA,0.455


text_id: 54058, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 07 12,0,10,missing,measurements_old_new,PSA,712


text,start,end,span_status,input_layer_name,name,value
"PSA 07 12 2015- 7,32",0,20,extra,measurements_flat2,PSA,7.32


text_id: 55041, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12.2013,0,12,missing,measurements_old_new,PSA,12.2013


text,start,end,span_status,input_layer_name,name,value
PSA 12,0,6,extra,measurements_flat2,PSA,12


text_id: 56869, span index: 0


text,start,end,span_status,input_layer_name,name,value
\nPSA 24 04,11,22,missing,measurements_old_new,PSA,2404


text,start,end,span_status,input_layer_name,name,value
"PSA 24 04 2013-2,89",12,31,extra,measurements_flat2,PSA,2.89


text_id: 58645, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 18 02,0,11,missing,measurements_old_new,PSA,1802


text,start,end,span_status,input_layer_name,name,value
"PSA 18 02 2015-1,64",0,20,extra,measurements_flat2,PSA,1.64


text_id: 58748, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 97,0,6,missing,measurements_old_new,PSA,97


text,start,end,span_status,input_layer_name,name,value
PSA 97.,0,8,extra,measurements_flat2,PSA,


text_id: 59993, span index: 0


text,start,end,span_status,input_layer_name,name,value
"PSA 0,00326.11.12",0,17,missing,measurements_old_new,PSA,0.003


text,start,end,span_status,input_layer_name,name,value
PSA 0,0,5,extra,measurements_flat2,PSA,0


text_id: 60068, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 97,0,6,missing,measurements_old_new,PSA,97


text,start,end,span_status,input_layer_name,name,value
PSA 97.,0,8,extra,measurements_flat2,PSA,


text_id: 60710, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 08 10,0,11,missing,measurements_old_new,PSA,810


text,start,end,span_status,input_layer_name,name,value
"PSA 08 10 2015- 3,05",0,21,extra,measurements_flat2,PSA,3.05


text_id: 60712, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 11,1,12,missing,measurements_old_new,PSA,1211


text,start,end,span_status,input_layer_name,name,value
"PSA 12 11 2013 - 3,73",2,23,extra,measurements_flat2,PSA,3.73


text_id: 61096, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 012,0,11,missing,measurements_old_new,PSA,12012


text,start,end,span_status,input_layer_name,name,value
PSA 12,0,6,extra,measurements_flat2,PSA,12


text_id: 61530, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 02 10,2,13,missing,measurements_old_new,PSA,210


text,start,end,span_status,input_layer_name,name,value
"PSA 02 10 2015- 5,67",3,23,extra,measurements_flat2,PSA,5.67


text_id: 63204, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 15 01,0,11,missing,measurements_old_new,PSA,1501


text,start,end,span_status,input_layer_name,name,value
"PSA 15 01 2014- 1,53",0,21,extra,measurements_flat2,PSA,1.53


text_id: 64136, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 19 05,0,10,missing,measurements_old_new,PSA,1905


text,start,end,span_status,input_layer_name,name,value
"PSA 19 05 2011-2,66",0,19,extra,measurements_flat2,PSA,2.66


text_id: 64191, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 15 04,0,10,missing,measurements_old_new,PSA,1504


text,start,end,span_status,input_layer_name,name,value
"PSA 15 04 2015- 3,01",0,20,extra,measurements_flat2,PSA,3.01


text_id: 65342, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 06 02,0,11,missing,measurements_old_new,PSA,602


text,start,end,span_status,input_layer_name,name,value
"PSA 06 02 2014 -0,786",0,22,extra,measurements_flat2,PSA,0.786


text_id: 65733, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 2 30,0,9,missing,measurements_old_new,PSA,230


text,start,end,span_status,input_layer_name,name,value
PSA 2,0,5,extra,measurements_flat2,PSA,2


text_id: 65801, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 01 09,0,10,missing,measurements_old_new,PSA,109


text,start,end,span_status,input_layer_name,name,value
PSA 01 09 2015 -1,0,17,extra,measurements_flat2,PSA,1


In [29]:
38214

134

In [14]:
for text_id, span_index, spans in sampler.sample_spans(2, 'overlapped'):
    print('text_id: {}, span index: {}'.format(text_id, span_index))
    spans[0].display()
    spans[1].display()

text_id: 33766, span index: 0


text,start,end,span_status,input_layer_name,name,value
IPSS 19p,18,26,missing,measurements_old_flat,IPSS,19


text,start,end,span_status,input_layer_name,name,value
IPSS 19,17,25,extra,measurements_flat2,IPSS,19


text_id: 60712, span index: 0


text,start,end,span_status,input_layer_name,name,value
PSA 12 11,1,12,missing,measurements_old_flat,PSA,1211


text,start,end,span_status,input_layer_name,name,value
"PSA 12 11 2013 - 3,73",2,23,extra,measurements_flat2,PSA,3.73


In [None]:
for text_id, span_index, spans in sampler.sample_spans(2, 'prolonged'):
    print('text_id: {}, span index: {}'.format(text_id, span_index))
    spans[0].display()
    spans[1].display()

In [None]:
for text_id, span_index, spans in sampler.sample_spans(2, 'shortened'):
    print('text_id: {}, span index: {}'.format(text_id, span_index))
    spans[0].display()
    spans[1].display()

Display `measurements_diff_1_2` layer.

In [None]:
from estnltk import Layer
Layer.print_start_end = True

texts = collection.select(layers=['measurement_tokens',
                                  'measurements_old',
                                  'measurements_flat',
                                  'measurements_diff_old_flat'],
                          keys=[4])
text_id, text = next(texts)
text.measurements_diff_old_flat

In [None]:
storage.close()