# Measurement taggers

``RobustDateNumberTagger``, ``MeasurementObjectTagger`` and ``UnitTagger`` all wrap ``RegexTagger`` and differ mainly by ``vocabulary``. These taggers in turn are wrapped by ``MeasurementTagger``.
## RobustDateNumberTagger

In [1]:
from estnltk import Text
from estnltk.taggers.measurement_tagging.robust_date_number_tagger import RobustDateNumberTagger

In [2]:
# Build the tagger with conflict_resolving_strategy='ALL'. The repr shown below
# confirms that it wraps a RegexTagger with the same strategy and overlapped=True.
date_number_tagger = RobustDateNumberTagger(conflict_resolving_strategy='ALL')
date_number_tagger

name,layer,attributes,depends_on
RobustDateNumberTagger,dates_numbers,"('grammar_symbol', 'regex_type', 'value')",[]

0,1
tagger,"RegexTagger(conflict_resolving_strategy=ALL, overlapped=True)"


In [3]:
# Example text containing years, numbers, units and the measured object "PSA";
# the tagger cells that follow all operate on this same Text object.
text = Text('PSA 2010. 3ng/ml, PSA 2012. 1,53ng/ml . - Bx va')
text

text
"PSA 2010. 3ng/ml, PSA 2012. 1,53ng/ml . - Bx va"


In [4]:
# Tag the text, then display the resulting ``dates_numbers`` layer.
date_number_tagger.tag(text)
text.dates_numbers

layer name,attributes,parent,enveloping,ambiguous,span count
dates_numbers,"grammar_symbol, regex_type, value",,,False,4

text,start,end,grammar_symbol,regex_type,value
2010,4,8,DATE,date9,partial_date
3,10,11,NUMBER,anynumber,3
2012,22,26,DATE,date9,partial_date
153,28,32,NUMBER,anynumber,1.53


## UnitTagger

In [5]:
from estnltk.taggers.measurement_tagging.unit_tagger import UnitTagger

# Build the unit tagger with conflict_resolving_strategy='ALL'; the repr below
# shows the wrapped RegexTagger and its settings.
unit_tagger = UnitTagger(conflict_resolving_strategy='ALL')
unit_tagger

name,layer,attributes,depends_on
UnitTagger,units,"('grammar_symbol', 'regex_type', 'value')",[]

0,1
tagger,"RegexTagger(conflict_resolving_strategy=ALL, overlapped=True)"


In [6]:
# Tag the same text, then display the ``units`` layer. With the 'ALL' strategy
# overlapping matches are kept (e.g. ``ng/ml`` together with ``g`` and ``m``).
unit_tagger.tag(text)
text.units

layer name,attributes,parent,enveloping,ambiguous,span count
units,"grammar_symbol, regex_type, value",,,False,7

text,start,end,grammar_symbol,regex_type,value
ng/ml,11,16,UNIT,psa_unit,ng/ml
g,12,13,UNIT,kaal_unit,x
m,14,15,UNIT,pikkus_unit,x
ng/ml,32,37,UNIT,psa_unit,ng/ml
g,33,34,UNIT,kaal_unit,x
m,35,36,UNIT,pikkus_unit,x
x,43,44,UNIT,time_unit,x


## MeasurementObjectTagger

In [7]:
from estnltk.taggers.measurement_tagging.measurement_object_tagger import MeasurementObjectTagger
# Build the measurement-object tagger with conflict_resolving_strategy='ALL';
# the repr below shows the wrapped RegexTagger and its settings.
mo_tagger = MeasurementObjectTagger(conflict_resolving_strategy='ALL')
mo_tagger

name,layer,attributes,depends_on
MeasurementObjectTagger,measurement_objects,"('grammar_symbol', 'regex_type', 'value')",[]

0,1
tagger,"RegexTagger(conflict_resolving_strategy=ALL, overlapped=True)"


In [8]:
# Use the explicit .tag() method, as the other tagger cells in this notebook do,
# then display the resulting ``measurement_objects`` layer.
mo_tagger.tag(text)
text.measurement_objects

layer name,attributes,parent,enveloping,ambiguous,span count
measurement_objects,"grammar_symbol, regex_type, value",,,False,3

text,start,end,grammar_symbol,regex_type,value
PSA,0,3,MO,measurement_object,psa
PSA,17,21,MO,measurement_object,psa
PSA,18,21,MO,measurement_object,psa


In [9]:
# Display the text again: it now lists the three layers added by the taggers above.
text

text
"PSA 2010. 3ng/ml, PSA 2012. 1,53ng/ml . - Bx va"

layer name,attributes,parent,enveloping,ambiguous,span count
dates_numbers,"grammar_symbol, regex_type, value",,,False,4
measurement_objects,"grammar_symbol, regex_type, value",,,False,3
units,"grammar_symbol, regex_type, value",,,False,7


## MeasurementTagger
``MeasurementTagger`` merges the outputs of ``RobustDateNumberTagger``, ``MeasurementObjectTagger`` and ``UnitTagger`` using ``MergeTagger`` and creates the ``grammar_tags`` layer.

In [10]:
from estnltk.taggers.measurement_tagging.measurement_tagger import MeasurementTagger

# Every argument except conflict_resolving_strategy is spelled out at its default
# value; the repr below lists the three wrapped taggers and the MergeTagger.
measurement_tagger = MeasurementTagger(attributes=('grammar_symbol', 'regex_type', 'value'), # default
                                       conflict_resolving_strategy='ALL', # default: 'MAX'
                                       overlapped=True, # default
                                       layer_name='grammar_tags' # default
                                      )
measurement_tagger

name,layer,attributes,depends_on
MeasurementTagger,grammar_tags,"('grammar_symbol', 'regex_type', 'value')",[]

0,1
date_number_tagger,"RobustDateNumberTagger(tagger=RegexTagger(conflict_resolving_strategy=ALL, overlapped=True))"
measurement_object_tagger,"MeasurementObjectTagger(tagger=RegexTagger(conflict_resolving_strategy=ALL, overlapped=True))"
unit_tagger,"UnitTagger(tagger=RegexTagger(conflict_resolving_strategy=ALL, overlapped=True))"
merge_tagger,MergeTagger()


In [11]:
# Start from a fresh Text with no layers yet — presumably so that the sub-taggers
# wrapped by MeasurementTagger do not collide with the layers added earlier
# (TODO confirm). The cell output shows the text with its new ``grammar_tags`` layer.
text = Text('PSA 2010. 3ng/ml, PSA 2012. 1,53ng/ml . - Bx va')
measurement_tagger.tag(text)

text
"PSA 2010. 3ng/ml, PSA 2012. 1,53ng/ml . - Bx va"

layer name,attributes,parent,enveloping,ambiguous,span count
grammar_tags,"grammar_symbol, value",,,False,14


In [12]:
# Inspect the merged layer.
# NOTE(review): the displayed layer has only ``grammar_symbol`` and ``value``,
# although 'regex_type' was also passed in ``attributes`` — confirm this is intended.
text.grammar_tags

layer name,attributes,parent,enveloping,ambiguous,span count
grammar_tags,"grammar_symbol, value",,,False,14

text,start,end,grammar_symbol,value
PSA,0,3,MO,psa
2010,4,8,DATE,partial_date
3,10,11,NUMBER,3
ng/ml,11,16,UNIT,ng/ml
g,12,13,UNIT,x
m,14,15,UNIT,x
PSA,17,21,MO,psa
PSA,18,21,MO,psa
2012,22,26,DATE,partial_date
153,28,32,NUMBER,1.53
