In [1]:
import medspacy
import spacy
from medspacy.ner import TargetRule
from medspacy.ner import ConceptTagger
from spacy.tokens import Span
from medspacy.visualization import visualize_dep, visualize_ent

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import sys
import os
# Extend the module search path so the project's rule modules can be imported.
# The path is relative, so this presumably expects the kernel's working
# directory to be the notebook's own folder -- TODO confirm.
sys.path.append('../src/')
# Project-local rule modules (presumably located in ../src/); these imports
# must stay below the sys.path.append call above.
import temporal_token_rules
import temporal_NER_rules

#### 1. token tagger

In [3]:
# Build the base pipeline with only the PyRuSH component enabled, then append
# the concept tagger. The final expression displays the resulting component
# order as the cell output.
nlp = medspacy.load(medspacy_enable=["medspacy_pyrush"]) # sentencizer (sentence splitter)
concept_matcher = nlp.add_pipe("medspacy_concept_tagger")
nlp.pipe_names

['medspacy_pyrush', 'medspacy_concept_tagger']

In [4]:
# Fetch the token-tagging rules from the local temporal_token_rules module
# and register them with the concept tagger created above.
tagger_rules = temporal_token_rules.get_token_rules()
concept_matcher.add(tagger_rules)

In [5]:
# Smoke-test the token tagger on a short phrase: print each token next to the
# concept tag attached to it. Tokens without a tag print an empty value
# (see "for" and "2" in the recorded output).
doc_token_tagger = nlp("for the past 2 weeks")
for token in doc_token_tagger:
    concept_tag = token._.concept_tag
    print(token, concept_tag)

for 
the DETERMINER
past PAST
2 
weeks WEEK


#### 2. temporal named entity extraction

In [6]:
# (Re)attach the target matcher: drop any instance already present in the
# pipeline, then add a freshly created one, so that re-running this cell
# leaves exactly one target matcher in place.
target_matcher_name = "medspacy_target_matcher"
if nlp.has_pipe(target_matcher_name):
    nlp.remove_pipe(target_matcher_name)
target_matcher = nlp.add_pipe(target_matcher_name)
# Display the component order as the cell output.
nlp.pipe_names

['medspacy_pyrush', 'medspacy_concept_tagger', 'medspacy_target_matcher']

In [7]:
# Fetch the entity-level rules from the local temporal_NER_rules module and
# register them with the target matcher. Judging by the recorded output
# further down, these are TargetRule objects with categories such as DATE,
# TIME and DURATION.
ner_rules = temporal_NER_rules.get_NER_rules()
target_matcher.add(ner_rules)

#### 3. examples

In [8]:
# Example sentences used to exercise the temporal rules: a calendar date with
# a clock time, durations, relative time expressions, frequencies, and one
# sentence with a plain count ("2 episodes").
test_string = '''
He was admitted to the hospital on AUG 4,2016 @23:49. 
He stayed there for 7 days. 
He was discharged sometime this week.
He will take the medicine daily.
He has 2 episodes of homelessness.
He will take the medicine four times every day.
He used vape pen in the past 2 years.
'''

In [9]:
# Run the assembled pipeline (PyRuSH, concept tagger, target matcher) over the
# example sentences.
doc = nlp(test_string)

In [10]:
# Display the entities found in the document using medspaCy's entity visualizer.
visualize_ent(doc)

#### 4. check the rules for the entities

In [11]:
# For every extracted entity, print its text, its label and the value stored
# in its `target_rule` extension (the rule shown as TargetRule(...) in the
# recorded output), so each match can be traced back to a rule.
for ent in doc.ents:
    report_fields = ('ENTITY and LABEL', ent, ent.label_, ent._.target_rule)
    print(*report_fields)

ENTITY and LABEL AUG 4,2016 DATE TargetRule(literal="calendar date", category="DATE", pattern=[{'_': {'concept_tag': 'MONTH'}, 'OP': '+'}, {'IS_PUNCT': True, 'OP': '*'}, {'IS_SPACE': True, 'OP': '*'}, {'TEXT': {'REGEX': '^(?<![0-9])([0-2]?\\d|30|31)\\W(([12]\\d{3})|(\\d{2}))$'}, 'OP': '+'}], attributes=None, on_match=None)
ENTITY and LABEL @23:49 TIME TargetRule(literal="time", category="TIME", pattern=[{'TEXT': {'REGEX': '^\\W([0-1]?[0-9]|2[0-3]):[0-5][0-9]$'}, 'OP': '+'}, {'IS_SPACE': True, 'OP': '*'}, {'_': {'concept_tag': 'TIME'}, 'OP': '*'}], attributes=None, on_match=None)
ENTITY and LABEL for 7 days. DURATION TargetRule(literal="duration", category="DURATION", pattern=[{'LOWER': {'IN': ['for', 'about', 'in', 'during', 'between', 'nearly', 'almost']}, 'OP': '+'}, {'TEXT': {'REGEX': '^[0-9]|[0-9][0-9]$'}, 'OP': '+'}, {'_': {'concept_tag': 'DAY'}, 'OP': '+'}, {'_': {'concept_tag': 'AGO'}, 'OP': '!'}], attributes=None, on_match=None)
ENTITY and LABEL discharged sometime this week DA