In [2]:
from glob import glob
import pandas as pd
import re
from pprint import pprint
import pkg_resources

from pymedextcore.document import Document
from pymedext_eds.annotators import Endlines, SentenceTokenizer, SectionSplitter
from pymedext_eds.utils import rawtext_loader
from pymedext_eds.med import MedicationAnnotator, MedicationNormalizer

To use RuSHSentenceTokenizer, install PyRuSH using "pip install PyRuSH"


  return torch._C._cuda_getDeviceCount() > 0


In [3]:
# Configuration for the three pretrained taggers handed to MedicationAnnotator:
# each entry pairs a checkpoint file with its tag name. The checkpoint paths are
# relative, so presumably they are resolved against the notebook's working
# directory (TODO confirm).
models_param = [
    {
        'tagger_path': f'data/models/apmed5/{model_dir}/final-model.pt',
        'tag_name': f'{tag_prefix}_pred',
    }
    for model_dir, tag_prefix in (
        ('entities', 'entity'),
        ('events', 'event'),
        ('drugblob', 'drugblob'),
    )
]

# Locate the Romedi resource directory shipped inside the installed
# `pymedext_eds` package.
data_path = pkg_resources.resource_filename('pymedext_eds', 'data/romedi')

# glob() returns matches in arbitrary order, so sort before picking one to keep
# the choice reproducible, and fail with an explicit message instead of a bare
# IndexError when the directory holds no '*.p' file.
romedi_candidates = sorted(glob(data_path + '/*.p'))
if not romedi_candidates:
    raise FileNotFoundError(f"No '*.p' Romedi file found in {data_path}")
romedi_path = romedi_candidates[0]

In [4]:
# Build the annotators in the order they are chained. Each constructor appears to
# take the annotation type(s) it reads first and the type it writes second
# (raw_text -> clean_text -> section -> sentence -> med) -- TODO confirm against
# the pymedext_eds API.
endlines = Endlines(["raw_text"], "clean_text", ID="endlines")
sections = SectionSplitter(["clean_text"], "section", ID="sections")
sentenceSplitter = SentenceTokenizer(["section"], "sentence", ID="sentences")

# Constructing the medication annotator loads the model files listed in
# `models_param` (see the "loading file ..." log lines this cell prints).
med = MedicationAnnotator(
    ["sentence"],
    "med",
    ID="med:v2",
    models_param=models_param,
    device="cpu",
)

# The normalizer is given the Romedi resource file located in the previous cell.
norm = MedicationNormalizer(
    ["ENT/DRUG", "ENT/CLASS"],
    "normalized_mention",
    ID="norm",
    romedi_path=romedi_path,
)

# Order matters: this list is what Document.annotate() is called with below.
pipeline = [endlines, sections, sentenceSplitter, med, norm]

2021-02-24 09:08:20,928 loading file data/models/apmed5/entities/final-model.pt
2021-02-24 09:08:24,606 loading file data/models/apmed5/events/final-model.pt
2021-02-24 09:08:31,564 loading file data/models/apmed5/drugblob/final-model.pt


In [5]:
# Demo text files bundled with the installed `pymedext_eds` package.
data_path = pkg_resources.resource_filename('pymedext_eds', 'data/demo')

# glob() yields files in arbitrary, filesystem-dependent order; sorting keeps
# `docs[0]` (displayed further down) the same document on every run.
file_list = sorted(glob(data_path + '/*.txt'))
if not file_list:
    # Fail here with a clear message rather than later with an IndexError on docs[0].
    raise FileNotFoundError(f"No '*.txt' demo file found in {data_path}")

# One loaded document per demo file, in the same order as `file_list`.
docs = [rawtext_loader(x) for x in file_list]

In [6]:
# Run every loaded document through the full pipeline. The return value of
# annotate() is not used; later cells read the results back from the document
# objects themselves (e.g. docs[0]).
for doc in docs:
    doc.annotate(pipeline)

Ignore 2 sentence(s) with no tokens.


In [8]:
# Export the first document through MedicationAnnotator.doc_to_omop and display
# it transposed: in the recorded output each column is one extracted mention and
# each row one field (note_id, snippet, offsets, lexical_variant, ...).
pd.DataFrame.from_records(
    MedicationAnnotator.doc_to_omop(docs[0])
).transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
note_nlp_id,,,,,,,,,,,,
note_id,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1
person_id,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1,cas1
section_concept_id,antecedent,antecedent,antecedent,antecedent,conclusion,conclusion,conclusion,conclusion,antecedent,antecedent,conclusion,conclusion
snippet,: absence de notion de la prescription d’une ...,"INNOHEP 0,6 fait, antivitamine K à débuter c...","INNOHEP 0,6 ml x 1/j pendant 15 jours","PREVISCAN 3/4 de comprimé par jour le soir, p...",Les prélèvements réalisés dans notre laborato...,Elle avait été traitée par du PROZAC® en asso...,"En fonction des douleurs, refaire une infiltr...","""Persistance des douleurs, a eu des infiltrat...","Lors de l’expertise, la patiente soulignait :...","INNOHEP 0,6 fait, antivitamine K à débuter c...",Anticoagulation à dose curative à poursuivre ...,"Début octobre 2015, arrêt du traitement par AVK"
offset_begin,5687,5863,5956,5996,7620,8668,1989,2129,4874,5881,7905,8228
offset_end,5691,5870,5963,6005,7629,8674,2001,2138,4887,5895,7920,8231
lexical_variant,HBPM,INNOHEP,INNOHEP,PREVISCAN,PREVISCAN,PROZAC,cortisonique,cortisone,anticoagulant,antivitamine K,Anticoagulation,AVK
note_nlp_concept_id,,B01AB10 (TINZAPARINE),B01AB10 (TINZAPARINE),,,N06AB03 (FLUOXETINE),H02A (CORTICOIDES A USAGE SYSTEMIQUE NON ASSOC...,,B01AA (ANTIVITAMINES K),B01AA (ANTIVITAMINES K),B01AA (ANTIVITAMINES K),B01AA (ANTIVITAMINES K)
note_nlp_source_concept_id,ATC,ATC,ATC,ATC,ATC,ATC,ATC,ATC,ATC,ATC,ATC,ATC


In [9]:
# Minimal example built directly from a string instead of a file. The French
# sentence reads: "1000 mg of doliprane morning and evening as long as the fever
# does not go down." Note that this rebinds `doc`, previously the loop variable
# of the annotation loop above.
doc = Document("1000 mg de doliprane matin et soir tant que la fièvre ne baisse pas.")

In [11]:
# Apply the same pipeline used for the demo files to the single-sentence document.
doc.annotate(pipeline)

In [12]:
# Inspect the last annotation as a plain dict. In the recorded output this is the
# ENT/DRUG mention "doliprane", with its dose and frequency under 'attributes'
# and the normalization result under 'normalized_mention'.
doc.annotations[-1].to_dict()

{'type': 'ENT/DRUG',
 'value': 'doliprane',
 'ngram': None,
 'span': (11, 20),
 'source': 'med:v2',
 'source_ID': '9f427532-7677-11eb-bb58-0242ac102462',
 'isEntity': False,
 'attributes': {'section': 'head',
  'score': 0.999727189540863,
  'ENT/DOSE': [{'value': '1000 mg',
    'span': (0, 7),
    'type': 'ENT/DOSE',
    'attributes': {'score': 0.9907454550266266},
    'source_ID': '9f427532-7677-11eb-bb58-0242ac102462',
    'in_blob': True,
    'normalized_mention': 'val:1000.0__mg'}],
  'ENT/FREQ': [{'value': 'matin et soir',
    'span': (21, 34),
    'type': 'ENT/FREQ',
    'attributes': {'score': 0.9946552316347758},
    'source_ID': '9f427532-7677-11eb-bb58-0242ac102462',
    'in_blob': True,
    'normalized_mention': 'H_1 0 0 1'}],
  'snippet': '1000 mg de doliprane matin et soir',
  'normalized_mention': {'BN_label': ['DOLIPRANE'],
   'PIN_label': ['PARACÉTAMOL'],
   'IN_label': ['PARACÉTAMOL'],
   'ATC7': 'N02BE01 (PARACETAMOL)',
   'ATC5': 'N02BE (ANILIDES)',
   'ATC4': 'N02B 