In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to imported source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pprint import pprint
from glob import glob
import pkg_resources

from pymedext_eds.annotators import Endlines, SentenceTokenizer, Hypothesis, \
                                    ATCDFamille, SyntagmeTokenizer, Negation, RegexMatcher, \
                                    QuickUMLSAnnotator, Pipeline
from pymedext_eds.utils import rawtext_loader
from pymedext_eds.viz import display_annotations

## Load demo texts using `rawtext_loader`: 

In [3]:
# Locate the demo corpus shipped inside the installed `pymedext_eds` package.
data_path = pkg_resources.resource_filename('pymedext_eds', 'data/demo')

# glob() yields matches in arbitrary, filesystem-dependent order. Sort them so
# the positional lookups used later in this notebook (chunk[0], chunk[2])
# refer to the same files on every machine and every run.
file_list = sorted(glob(data_path + '/*.txt'))
assert file_list, f"no demo .txt files found under {data_path!r}"

# Load each text file with `rawtext_loader`, keeping the sorted file order.
chunk = [rawtext_loader(x) for x in file_list]

## Create annotators and pipeline: 

In [4]:
# Every annotator takes three leading positional arguments. Judging by the
# printed 'regex' annotation later in this notebook, the second and third end
# up as the annotation's `type` and `source`. The leading list presumably names
# the annotation types consumed as input -- TODO confirm against
# pymedext_eds.annotators.

# Segmentation of the raw text.
endlines = Endlines(['raw_text'], 'endlines', 'endlines:v1')
sentences = SentenceTokenizer(['endlines'], 'sentence', 'sentenceTokenizer:v1')

# Sentence-level annotators.
hypothesis = Hypothesis(['sentence'], 'hypothesis', 'hypothesis:v1')
family = ATCDFamille(['sentence'], 'context', 'ATCDfamily:v1')

# Syntagme-level annotators.
syntagmes = SyntagmeTokenizer(['sentence'], 'syntagme', 'SyntagmeTokenizer:v1')
negation = Negation(['syntagme'], 'negation', 'Negation:v1')

# 'list_regexp.json' is passed as a bare file name; how it is resolved is up to
# RegexMatcher -- TODO confirm.
regex = RegexMatcher(
    ['endlines', 'syntagme'],
    'regex',
    'RegexMatcher:v1',
    'list_regexp.json',
)

# NOTE(review): the source label says 2020AA while the index path says 2020AB --
# confirm which release is actually in use. The index path is relative, so it
# presumably depends on the notebook's working directory.
umls = QuickUMLSAnnotator(
    ['syntagme'],
    'umls',
    'QuickUMLS:2020AA',
    quickumls_fp='data/umls2020AB_UL/',
    overlapping_criteria='length',
    threshold=0.9,
    similarity_name='jaccard',
    window=5,
)

# Assemble the annotators into a single pipeline, in this order.
pipeline = Pipeline(
    pipeline=[
        endlines,
        sentences,
        hypothesis,
        family,
        syntagmes,
        negation,
        regex,
        umls,
    ]
)

## Run the annotation pipeline:

In [5]:
# Run every loaded document in `chunk` through the pipeline. The cells below
# read the results back from `chunk` itself, so annotations are presumably
# attached to the documents in place -- TODO confirm what `annotate` returns.
annotated_chunk = pipeline.annotate(chunk)

## Print one annotation of type `regex`:

In [6]:
# Fetch the annotations of type 'regex' from the first loaded document, then
# pretty-print the one at index 10 in its dictionary form.
regex_annotations = chunk[0].get_annotations('regex')
pprint(regex_annotations[10].to_dict())

{'ID': '662592ee-55bf-11eb-8e36-3c7d0a00025d',
 'attributes': {'context': 'patient',
                'hypothesis': 'certain',
                'id_regexp': 'id_regexp_chir',
                'label': 'Chirurgie récente',
                'negation': 'aff',
                'snippet': 't échanger avec moi sur la conduite à tenir ?" . A '
                           '12 h, le chirurgien rappelait la patiente et '
                           'organisait un rendez-vous auprès ',
                'version': 'v2'},
 'isEntity': True,
 'ngram': None,
 'source': 'RegexMatcher:v1',
 'source_ID': '661b9762-55bf-11eb-8e36-3c7d0a00025d',
 'span': (5541, 5550),
 'type': 'regex',
 'value': 'chirurgie'}


## Display annotations using `display_annotations`:

In [7]:
# Visualise the 'umls' annotations of the third loaded document, passing the
# listed attribute names and using 'cui' as the label key.
display_annotations(
    chunk[2],
    ['umls'],
    attributes=['negation', 'hypothesis', 'context', 'semtypes'],
    label_key='cui',
)