In [1]:
%cd ..

/home/ivan/Documents/rel_ext/pymedext_eds


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import json

In [4]:
import os

from glob import glob
import pandas as pd
import re
from pprint import pprint
import pkg_resources

from pymedextcore.document import Document
from pymedext_eds.annotators import Endlines, SentenceTokenizer, Hypothesis, \
                                    ATCDFamille, SyntagmeTokenizer, Negation, RegexMatcher, \
                                    QuickUMLSAnnotator, Pipeline, SectionSplitter, Pipeline

from pymedext_eds.utils import rawtext_loader
from pymedext_eds.ner import NERAnnotator
from pymedext_eds.norm import NERNormalizer, NormPheno, FeedDictionnary

  return torch._C._cuda_getDeviceCount() > 0


In [5]:
endlines = Endlines(['raw_text'], 'clean_text', 'endlines:v1')
sentences = SentenceTokenizer(['clean_text'], 'sentence', 'sentenceTokenizer:v1')
sections = SectionSplitter(['clean_text'], "section", ID= 'sections')
hypothesis = Hypothesis(['sentence'], 'hypothesis', 'hypothesis:v1')
family = ATCDFamille(['sentence'], 'context', 'ATCDfamily:v1')
syntagmes = SyntagmeTokenizer(['sentence'], 'syntagme', 'SyntagmeTokenizer:v1')
negation = Negation(['syntagme'], 'negation', 'Negation:v1')
regex = RegexMatcher(['clean_text','syntagme'], 'regex', 'RegexMatcher:v1', 'list_regexp.json')

sosy|T184|Sign or Symptom  


dsyn|T047|Disease or Syndrome  
neop|T191|Neoplastic Process  
comd|T049|Cell or Molecular Dysfunction  
mobd|T048|Mental or Behavioral Dysfunction   
patf|T046|Pathologic Function  
anab|T190|Anatomical Abnormality  
cgab|T019|Congenital Abnormality  
acab|T020|Acquired Abnormality  
inpo|T037|Injury or Poisoning  

diap|T060|Diagnostic Procedure  
lbpr|T059|Laboratory Procedure  
lbtr|T034|Laboratory or Test Result  

topp|T061|Therapeutic or Preventive Procedure  


In [6]:
#                                    accepted_semtypes = {'T184', 'T047', 'T191', 'T049', 'T048', 'T046', 'T190', 'T019', 'T020', 'T037','T060',  'T059', 'T034', 'T061'},


In [7]:
quick_umls_th = 0.9
quick_umls_dist = "score"

umls_syntagme = QuickUMLSAnnotator(['syntagme'], 'umls_syntagme', 'QuickUMLS:2020AA', 
                                   quickumls_fp='data/umls2_UL/',
                                   overlapping_criteria=quick_umls_dist,
                                   threshold=quick_umls_th,
                                   similarity_name='jaccard',
                                   accepted_semtypes = {'T184', 'T047', 'T191', 'T049', 'T048', 'T046', 'T190', 'T019', 'T020', 'T037'},
                                   window=5)

umls_signs = QuickUMLSAnnotator(['ENT/SIGNS'], 'umls_signs', 'QuickUMLS:2020AA', 
                                quickumls_fp='data/umls2_UL/',
                                overlapping_criteria=quick_umls_dist,
                                threshold=quick_umls_th,
                                similarity_name='jaccard',
                                accepted_semtypes = {'T184'},
                                window=5)

umls_diag_proc = QuickUMLSAnnotator(['ENT/DIAG_PROC'], 'umls_diag_proc', 'QuickUMLS:2020AA', 
                                quickumls_fp='data/umls2_UL/',
                                overlapping_criteria=quick_umls_dist,
                                threshold=quick_umls_th,
                                similarity_name='jaccard',
                                accepted_semtypes = {'T060', 'T059', 'T034'},
                                window=5)

umls_diag_name = QuickUMLSAnnotator(['ENT/DIAG_NAME'], 'umls_diag_name', 'QuickUMLS:2020AA', 
                                quickumls_fp='data/umls2_UL/',
                                overlapping_criteria=quick_umls_dist,
                                threshold=quick_umls_th,
                                similarity_name='jaccard',
                                accepted_semtypes = {'T047', 'T191', 'T049', 'T048', 'T046', 'T190', 'T019', 'T020', 'T037'},
                                window=5)

umls_therap = QuickUMLSAnnotator(['ENT/THERAP_PROC'], 'umls_therap', 'QuickUMLS:2020AA', 
                                quickumls_fp='data/umls2_UL/',
                                overlapping_criteria=quick_umls_dist,
                                threshold=quick_umls_th,
                                similarity_name='jaccard',
                                accepted_semtypes = {'T061'},
                                window=5)


In [8]:
models_param = [{'tagger_path':'data/models/apcner_deid/entities_7/best-model.pt' ,
                  'store_embedding':True,
                'tag_name': 'pheno_pred' }]

ner = NERAnnotator(['sentence'], 'ner', ID='med:v2', models_param=models_param,  device='cpu', reduce_embedding=False)

2021-04-30 12:00:53,773 loading file data/models/apcner_deid/entities_7/best-model.pt


In [19]:
dico_umls_exact_match = FeedDictionnary(['ENT/SIGNS','ENT/DIAG_NAME'],
                                        'feed_dictionnary_quick_umls',
                                        ID = 'fd:v1',
                                        path_dict='data/emb_dict_v1',
                                        threshold=1)

In [20]:
feed_dic_pipeline = [endlines, sections, sentences, ner, dico_umls_exact_match]

In [11]:
with open("../data/export_mincil_pheno_loc.v1.json") as h:
    docs = json.load(h)

In [12]:
docs = [Document(
        raw_text = x['ano_text'],
        ID = x['document_num'],
        attributes = {'person_id': x['patient_num']}
    ) for x in docs]

In [21]:
for doc in docs:
    doc.annotate(feed_dic_pipeline)
    break

In [22]:
import numpy as np

In [23]:
with open("data/emb_dict_v1label.npy", 'rb') as h:
    labels = np.load(h)

In [33]:
labels = np.repeat(labels[:, :2], labels[:, 2].astype('int'), 0)

In [24]:
with open("data/emb_dict_v1emb.npy", 'rb') as h:
    embeddings = np.load(h)

In [37]:
assert labels.shape[0] == embeddings.shape[0]

In [16]:
import numpy as np

In [27]:
test = np.array([[1.,2.,3.], [1.,2.,3.]])

In [22]:
test[:,:-1] = 0

In [28]:
test

array([[1., 2., 3.],
       [1., 2., 3.]])

In [30]:
np.insert(test, 0, "test", axis = 1)

ValueError: could not convert string to float: 'test'

In [21]:
np.array(['test', 'test1', np.array([1.,2.,3.])])

  np.array(['test', 'test1', np.array([1.,2.,3.])])


array(['test', 'test1', array([1., 2., 3.])], dtype=object)

In [15]:
%debug

> [0;32m/home/ivan/Documents/rel_ext/pymedext_eds/pymedext_eds/norm.py[0m(195)[0;36mannotate_function[0;34m()[0m
[0;32m    193 [0;31m                    [0;32mbreak[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    194 [0;31m[0;34m[0m[0m
[0m[0;32m--> 195 [0;31m        [0;32massert[0m [0;32mFalse[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    196 [0;31m        [0macc[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0marray[0m[0;34m([0m[0macc[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    197 [0;31m        [0;32mwith[0m [0mopen[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mpath_dict[0m[0;34m,[0m [0;34m'wb'[0m[0;34m)[0m [0;32mas[0m [0mf[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  annotations.source_id


*** NameError: name 'annotations' is not defined


ipdb>  annotation.ID


'99a48dba-a997-11eb-8e24-f1562e816fd1'


ipdb>  annotation.source_ID


'9887f6f6-a997-11eb-8e24-f1562e816fd1'


ipdb>  annotation.to_dict()


{'type': 'ENT/DIAG_NAME', 'value': 'Hyperglycemie provoquee orale', 'ngram': None, 'span': (1472, 1501), 'source': 'med:v2', 'source_ID': '9887f6f6-a997-11eb-8e24-f1562e816fd1', 'isEntity': True, 'attributes': {'score': 0.5852941870689392, 'embedding': array([[ 0.1298107 , -0.6174773 ,  1.1808369 , ...,  0.01855098,
        -0.16452028, -0.380367  ],
       [ 0.22376876, -0.35998854, -0.4024914 , ...,  0.48640388,
         0.3496063 , -0.7240446 ],
       [ 0.50107014,  0.09623145,  0.34049886, ...,  0.9763289 ,
         0.3043358 , -0.5588979 ]], dtype=float32)}, 'ID': '99a48dba-a997-11eb-8e24-f1562e816fd1'}


ipdb>  quit


In [33]:
norm_pheno = NormPheno(['ENT/SIGNS','ENT/DIAG_NAME'], 'normalized_mention', ID='pheno_dic:v1', path_dict='data/big_pheno_dic.csv' )

In [10]:
pipeline = [endlines, sections, sentences, hypothesis, family, syntagmes,
            negation, regex, umls_syntagme, ner,  norm_med, norm_pheno,
            umls_signs, umls_diag_proc, umls_diag_name, umls_therap]

In [11]:
# data_path = pkg_resources.resource_filename('pymedext_eds', 'data/demo')
file_list = glob("data/test_data" + '/*.txt')

docs = [rawtext_loader(x) for x in file_list]

In [12]:
file_list

['data/test_data/pheno_norm.txt', 'data/test_data/pheno_ner.txt']

In [13]:
for doc in docs:
    doc.annotate(pipeline)

In [14]:
doc.get_annotations('normalized_mention')[0].to_dict()

{'type': 'normalized_mention',
 'value': 'sclerose laterale amyotrophique',
 'ngram': None,
 'span': (66, 97),
 'source': 'pheno_dic:v1',
 'source_ID': 'd58022ce-a43b-11eb-814f-9df4f554ea1b',
 'isEntity': False,
 'attributes': {'score_cos': 0.9880014657974243,
  'mention': 'sclérose latérale amyotrophique',
  'cui': 'C0002736',
  'label': 'sclerose laterale amyotrophique',
  'hypothesis': 'certain',
  'context': 'patient',
  'score': 0.9832151134808859},
 'ID': 'd595a900-a43b-11eb-814f-9df4f554ea1b'}

In [15]:
doc.get_annotations("ENT/SIGNS")[2].to_dict()

{'type': 'ENT/SIGNS',
 'value': 'signe du flot',
 'ngram': None,
 'span': (335, 348),
 'source': 'med:v2',
 'source_ID': 'd58024e0-a43b-11eb-814f-9df4f554ea1b',
 'isEntity': True,
 'attributes': {'hypothesis': 'certain',
  'context': 'patient',
  'score': 0.678720494111379},
 'ID': 'd5910116-a43b-11eb-814f-9df4f554ea1b'}

In [11]:
ann_list = glob("../../brat_data/covid_pheno_norm/annotator_0_training" + '/*.ann')


In [12]:
def read_brat_ann(path):
    doc_id =  os.path.splitext(os.path.basename(path))[0].split('_')[-1]
    with open(path, "r") as h:
        ann_norm = []
        ann_ner = []
        for line in h.readlines():
            ann_id, field, mention = line.split('\t')
            if ann_id[0] == "T":
                ent_type = field.split(' ')[0]
                start = field.split(' ')[1]
                stop = field.split(' ')[-1]
                ann_ner.append((doc_id, ann_id, ent_type, start, stop, mention.strip()))
            elif ann_id[0] == "N":
                _, ent_id, code = field.split(' ')
                termino, code = code.split(':')
                ann_norm.append((doc_id, ann_id, ent_id, termino, code, mention.strip()))
    
    return(ann_norm, ann_ner)

In [13]:
all_ann = []
for ann_path in ann_list:
    ann_norm, ann_ner = read_brat_ann(ann_path)
    ann_ner = pd.DataFrame(ann_ner, columns=["doc_id", "ent_id", "ent_type", "start", "stop", "mention"])
    ann_norm =  pd.DataFrame(ann_norm, columns = ["doc_id", "ann_id", "ent_id", "termino", "cui", "mention"])
    ann_ner = ann_ner.merge(ann_norm, how= "left", on = ["doc_id", "ent_id"], suffixes=['_ner', '_norm'])
    all_ann.append(ann_ner)

all_ann = pd.concat(all_ann)

In [14]:
all_ann = all_ann.loc[lambda x:x.ent_type.isin(["DIAG_NAME", "SIGNS"])]

In [15]:
all_ann.assign(has_norm = lambda x:x.cui.notnull()).groupby(['ent_type'])['has_norm'].aggregate([sum, len, lambda x: sum(x)/len(x)])

Unnamed: 0_level_0,sum,len,<lambda_0>
ent_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DIAG_NAME,106,126,0.84127
SIGNS,127,180,0.705556


In [16]:
all_ann = all_ann.loc[lambda x:x.cui.notnull()]

In [17]:
all_ann.head()

Unnamed: 0,doc_id,ent_id,ent_type,start,stop,mention_ner,ann_id,termino,cui,mention_norm
7,698430240,T25,SIGNS,1510,1547,douleurs intenses notamment nocturnes,N13,UMLS_FR,C0030193,Douleur SAI
8,698430240,T1,SIGNS,201,208,malaise,N1,UMLS_FR,C0231218,Se sentait malade en général
9,698430240,T2,SIGNS,292,328,contusion directe de l’épaule droite,N2,UMLS_FR,C2218555,
10,698430240,T3,SIGNS,330,357,impotence massive immédiate,N3,UMLS_FR,C0311394,Incapacité de marcher
11,698430240,T4,DIAG_NAME,395,437,rupture étendue au niveau du supra-épineux,N4,UMLS_FR,C0439059,


In [19]:
# pd.DataFrame.from_records(NERAnnotator.doc_to_omop(docs[1]))

In [20]:
def to_pdf(doc, key):
    columns = ["doc_id", "mention", "start", "end", "cui", "label"]
    lines = [(doc.source_ID, t.value, t.span[0], t.span[1], t.attributes["cui"], t.attributes["label"]) \
             for t in doc.get_annotations(key)]
    
    
    df = pd.DataFrame(lines if lines else {k: [] for k in columns}, 
                      columns = columns
                     )    
    return df

In [21]:
ner_acc = []
syn_acc = []

for doc in docs:
    df_syn = to_pdf(doc, 'umls_syntagme')
    
#     ner_acc.append(to_pdf(doc, 'umls_therap'))
    ner_acc.append(to_pdf(doc, 'umls_diag_name'))
#     ner_acc.append(to_pdf(doc, 'umls_diag_proc'))
    ner_acc.append(to_pdf(doc, 'umls_signs'))

    syn_acc.append(df_syn)

ner_acc = pd.concat(ner_acc).assign(mod = "ner")
syn_acc = pd.concat(syn_acc).assign(mod = "syn")

In [22]:
ner_acc.head()

Unnamed: 0,doc_id,mention,start,end,cui,label,mod
0,698430240,contusion,297.0,306.0,C0009938,contusion,ner
1,698430240,rupture,401.0,408.0,C3203359,rupture,ner
2,698430240,rupture,446.0,453.0,C3203359,rupture,ner
3,698430240,épanchement intra-articulaire assez,484.0,519.0,C1253936,epanchement intra-articulair,ner
4,698430240,traumatisme,1429.0,1440.0,C3714660,traumatisme,ner


In [23]:
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, recall_score, precision_score

In [24]:
def compute_metrics(gold, pred, label = ""):
    tp, fn, fp, tn = confusion_matrix(gold, pred).ravel()
    acc = accuracy_score(gold, pred)
    f1= f1_score(gold, pred, average="binary")
    r = recall_score(gold, pred, average="binary")
    p = precision_score(gold, pred, average="binary")
    
    #reformat metrics
    metrics = pd.DataFrame([[tp + fn, tp, fn, fp, r, p, f1, acc]],
                           columns=['total_pos', "TP", "FN", "FP", "Recall", "Precision", "F1", "Accuracy"], index = [label])
    return metrics

In [25]:
all_merge = (all_ann
             .merge(ner_acc, how = "outer", on =["doc_id", "cui"])
             .merge(syn_acc, how = "outer", on =["doc_id", "cui"])
             .assign(true = lambda x:x.ent_id.notnull())
             .assign(pred_ner = lambda x:x['mod_x'].notnull())
             .assign(pred_q = lambda x:x['mod_y'].notnull())

            )

In [26]:
pd.concat([compute_metrics(all_merge.true, all_merge.pred_ner, label = "NER + QuickUMLS (ent level)"), 
           compute_metrics(all_merge.true, all_merge.pred_q, label = "QuickUMLS (ent level)"),
           compute_metrics(all_merge.drop_duplicates(["doc_id", "cui"]).true, all_merge.drop_duplicates(["doc_id", "cui"]).pred_ner, label = "NER + QuickUMLS (doc level)"),
           compute_metrics(all_merge.drop_duplicates(["doc_id", "cui"]).true, all_merge.drop_duplicates(["doc_id", "cui"]).pred_q, label = "QuickUMLS (doc level)")]
         ).round(2)

Unnamed: 0,total_pos,TP,FN,FP,Recall,Precision,F1,Accuracy
NER + QuickUMLS (ent level),168,73,95,162,0.78,0.86,0.81,0.71
QuickUMLS (ent level),168,3,165,140,0.81,0.78,0.79,0.66
NER + QuickUMLS (doc level),104,55,49,122,0.29,0.51,0.37,0.38
QuickUMLS (doc level),104,3,101,110,0.36,0.38,0.37,0.24


In [15]:
len(syn_acc), len(ner_acc)

(682, 418)

In [16]:
ner_acc.merge(syn_acc, how = "inner", on =["doc_id", "start", "end", "cui"]).shape

(296, 10)

In [17]:
diff = ner_acc.merge(syn_acc, how = "outer", on =["doc_id", "start", "end"]).loc[lambda x:x.cui_x != x.cui_y]

In [18]:
from IPython.display import display_html

In [19]:
gen = ((k, group) for k, group in diff.groupby('doc_id'))


In [21]:
k, group = next(gen)
ner_mention = group.loc[lambda x:x.mod_x.notnull(), ["mention_x", "label_x"]]
syn_mention = group.loc[lambda x:x.mod_y.notnull(), ["mention_y", "label_y"]]

display_html(ner_mention)
display_html(syn_mention)

Unnamed: 0,mention_x,label_x
19,cardiopathie ischémique,myelopathie ischemique
20,infarctus,infarctus
31,échographie cardiaque,echographie cardiaque
33,cardiovasculaire,syncope cardiovasculaire


Unnamed: 0,mention_y,label_y
33,cardiovasculaire,nevrose cardiovasculaire
450,consultation,teleconsultation
451,consultation,teleconsultation
452,suivi de cardiopathie ischémique,cardiopathie ischemique
453,fait infarctus,post infarctus
454,TSA et des membres inférieurs,ulceres des membres inferieurs
455,fraction d’éjection retrouvée,fraction d'ejection
456,échographie,echographie sai
457,consultation,teleconsultation
458,chute,chutes


In [None]:
# python -m quickumls.install -E FR data/umls data/quickumls

In [16]:
from pymedext_eds.viz import display_annotations

In [119]:
doc_demo = rawtext_loader("data/test_data/pheno_norm.txt") 

In [120]:
pipeline = [endlines, sections, sentences, hypothesis, family, syntagmes,
            negation, regex, umls_syntagme, ner,  norm_med, 
            umls_signs, umls_diag_proc, umls_diag_name, umls_therap]

In [121]:
doc_demo.annotate(pipeline)

In [105]:
display_annotations(doc_demo, entities=['ENT/DIAG_NAME', 'ENT/SIGNS'])

In [106]:
list(norm_pheno.dict_label.values())[:10]

[('C0038218', "crise d'asthme"),
 ('C0205929', 'fistule anale'),
 ('C0000833', 'abces'),
 ('C4324354', 'subfebrile'),
 ('C0030193', 'douleur'),
 ('C0031019', 'abces perianal'),
 ('C0016169', 'fistule'),
 ('C2825055', 'recidive'),
 ('C0015967', 'fievre'),
 ('C0242301', 'furoncle')]

In [45]:
import numpy as np

In [46]:
emb = pd.read_csv(norm_pheno.path_dict, header=None)
dict_label = {k:(v[0], v[1]) for k,v in emb.iloc[:,:2].iterrows()}
matrix_embeddings = np.ascontiguousarray(emb.iloc[:,2:].values.astype('float32'))

In [108]:
pd.DataFrame(dict_label, index =['cui', 'label']).T.cui.nunique()

5691

In [109]:
import torch

In [111]:
input2 = torch.randn(100, 128)

In [124]:
test = [(t.attributes['embedding'], t.value) for t in doc_demo.get_annotations('ENT/SIGNS')]

In [157]:
i = 5
test[i][1]

'va bien'

In [158]:
output = torch.cosine_similarity(torch.tensor(matrix_embeddings), torch.tensor(test[i][0]).reshape(1, -1))

In [159]:
dict_label[torch.argmax(output).item()]

('C0236102', 'etat general')

In [133]:
output

tensor([0.8235, 0.7694, 0.7538,  ..., 0.7837, 0.7984, 0.7687])

In [99]:
pd.DataFrame(dict_label, index =['cui', 'label']).T.loc[lambda x:x.label.str.contains('kyste')]

Unnamed: 0,cui,label
226,C0010709,kyste
285,C0029927,kyste ovarien
394,C0010709,kystes
428,C0400990,kyste biliaire
879,C0031925,kyste pilonidal
...,...,...
7153,C0035281,kyste retentionnel
7348,C1142385,kyste ovarien fonctionnel
7367,C0016429,kyste folliculaire de l'ovaire
7434,C0152244,kyste osseux anevrysmal


In [47]:
len(dict_label)

7529

In [48]:
matrix_embeddings[0,:]

array([ 0.23342381, -0.21253404,  0.4980659 , ...,  0.3178097 ,
       -0.04451039,  0.09476368], dtype=float32)

In [100]:
i = 226
norm_pheno.dict_label[i]

('C0010709', 'kyste')

In [101]:
test = norm_pheno.find_closest_embeddings(matrix_embeddings[i:i+1,:], 5)

In [102]:
[norm_pheno.dict_label[t] for t in test[0, :, 0]]

[('C0010709', 'kyste'),
 ('C1389462', 'kyste pelvien'),
 ('C1879828', 'teratome kystique'),
 ('C0272407', 'kyste splenique'),
 ('C0333145', 'kyste hemorragique')]

In [85]:
test[0, :, 1]

array([0.99999917, 0.99999917, 0.99999917, 0.99999917, 0.86560136])

In [42]:
norm_pheno.index_embedding.xb

<faiss.swigfaiss_avx2.FloatVector; proxy of <Swig Object of type 'std::vector< float > *' at 0x7fe0e9673ae0> >

In [31]:
len(norm_pheno.dict_label)

7529

In [107]:
[(t.value, t.attributes['mention'], t.attributes['score_cos']) for t in doc_demo.get_annotations('normalized_mention')]

[('cicatrice de brulure', 'kyste au niveau du rein droit', 0.9567136168479919),
 ('cicatrice de brulure',
  'kyste au niveau du poumon droit',
  0.962617039680481),
 ('demangeaison', 'kyste au niveau du pelvis', 0.9527400732040405),
 ('splenose', 'kyste au niveau splénique', 0.9514991044998169),
 ('splenose', 'kyste au niveau de la rate', 0.9609988927841187),
 ('kyste mammaire', 'kyste saignant', 0.8997305631637573),
 ('kyste hemorragique', 'kyste hémorragique', 0.9737793207168579),
 ('lesion hepatique', 'lésion', 0.9144874811172485),
 ('lesion hepatique', 'lésion', 0.9286418557167053),
 ('infection genitale', "signes d'ICG", 0.9405293464660645),
 ('infection genitale', "signes d'ICD", 0.9312077760696411),
 ('infection genitale', "signes d'IC", 0.9398549795150757),
 ('auscultation pulmonaire anormale',
  'Auscultation anormale',
  0.9592617750167847),
 ('diabete', 'diabète', 0.9452240467071533),
 ('diabete', 'diabète', 0.9414633512496948),
 ('dpv', 'IVG', 0.830310583114624),
 ('ne se s

In [23]:
display_annotations(doc_demo, entities=['normalized_mention'])

In [18]:
display_annotations(doc_demo, entities=['umls_syntagme'])

In [214]:
display_annotations(doc_demo, entities=['ENT/SIGNS'])

In [215]:
display_annotations(doc_demo, entities=['ENT/DIAG_NAME'])