## spaCy and displaCy for inference

In [1]:
import spacy
from transformers import *
import pandas as pd
import numpy as np

In [2]:
from spacy import displacy
# displaCy highlight palette: the B- (begin) and I- (inside) tag of each
# entity category share a single color.
colors = dict.fromkeys(("B-Diagnóstico", "I-Diagnóstico"), "#00FF00")
colors.update(dict.fromkeys(("B-Antecedentes", "I-Antecedentes"), "#FFFF00"))
colors.update(dict.fromkeys(("B-Objetivo", "I-Objetivo"), "#FF00FF"))

# Renderer options: the labels to display (same order as the palette keys)
# and the color assigned to each of them.
options = {"ents": list(colors), "colors": colors}

In [3]:
# Read the report table and keep the 'StrippedReport' column as a plain
# Python list, one entry per row.
df_test = pd.read_csv("petctreports_sp.csv")
studies = df_test["StrippedReport"].tolist()

In [4]:
# Build a token-classification ("ner") pipeline from the model stored in the
# local directory below (its name indicates a RoBERTa biomedical-clinical
# Spanish checkpoint fine-tuned for NER).
ner = pipeline(task="ner", model="./roberta-base-biomedical-clinical-es-NER")

In [5]:
def get_entities_html(text, ner_result, title=None):
    """Display the NER predictions for *text* with displaCy's entity view.

    Args:
        text: The string the predictions refer to.
        ner_result: Iterable of mappings, each read through its "start",
            "end" and "entity" keys (character offsets and label).
        title: Optional title placed in the render data; defaults to None.

    The function has no return statement; it calls ``displacy.render`` with
    ``manual=True`` and ``jupyter=True`` and uses the module-level
    ``options`` for labels and colors.
    """
    spans = []
    for prediction in ner_result:
        span = {
            "start": prediction["start"],
            "end": prediction["end"],
            # A score could be appended to the label here if desired.
            "label": prediction["entity"],
        }
        if spans:
            previous = spans[-1]
            gap = span["start"] - previous["end"]
            if -1 <= gap <= 1 and previous["label"] == span["label"]:
                # Same label and at most one character apart from the last
                # span: grow that span instead of emitting a new one.
                previous["end"] = span["end"]
            else:
                spans.append(span)
        else:
            spans.append(span)

    # With manual=True, displaCy takes pre-built dicts rather than Doc objects.
    document = {"text": text, "ents": spans, "title": title}
    displacy.render(
        [document],
        style="ent",
        manual=True,
        jupyter=True,
        options=options,
    )

In [6]:
# Run the NER pipeline on the reports in studies[1020:1030] and display the
# highlighted entities of each one.
for text in studies[1020:1030]:
    doc_ner = ner(text)
    get_entities_html(text=text, ner_result=doc_ner)