In [None]:
!pip install --no-deps spacy-transformers 'spacy_alignments'
!python -m spacy download en_core_web_trf

import spacy
import torch
import cupy
import sys
import numpy as np
import scipy
import scipy.special
from spacy.training import Example
from spacy.scorer import Scorer
from spacy.tokens import DocBin
from spacy.util import filter_spans
from tqdm import tqdm
import json
import warnings
import cupy as cp
    
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by later cells are
# hidden as well — consider narrowing it to specific categories/modules.
warnings.filterwarnings('ignore')

# Report the versions of the core libraries so a run can be reproduced later.
print(f"spaCy version: {spacy.__version__}")
print(f"PyTorch version: {torch.__version__}")
print(f"CuPy version: {cupy.__version__}")

# Smoke test: load the stock transformer pipeline and run it on a short sentence.
# If either step raises, the confirmation message below is never printed.
# NOTE(review): no spacy.prefer_gpu()/require_gpu() call is visible in this
# notebook, so the pipeline presumably runs on CPU — confirm this is intended.
nlp = spacy.load("en_core_web_trf")
doc = nlp("Hello world")
# The check-mark was mis-encoded (UTF-8 bytes read as cp1252); restored here.
print("✅ Tutto funzionante!")

In [None]:
# Environment diagnostics: one line per (caption, probe) pair.
# Each probe is wrapped in a lambda so it is evaluated immediately before its own
# line is printed, i.e. in the same order as five consecutive print() calls.
gpu_checks = (
    ("CuPy version:", lambda: cupy.__version__),
    ("CUDA available:", lambda: cupy.cuda.is_available()),
    ("Torch GPU disponibile?", lambda: torch.cuda.is_available()),
    ("Torch versione:", lambda: torch.__version__),
    ("CUDA supportata da torch:", lambda: torch.version.cuda),
)

for caption, probe in gpu_checks:
    print(caption, probe())

In [None]:
def _tokens_to_char_spans(tokens, ent_tags):
    """Join ``tokens`` with single spaces and locate every tagged token in the result.

    Args:
        tokens: Sequence of token strings.
        ent_tags: Sequence of tags, indexed in parallel with ``tokens``.
            The tag ``'O'`` marks a token that is not part of any entity.

    Returns:
        ``(text, spans)`` where ``text`` is ``" ".join(tokens)`` and ``spans`` is a
        list of ``(start_char, end_char, tag)`` tuples, one per token whose tag
        is not ``'O'``.

    Raises:
        IndexError: If ``ent_tags`` has fewer items than ``tokens``.
    """
    text = " ".join(tokens)
    spans = []
    cursor = 0  # character offset in `text` where the current token starts
    for index, token in enumerate(tokens):
        end = cursor + len(token)
        tag = ent_tags[index]
        if tag != 'O':
            # `text` is built from these same tokens with a one-character
            # separator, so text[cursor:end] == token holds by construction.
            spans.append((cursor, end, tag))
        cursor = end + 1  # skip the single space inserted by the join
    return text, spans


def create_example_from_dataset(dataset_split, pipeline=None):
    """Build one spaCy ``Example`` per record, pairing gold spans with predictions.

    For every record the tokens are joined into a text, each non-``'O'`` token
    becomes a gold character span labelled with its tag, and the pipeline's
    output on that text is stored as the example's ``predicted`` doc.

    NOTE(review): every tagged token becomes its own single-token span, labelled
    with the raw tag; consecutive tokens with the same tag are not merged.
    Confirm this matches how the evaluated model was trained.

    Args:
        dataset_split: Iterable of mappings with the keys ``'tokens'`` and
            ``'ent_tags'`` (parallel sequences).
        pipeline: Pipeline used for tokenisation and prediction. Defaults to the
            module-level ``nlp``, looked up at call time, which is what the
            function used before this parameter existed.

    Returns:
        A list of ``Example`` objects, one per record, in input order.
    """
    if pipeline is None:
        pipeline = nlp

    spacy_data = []
    for record in dataset_split:
        text, entities = _tokens_to_char_spans(record['tokens'], record['ent_tags'])

        # Gold side: attach the character spans to a doc produced by the
        # pipeline's tokenizer only (no model components are run here).
        scored = Example.from_dict(pipeline.make_doc(text), {"entities": entities})
        # Prediction side: replace the blank doc with the full pipeline output.
        scored.predicted = pipeline(text)
        spacy_data.append(scored)

    return spacy_data

In [None]:
# Rebind the global `nlp` to the pipeline stored in the Kaggle input dataset.
# create_example_from_dataset() reads this global when it is called, so the
# examples built below are predicted with this model, not the stock one.
nlp = spacy.load("/kaggle/input/model-fix-maggiore/model-best")

# Read the JSON with an explicit UTF-8 encoding so decoding does not depend on
# the locale of the machine running the notebook.
with open('/kaggle/input/prova-cascade/pii_no_pii.json', 'r', encoding='utf-8') as file:
    dataset = json.load(file)

# Select the records of the medical-consultations split and turn them into
# gold/predicted Example pairs (this runs the model once per record).
medical_dataset = dataset['DatasetDict']['medical_consultations']['Dataset']['data']
examples = create_example_from_dataset(medical_dataset)

In [None]:
# Score the gold/predicted pairs with a default-configured Scorer.
scorer = Scorer()
scores = scorer.score(examples)

# Caption -> key in the score dict; .get() prints None for a missing key.
metric_rows = (
    ("Precision:", "ents_p"),
    ("Recall:", "ents_r"),
    ("F1:", "ents_f"),
    ("Per-type:", "ents_per_type"),
)
for caption, metric_key in metric_rows:
    print(caption, scores.get(metric_key))