Evaluate the PII detection capability of Azure Cognitive Service for Language using the Presidio Evaluator framework

Prerequisites: 
 - Azure subscription - [Create one for free](https://azure.microsoft.com/en-us/free/cognitive-services/)
 - Once you have your Azure subscription, create a Language resource in the Azure portal to get your key and endpoint. After it deploys, click Go to resource.
 - You'll need the key and endpoint from the resource you create to connect your application to the API. You'll provide your key and endpoint in the "Run evaluation" section of this notebook.
 - You can use the free pricing tier (Free F0) to try the service, and upgrade later to a paid tier for production.
 - To use the Analyze feature, you'll need a Language resource with the standard (S) pricing tier.

Azure Cognitive Services for languages quickstart: https://learn.microsoft.com/en-us/azure/cognitive-services/language-service/personally-identifiable-information/quickstart?pivots=programming-language-python

In [None]:
from pathlib import Path
from copy import deepcopy
from pprint import pprint
from collections import Counter

from presidio_evaluator import InputSample
from presidio_evaluator.evaluation import Evaluator, ModelError
from presidio_evaluator.models import TextAnalyticsWrapper
from presidio_evaluator.experiment_tracking import get_experiment_tracker
import pandas as pd

# Lift pandas' display limits so printed DataFrames are never truncated.
for _display_option in ("display.max_columns", "display.max_rows", "display.max_colwidth"):
    pd.set_option(_display_option, None)

%reload_ext autoreload
%autoreload 2

Select data for evaluation

In [None]:
# The dataset file is looked up in the "data" folder located two directories
# above the current working directory.
dataset_name = "synth_dataset_v2.json"
dataset_path = Path.cwd().parent.parent / "data" / dataset_name
dataset = InputSample.read_dataset_json(dataset_path)
print(len(dataset))

In [None]:
# Tally how many times each tag occurs across every sample in the dataset.
entity_counter = Counter(tag for sample in dataset for tag in sample.tags)

Dataset exploration

In [None]:
# Tag frequencies, most common first.
print("Count per entity:")
pprint(entity_counter.most_common())

# Show the second sample of the dataset as an illustration.
print("\nExample sentence:")
print(dataset[1])

# Range of token counts per sample.
print("\nMin and max number of tokens in dataset:")
token_counts = [len(sample.tokens) for sample in dataset]
print(f"Min: {min(token_counts)}, Max: {max(token_counts)}")

# Range of text lengths (in characters) per sample.
print("\nMin and max sentence length in dataset:")
text_lengths = [len(sample.full_text) for sample in dataset]
print(f"Min: {min(text_lengths)}, Max: {max(text_lengths)}")

Run evaluation

In [None]:
import os

model_name = "Text analytics Analyzer"
# Credentials for the Azure Language resource.
# They are read from the LANGUAGE_KEY and LANGUAGE_ENDPOINT environment
# variables so that secrets do not have to be stored in the notebook.
# When a variable is not set, the placeholder below is used instead; replace
# it with your own Azure Text Analytics key / endpoint in that case.
key = os.environ.get("LANGUAGE_KEY", "XXXXXXXXXXXXXXXXXXXXXXXXXXX")
endpoint = os.environ.get(
    "LANGUAGE_ENDPOINT", "https://xxxxxxxxxxx.cognitiveservices.azure.com/"
)
model = TextAnalyticsWrapper(ta_key=key, ta_endpoint=endpoint)

In [None]:
print("Evaluating Azure Text Analytics.")

experiment = get_experiment_tracker()

# Translation table: entity names used in the dataset -> entity category names
# used by Text Analytics. Several dataset entities are mapped to "O"
# (presumably "no counterpart in Text Analytics" - confirm against
# Evaluator.align_entity_types).
# The PII entity categories supported by Text Analytics are listed here:
# https://learn.microsoft.com/en-us/azure/cognitive-services/language-service/personally-identifiable-information/concepts/conversations-entity-categories
i2b2_entities_to_text_analytics = {
    "PERSON": "Person",
    "STREET_ADDRESS": "Address",
    "GPE": "O",
    "PHONE_NUMBER": "PhoneNumber",
    "ORGANIZATION": "Organization",
    "DATE_TIME": "DateTime",
    "TITLE": "O",
    "CREDIT_CARD": "CreditCardNumber",
    "US_SSN": "USSocialSecurityNumber",
    "AGE": "Age",
    "NRP": "O",
    "ZIP_CODE": "O",
    "EMAIL_ADDRESS": "Email",
    "DOMAIN_NAME": "URL",
    "IP_ADDRESS": "IPAddress",
    "IBAN_CODE": "InternationalBankingAccountNumber",
    "US_DRIVER_LICENSE": "USDriversLicenseNumber",
}

# entities_to_keep restricts the evaluation to the listed entity names;
# everything else is ignored by the evaluator.
evaluator = Evaluator(model=model, entities_to_keep=["Person", "Address"])

# Re-label a deep copy of the dataset so the original samples stay untouched.
dataset_ = Evaluator.align_entity_types(
    deepcopy(dataset),
    entities_mapping=i2b2_entities_to_text_analytics,
)

# Run the model over the aligned samples and aggregate the scores.
evaluation_results = evaluator.evaluate_all(dataset_)
results = evaluator.calculate_score(evaluation_results)

# Record run parameters, dataset hash, metrics and confusion matrix
# in the experiment tracker.
params = {"dataset_name": dataset_name, "model_name": model_name}
params.update(model.to_log())
experiment.log_parameters(params)
experiment.log_dataset_hash(dataset)
experiment.log_metrics(results.to_log())
entities, confmatrix = results.to_confusion_matrix()
experiment.log_confusion_matrix(matrix=confmatrix, labels=entities)

# Close the experiment.
experiment.end()

In [None]:
# Display the confusion matrix as a table labelled with the entity names
# on both the rows and the columns.
print("Confusion matrix:")
confusion_table = pd.DataFrame(confmatrix, index=entities, columns=entities)
print(confusion_table)

In [None]:
# Print the heading followed by the textual form of the evaluation results.
print("Precision and recall", results, sep="\n")