## imports

In [2]:
from patterns.default_patterns import LABEL_MAP, RELEVANT_LABELS
from data.ai4privacy.pii_masking_200k import load_pii_dataset
from evaluation.metrics import compute_metrics, print_metrics
from evaluation.evaluator import evaluate_dataset
from classes.pii_detector import PIIDetector

## loading dataset and model

In [3]:
# Load the "train" split through the project's ai4privacy pii_masking_200k loader.
ds = load_pii_dataset(split="train")
# Build the detector under evaluation.
# NOTE(review): "en_core_web_trf" is presumably the spaCy transformer pipeline name — confirm in PIIDetector.
detector = PIIDetector(model="en_core_web_trf")

## exploring dataset labels

In [4]:
# Collect every distinct label found in any example's "privacy_mask" entries.
all_labels = {
    span["label"]
    for record in ds
    for span in record["privacy_mask"]
}

# Show the labels alphabetically, followed by how many there are.
print(sorted(all_labels))
print(f"\nTotal unique labels: {len(all_labels)}")

['ACCOUNTNAME', 'ACCOUNTNUMBER', 'AGE', 'AMOUNT', 'BIC', 'BITCOINADDRESS', 'BUILDINGNUMBER', 'CITY', 'COMPANYNAME', 'COUNTY', 'CREDITCARDCVV', 'CREDITCARDISSUER', 'CREDITCARDNUMBER', 'CURRENCY', 'CURRENCYCODE', 'CURRENCYNAME', 'CURRENCYSYMBOL', 'DATE', 'DOB', 'EMAIL', 'ETHEREUMADDRESS', 'EYECOLOR', 'FIRSTNAME', 'GENDER', 'HEIGHT', 'IBAN', 'IP', 'IPV4', 'IPV6', 'JOBAREA', 'JOBTITLE', 'JOBTYPE', 'LASTNAME', 'LITECOINADDRESS', 'MAC', 'MASKEDNUMBER', 'MIDDLENAME', 'NEARBYGPSCOORDINATE', 'ORDINALDIRECTION', 'PASSWORD', 'PHONEIMEI', 'PHONENUMBER', 'PIN', 'PREFIX', 'SECONDARYADDRESS', 'SEX', 'SSN', 'STATE', 'STREET', 'TIME', 'URL', 'USERAGENT', 'USERNAME', 'VEHICLEVIN', 'VEHICLEVRM', 'ZIPCODE']

Total unique labels: 56


## evaluation

In [5]:
# Run the detector over the loaded dataset. LABEL_MAP and RELEVANT_LABELS are
# imported from patterns.default_patterns.
# NOTE(review): presumably LABEL_MAP translates dataset labels to detector labels
# and RELEVANT_LABELS restricts which labels are scored — confirm in evaluate_dataset.
# `examples` is not consumed by any cell shown in this section.
results, examples = evaluate_dataset(ds, detector, LABEL_MAP, RELEVANT_LABELS)

## computing precision, recall, F1

In [6]:
# Turn the raw evaluation results into `summary` and `totals`, then print the report.
# NOTE(review): the printed report shows per-label rows plus macro and micro averages,
# so `summary` is presumably per-label and `totals` the aggregate counts — confirm in evaluation.metrics.
summary, totals = compute_metrics(results)
print_metrics(summary, totals)


--- Evaluation Summary (filtered) ---
ORG          Correct: 107  Missed: 416  Wrong: 136  P:0.440  R:0.205  F1:0.279
PHONENUMBER  Correct:  67  Missed:   0  Wrong: 339  P:0.165  R:1.000  F1:0.283
PERSON       Correct: 471  Missed: 361  Wrong:  39  P:0.924  R:0.566  F1:0.702
LOC          Correct:  22  Missed: 231  Wrong:  23  P:0.489  R:0.087  F1:0.148
ZIPCODE      Correct:  56  Missed:   0  Wrong: 419  P:0.118  R:1.000  F1:0.211
DATE         Correct: 157  Missed:  61  Wrong: 111  P:0.586  R:0.720  F1:0.646
GPE          Correct: 127  Missed:  56  Wrong:  11  P:0.920  R:0.694  F1:0.791
EMAIL        Correct:  84  Missed:   0  Wrong:   0  P:1.000  R:1.000  F1:1.000

--- Macro Averages ---
P:0.580  R:0.659  F1:0.508

--- Micro Averages ---
P:0.503  R:0.492  F1:0.498
