In [None]:
from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine import TransformersNlpEngine
from presidio_anonymizer import AnonymizerEngine

# Sample sentence (all lowercase) that is analyzed and then anonymized below.
text = "i am daniel and i stay in surabaya"

# Models used for English: a small spaCy pipeline for lemmas, tokens etc.,
# plus the Hugging Face checkpoint listed under "transformers".
# Alternative checkpoint that was tried here:
#   "Isotonic/distilbert_finetuned_ai4privacy_v2"
model_names = {
    "spacy": "en_core_web_sm",
    "transformers": "dslim/bert-base-NER",
}
model_config = [
    {
        "lang_code": "en",
        "model_name": model_names,
    }
]

# Build the Transformers-backed NLP engine from the configuration above.
nlp_engine = TransformersNlpEngine(models=model_config)

# The analyzer is given this engine explicitly (instead of being constructed
# with no arguments), alongside its other PII recognizers.
analyzer = AnalyzerEngine(nlp_engine=nlp_engine)

# Detect entities in the English sample text and show the raw results.
results = analyzer.analyze(
    text=text,
    language="en",
)
print(results)

# Hand the analyzer results to the anonymizer, which rewrites the text,
# then show the anonymization result.
anonymizer = AnonymizerEngine()
anonymized_text = anonymizer.anonymize(
    text=text,
    analyzer_results=results,
)
print(anonymized_text)

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


[type: LOCATION, start: 26, end: 34, score: 0.9957412481307983]
text: i am daniel and i stay in <LOCATION>
items:
[
    {'start': 26, 'end': 36, 'entity_type': 'LOCATION', 'text': '<LOCATION>', 'operator': 'replace'}
]



## spaCy

In [14]:
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

# Sample text (all lowercase) mentioning several people and two cities.
text = "peter gave his book to heidi which later gave it to nicole. peter lives in london and nicole lives in tashkent."

# Analyzer constructed with no arguments: it loads the NLP module
# (spaCy model by default) and the other PII recognizers.
analyzer = AnalyzerEngine()

# Only these entity types are requested from the analyzer.
target_entities = ["PHONE_NUMBER", "PERSON", "LOCATION", "URL", "NRP"]

# Run detection on the English text and show the raw results.
results = analyzer.analyze(
    text=text,
    entities=target_entities,
    language="en",
)
print(results)

# Pass the analyzer results to the anonymizer, which rewrites the text,
# then show the anonymization result.
anonymizer = AnonymizerEngine()
anonymized_text = anonymizer.anonymize(
    text=text,
    analyzer_results=results,
)
print(anonymized_text)

[type: PERSON, start: 0, end: 5, score: 0.85, type: PERSON, start: 23, end: 28, score: 0.85, type: PERSON, start: 52, end: 58, score: 0.85, type: PERSON, start: 60, end: 65, score: 0.85, type: LOCATION, start: 75, end: 81, score: 0.85, type: PERSON, start: 86, end: 92, score: 0.85, type: LOCATION, start: 102, end: 110, score: 0.85]
text: <PERSON> gave his book to <PERSON> which later gave it to <PERSON>. <PERSON> lives in <LOCATION> and <PERSON> lives in <LOCATION>.
items:
[
    {'start': 119, 'end': 129, 'entity_type': 'LOCATION', 'text': '<LOCATION>', 'operator': 'replace'},
    {'start': 101, 'end': 109, 'entity_type': 'PERSON', 'text': '<PERSON>', 'operator': 'replace'},
    {'start': 86, 'end': 96, 'entity_type': 'LOCATION', 'text': '<LOCATION>', 'operator': 'replace'},
    {'start': 68, 'end': 76, 'entity_type': 'PERSON', 'text': '<PERSON>', 'operator': 'replace'},
    {'start': 58, 'end': 66, 'entity_type': 'PERSON', 'text': '<PERSON>', 'operator': 'replace'},
    {'start': 26, 