In [None]:
from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer

# Baseline: LangChain's stock reversible anonymizer, restricted to the four
# entity types listed below. `faker_seed` is fixed at 42 for this demo.
anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD"],
    faker_seed=42,
)

# Adjacent string literals are joined with nothing in between, so the first
# literal has to end with ". " itself; without it the analyzed text reads
# "...@gmail.comMy credit card..." and the email runs into the next sentence.
anonymizer.anonymize(
    "My name is slim shady, call me at 313-666-7559 or email me at real.slim.shady@gmail.com. "
    "My credit card is 4916 0382 3232 0732."
)

In [None]:
import transformers
from huggingface_hub import snapshot_download
from transformers import AutoModelForTokenClassification, AutoTokenizer

# Hugging Face Hub repository id of the token-classification model used for NER.
transformers_model = "StanfordAIMI/stanford-deidentifier-base"

# Pull the repository snapshot ahead of time.
snapshot_download(transformers_model)

# Load the tokenizer and the model once here so that any download happens at
# install/setup time instead of on first use at runtime.
AutoTokenizer.from_pretrained(transformers_model)
AutoModelForTokenClassification.from_pretrained(transformers_model)

In [7]:
import os

from presidio_analyzer.nlp_engine import (
    NerModelConfiguration,
    NlpEngine,
    NlpEngineProvider,
    SpacyNlpEngine,
    TransformersNlpEngine,
)

from new_presidio import PresidioReversibleAnonymizer

# Redis connection settings are taken from the environment; os.getenv returns
# None for a variable that is not set.
REDIS_IP_APP = os.getenv('REDIS_IP')
REDIS_PASSWORD_APP = os.getenv('REDIS_PASSWORD')

# Transformers NLP engine configuration: one English pipeline.
model_config = [
    {
        "lang_code": "en",
        "model_name": {
            "spacy": "en_core_web_sm",  # for tokenization, lemmatization
            "transformers": "StanfordAIMI/stanford-deidentifier-base",  # for NER
        },
    }
]

# Entity mappings between the model's labels (keys) and Presidio's entity
# names (values).
mapping = {
    "PER": "PERSON",
    "LOC": "LOCATION",
    "ORG": "ORGANIZATION",
    "AGE": "AGE",
    "ID": "ID",
    "EMAIL": "EMAIL",
    "DATE": "DATE_TIME",
    "PHONE": "PHONE_NUMBER",
    "PERSON": "PERSON",
    "LOCATION": "LOCATION",
    "GPE": "LOCATION",
    "ORGANIZATION": "ORGANIZATION",
    "NORP": "NRP",
    "PATIENT": "PERSON",
    "STAFF": "PERSON",
    "HOSP": "LOCATION",
    "PATORG": "ORGANIZATION",
    "TIME": "DATE_TIME",
    "HCW": "PERSON",
    "HOSPITAL": "LOCATION",
    "FACILITY": "LOCATION",
    "VENDOR": "ORGANIZATION",
}

# Model labels that should not be reported as entities.
labels_to_ignore = ["O"]

ner_model_configuration = NerModelConfiguration(
    model_to_presidio_entity_mapping=mapping,
    alignment_mode="expand",  # "strict", "contract", "expand"
    aggregation_strategy="max",  # "simple", "first", "average", "max"
    labels_to_ignore=labels_to_ignore,
)

transformers_nlp_engine = TransformersNlpEngine(
    models=model_config,
    ner_model_configuration=ner_model_configuration,
)

# SECURITY: the encryption key used to be a literal only. It can now be
# supplied through the ANONYMIZER_ENCRYPTION_KEY environment variable.
# FIXME: the fallback literal is kept solely so existing runs keep working; it
# is committed in the notebook and must be treated as compromised. Set the
# environment variable to a fresh key and delete the fallback. Any session
# that later loads the mapping must be given the same key.
redis_key = os.getenv(
    "ANONYMIZER_ENCRYPTION_KEY",
    "W06H6u6sViLpgQKmos4jln435LlG7PX7taIVutLywjg=",
)

# Initialize the PresidioReversibleAnonymizer with the custom transformers_nlp_engine
anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD"],
    nlp_engine=transformers_nlp_engine,  # pass the pre-configured TransformersNlpEngine instance
    faker_seed=42,
    redis_host=REDIS_IP_APP,
    redis_password=REDIS_PASSWORD_APP,
    encryption_key=redis_key,
)

# Now, anonymize text using the Transformers engine
anonymized_text = anonymizer.anonymize(
    "My name is slim shady, call me at 313-666-7559 or email me at real.slim.shady@gmail.com. "
    "My credit card is 4916 0382 3232 0732."
)
print(anonymized_text)



Device set to use cpu
INFO:presidio-analyzer:registry not provided, creating default.
INFO:presidio-analyzer:Loaded recognizer: CreditCardRecognizer
INFO:presidio-analyzer:Loaded recognizer: CreditCardRecognizer
INFO:presidio-analyzer:Loaded recognizer: CreditCardRecognizer
INFO:presidio-analyzer:Loaded recognizer: CreditCardRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsBankRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsLicenseRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsItinRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsPassportRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsSsnRecognizer
INFO:presidio-analyzer:Loaded recognizer: NhsRecognizer
INFO:presidio-analyzer:Loaded recognizer: EsNifRecognizer
INFO:presidio-analyzer:Loaded recognizer: EsNieRecognizer
INFO:presidio-analyzer:Loaded recognizer: ItDriverLicenseRecognizer
INFO:presidio-analyzer:Loaded recognizer: ItFiscalCodeRecognizer
INFO:presidio-analyzer:Loaded recognizer: ItVatCodeRecog

My name is Roy Martin, call me at +1-649-359-3103 or email me at jpeterson@example.org. My credit card is 538.990.8386.




In [8]:
import os
# Store the anonymizer's deanonymizer mapping in Redis under `redis_key`.
# The recorded output of this cell shows the call logging the key and
# returning it.
# NOTE(review): `redis_key` is the same value that was passed as
# `encryption_key` when the anonymizer was constructed, so the secret ends up
# in the log line and in the cell output - consider a separate, non-secret
# storage key. `os` is not used in this cell.
anonymizer.save_deanonymizer_mapping_to_redis(redis_key)

INFO:new_presidio:Encrypted anonymizer mapping saved to Redis under key: W06H6u6sViLpgQKmos4jln435LlG7PX7taIVutLywjg=


'W06H6u6sViLpgQKmos4jln435LlG7PX7taIVutLywjg='

## Deanonymize

In [5]:
import os

from presidio_analyzer.nlp_engine import (
    NerModelConfiguration,
    NlpEngine,
    NlpEngineProvider,
    SpacyNlpEngine,
    TransformersNlpEngine,
)

from new_presidio import PresidioReversibleAnonymizer

# Redis connection settings are taken from the environment; os.getenv returns
# None for a variable that is not set.
REDIS_IP_APP = os.getenv('REDIS_IP')
REDIS_PASSWORD_APP = os.getenv('REDIS_PASSWORD')

# Transformers NLP engine configuration: one English pipeline.
model_config = [
    {
        "lang_code": "en",
        "model_name": {
            "spacy": "en_core_web_sm",  # for tokenization, lemmatization
            "transformers": "StanfordAIMI/stanford-deidentifier-base",  # for NER
        },
    }
]

# Entity mappings between the model's labels (keys) and Presidio's entity
# names (values).
mapping = {
    "PER": "PERSON",
    "LOC": "LOCATION",
    "ORG": "ORGANIZATION",
    "AGE": "AGE",
    "ID": "ID",
    "EMAIL": "EMAIL",
    "DATE": "DATE_TIME",
    "PHONE": "PHONE_NUMBER",
    "PERSON": "PERSON",
    "LOCATION": "LOCATION",
    "GPE": "LOCATION",
    "ORGANIZATION": "ORGANIZATION",
    "NORP": "NRP",
    "PATIENT": "PERSON",
    "STAFF": "PERSON",
    "HOSP": "LOCATION",
    "PATORG": "ORGANIZATION",
    "TIME": "DATE_TIME",
    "HCW": "PERSON",
    "HOSPITAL": "LOCATION",
    "FACILITY": "LOCATION",
    "VENDOR": "ORGANIZATION",
}

# Model labels that should not be reported as entities.
labels_to_ignore = ["O"]

ner_model_configuration = NerModelConfiguration(
    model_to_presidio_entity_mapping=mapping,
    alignment_mode="expand",  # "strict", "contract", "expand"
    aggregation_strategy="max",  # "simple", "first", "average", "max"
    labels_to_ignore=labels_to_ignore,
)

transformers_nlp_engine = TransformersNlpEngine(
    models=model_config,
    ner_model_configuration=ner_model_configuration,
)

# SECURITY: the encryption key used to be a literal only. It can now be
# supplied through the ANONYMIZER_ENCRYPTION_KEY environment variable.
# FIXME: the fallback literal is kept solely so existing runs keep working; it
# is committed in the notebook and must be treated as compromised. Set the
# environment variable and delete the fallback. The value must be the same key
# that was in effect when the mapping was saved, otherwise it cannot be
# decrypted.
redis_key = os.getenv(
    "ANONYMIZER_ENCRYPTION_KEY",
    "W06H6u6sViLpgQKmos4jln435LlG7PX7taIVutLywjg=",
)

# Initialize the PresidioReversibleAnonymizer with the custom transformers_nlp_engine
anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD"],
    nlp_engine=transformers_nlp_engine,  # pass the pre-configured TransformersNlpEngine instance
    faker_seed=42,
    redis_host=REDIS_IP_APP,
    redis_password=REDIS_PASSWORD_APP,
    encryption_key=redis_key,
)


Device set to use cpu
INFO:presidio-analyzer:registry not provided, creating default.
INFO:presidio-analyzer:Loaded recognizer: CreditCardRecognizer
INFO:presidio-analyzer:Loaded recognizer: CreditCardRecognizer
INFO:presidio-analyzer:Loaded recognizer: CreditCardRecognizer
INFO:presidio-analyzer:Loaded recognizer: CreditCardRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsBankRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsLicenseRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsItinRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsPassportRecognizer
INFO:presidio-analyzer:Loaded recognizer: UsSsnRecognizer
INFO:presidio-analyzer:Loaded recognizer: NhsRecognizer
INFO:presidio-analyzer:Loaded recognizer: EsNifRecognizer
INFO:presidio-analyzer:Loaded recognizer: EsNieRecognizer
INFO:presidio-analyzer:Loaded recognizer: ItDriverLicenseRecognizer
INFO:presidio-analyzer:Loaded recognizer: ItFiscalCodeRecognizer
INFO:presidio-analyzer:Loaded recognizer: ItVatCodeRecog

In [7]:
# Fetch the encrypted mapping stored under `redis_key` and load it into this
# anonymizer instance.
anonymizer.load_deanonymizer_mapping_from_redis(redis_key)

# A reply that mentions only the fake values printed by the anonymization
# step (name, email, phone, card); it spans two lines.
anonymized_text = (
    "We're sorry Roy Martin for the inconvenience. "
    "We will let you know soon through your email at jpeterson@example.org. \n"
    "We will call your number +1-649-359-3103 "
    "to proceed with the credit card number of 538.990.8386."
)

# Map the fake values back to the originals and show the restored text.
deanonymized_text = anonymizer.deanonymize(anonymized_text)
print(deanonymized_text)

INFO:new_presidio:Encrypted anonymizer mapping loaded and decrypted for key: W06H6u6sViLpgQKmos4jln435LlG7PX7taIVutLywjg=


We're sorry slim shady for the inconvenience. We will let you know soon through your email at real.slim.shady@gmail.com. 
We will call your number 313-666-7559 to proceed with the credit card number of 4916 0382 3232 0732.
