# 1. Data<span style="color:red"> Loss</span> <span style="color:green">Prevention</span>


In [4]:
from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine import NlpEngineProvider

# NLP backend settings: spaCy, English only, using the small English model.
configuration = dict(
    nlp_engine_name="spacy",
    models=[dict(lang_code="en", model_name="en_core_web_sm")],
)

# Build the NLP engine once at module level; the functions below reuse it.
provider = NlpEngineProvider(nlp_configuration=configuration)
nlp_engine = provider.create_engine()

# Function to check for PPI in the email text
def check_for_ppi(email_text):
    """Print whether the analyzer flags any entity in ``email_text``.

    A fresh ``AnalyzerEngine`` is built on the module-level ``nlp_engine``
    and run over the text in English. Any non-empty result list is treated
    as a hit. The verdict is printed; nothing is returned.

    Args:
        email_text: The raw email body to scan.
    """
    engine = AnalyzerEngine(nlp_engine=nlp_engine, supported_languages=['en'])
    findings = engine.analyze(text=email_text, language='en')
    # One print call; the message depends only on whether anything was found.
    verdict = (
        "PPI detected, message blocked"
        if findings
        else "No PPI detected, message safe to send"
    )
    print(verdict)

# Example usage: a sample business email that mentions people, a company,
# a deal amount, relative dates/times and a phone number.
email_input = """
Subject: $50M Deal with Tesla – Update

Hi Mark,
Quick update on the $50M contract with Tesla. Sarah and I had a productive call with Elon yesterday.
We're ironing out the final details and expect to close the deal by next Friday.
Let's touch base tomorrow at 10 AM to discuss the next steps.
You can reach me at 212-555-1234 if you have any questions.
Best, John.
"""
# Prints the verdict for the sample email; the function returns nothing.
check_for_ppi(email_input)


PPI detected, message blocked


# 2. Data<span style="color:lightpink"> Masking</span>


In [17]:
from presidio_analyzer import AnalyzerEngine, PatternRecognizer, Pattern
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities.engine import OperatorConfig

# NLP backend settings: spaCy, English only, using the large English model.
configuration = dict(
    nlp_engine_name="spacy",
    models=[dict(lang_code="en", model_name="en_core_web_lg")],
)

# Build the NLP engine once at module level; anonymize_ppi reuses it.
provider = NlpEngineProvider(nlp_configuration=configuration)
nlp_engine = provider.create_engine()
# Function to anonymize PPI in the email text
def anonymize_ppi(email_text):
    """Return ``email_text`` with detected entities replaced by type labels.

    The text is analyzed in English with an ``AnalyzerEngine`` built on the
    module-level ``nlp_engine`` and extended with two exact-word pattern
    recognizers ("Elon" as PERSON, "Tesla" as ORGANIZATION). Each detected
    span is then replaced by the name of its entity type; entity types
    without a dedicated operator are replaced by the literal "DEFAULT".

    Args:
        email_text: The raw email body to mask.

    Returns:
        The masked text as a string.
    """
    engine = AnalyzerEngine(nlp_engine=nlp_engine, supported_languages=['en'])

    # Extra recognizers as (entity type, pattern name, regex), registered
    # in this order.
    extra_patterns = (
        ("PERSON", "elon", r'\bElon\b'),
        ("ORGANIZATION", "tesla", r'\bTesla\b'),
    )
    for entity, pattern_name, regex in extra_patterns:
        recognizer = PatternRecognizer(
            supported_entity=entity,
            patterns=[Pattern(name=pattern_name, regex=regex, score=1.0)],
        )
        engine.registry.add_recognizer(recognizer)

    masker = AnonymizerEngine()
    findings = engine.analyze(text=email_text, language='en')

    # Every listed label is replaced by its own name; "DEFAULT" is the
    # fallback key and is likewise replaced by the word "DEFAULT".
    labels = ("PERSON", "PHONE_NUMBER", "ORGANIZATION", "MONEY", "DATE_TIME", "DEFAULT")
    operators = {
        label: OperatorConfig("replace", {"new_value": label})
        for label in labels
    }

    masked = masker.anonymize(
        text=email_text,
        analyzer_results=findings,
        operators=operators,
    )
    return masked.text

# Example usage: the same sample business email (people, a company, a deal
# amount, relative dates/times and a phone number).
email_input = """
Subject: $50M Deal with Tesla – Update

Hi Mark,
Quick update on the $50M contract with Tesla. Sarah and I had a productive call with Elon yesterday.
We're ironing out the final details and expect to close the deal by next Friday.
Let's touch base tomorrow at 10 AM to discuss the next steps.
You can reach me at 212-555-1234 if you have any questions.
Best, John.
"""
# Print the masked version of the email returned by anonymize_ppi.
print(anonymize_ppi(email_input))



Subject: $50M Deal with ORGANIZATION – Update

Hi PERSON,
Quick update on the $50M contract with ORGANIZATION. PERSON and I had a DEFAULT call with PERSON DATE_TIME.
We're ironing out the final details and expect to close the deal by DATE_TIME.
Let's touch base DATE_TIME at DATE_TIME to discuss the next steps.
You can reach me at PHONE_NUMBER if you have any questions.
Best, PERSON.



# 3. Data<span style="color:lightgreen"> Substitution</span>


In [4]:
from langchain_core.prompts.prompt import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
import os

# Reversible anonymizer: substitutes detected values and can map the
# substitutes back to the originals via deanonymize().
anonymizer = PresidioReversibleAnonymizer()

template = """Rewrite this text into an official, short email, keep names (e.g. do not use Mr. or Mrs.):

{anonymized_text}"""

prompt = PromptTemplate.from_template(template)
llm = ChatOpenAI(temperature=0, model="gpt-4-turbo", api_key=os.getenv("API_KEY"))

# Example usage
email_input = """
Subject: $50M Deal with Tesla – Update

Hi Mark,
Quick update on the $50M contract with Tesla. Sarah and I had a productive call with Elon yesterday.
We're ironing out the final details and expect to close the deal by next Friday.
Let's touch base tomorrow at 10 AM to discuss the next steps.
You can reach me at 212-555-1234 if you have any questions.
Best, John.
"""

# Each stage below runs exactly once and feeds its result to the next one.
# Re-invoking a progressively longer chain would anonymize the email again
# and call the model a second time, so the de-anonymized text would not be
# derived from the response that was actually printed.

# Stage 1: anonymize the email and render the prompt.
chain = {"anonymized_text": anonymizer.anonymize} | prompt
prompt_value = chain.invoke(email_input)
print("╔════════════════════════════════════════╗")
print("║ Anonymized Email:                      ║")
print("╚════════════════════════════════════════╝")
print(prompt_value.text)


# Stage 2: send the already-rendered prompt to the model (single API call).
ai_message = llm.invoke(prompt_value)
print("╔════════════════════════════════════════╗")
print("║ Anonymized GPT Response:               ║")
print("╚════════════════════════════════════════╝")
print(ai_message.content)


print("")
print("")
# Stage 3: map the substitutes in that same response back to the originals.
response = anonymizer.deanonymize(ai_message.content)
print("╔════════════════════════════════════════╗")
print("║ De-anonymized GPT Response:            ║")
print("╚════════════════════════════════════════╝")
print(response)

# Leave `chain` bound to the full anonymize -> prompt -> LLM -> de-anonymize
# pipeline so it can be reused; building it does not invoke anything.
chain = chain | llm | (lambda ai_message: anonymizer.deanonymize(ai_message.content))


╔════════════════════════════════════════╗
║ Anonymized Email:                      ║
╚════════════════════════════════════════╝
Rewrite this text into an official, short email, keep names (e.g. do not use Mr. or Mrs.):


Subject: $50M Deal with Tesla – Update

Hi Curtis Allen,
Quick update on the $50M contract with Tesla. Aaron Campos and I had a productive call with Elon 1972-12-11.
We're ironing out the final details and expect to close the deal by 2020-08-20.
Let's touch base 1997-05-06 at 2018-04-06 to discuss the next steps.
You can reach me at (886)648-7028x8008 if you have any questions.
Best, Jose Mcfarland.

╔════════════════════════════════════════╗
║ Anonymized GPT Response:               ║
╚════════════════════════════════════════╝
Subject: Update on $50M Tesla Contract

Dear Curtis Allen,

I am writing to inform you about the progress of our $50M contract with Tesla. Aaron Campos and I had a productive discussion with Elon Musk on December 11, 1972. We are currently final

In [None]:
from IPython.display import IFrame

# Embed https://frangin2003.github.io/ in an inline frame (width="100%",
# height=800). NOTE(review): presumably displayed as this notebook cell's
# output because it is the cell's last expression — confirm.
IFrame(src="https://frangin2003.github.io/", width="100%", height=800)


# 3.b Data<span style="color:lightblue"> Substitution</span> with Ollama

In [7]:
from langchain_core.prompts.prompt import PromptTemplate
from langchain_community.llms import Ollama
from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
import os

# Reversible anonymizer restricted to the listed entity types; it can map
# the substitutes back to the originals via deanonymize().
anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "ORGANIZATION", "MONEY", "DATE_TIME"],
    # Faker seed is used here to make sure the same fake data is generated for the test purposes
    # In production, it is recommended to remove the faker_seed parameter (it will default to None)
    faker_seed=42,
)

template = """Rewrite this text into an official, short email, keep names (e.g. do not use Mr. or Mrs.):

{anonymized_text}"""
prompt = PromptTemplate.from_template(template)
llm = Ollama(
    model="llama3",
)

# Example usage
email_input = """
Subject: $50M Deal with Tesla – Update

Hi Mark,
Quick update on the $50M contract with Tesla. Sarah and I had a productive call with Elon yesterday.
We're ironing out the final details and expect to close the deal by next Friday.
Let's touch base tomorrow at 10 AM to discuss the next steps.
You can reach me at 212-555-1234 if you have any questions.
Best, John.
"""

# Each stage below runs exactly once and feeds its result to the next one.
# Re-invoking a progressively longer chain would anonymize the email again
# and query the model a second time, so the de-anonymized text would come
# from a different generation than the one printed as the anonymized response.

# Stage 1: anonymize the email and render the prompt.
chain = {"anonymized_text": anonymizer.anonymize} | prompt
prompt_value = chain.invoke(email_input)
print("╔════════════════════════════════════════╗")
print("║ Anonymized Email:                      ║")
print("╚════════════════════════════════════════╝")
print(prompt_value.text)


# Stage 2: send the already-rendered prompt to the model once; the result is
# passed on directly as text.
llm_text = llm.invoke(prompt_value)
print("╔════════════════════════════════════════╗")
print("║ Anonymized Llama3 Response:            ║")
print("╚════════════════════════════════════════╝")
print(llm_text)


print("")
print("")
# Stage 3: map the substitutes in that same response back to the originals.
response = anonymizer.deanonymize(llm_text)
print("╔════════════════════════════════════════╗")
print("║ De-anonymized Llama3 Response:         ║")
print("╚════════════════════════════════════════╝")
print(response)

# Leave `chain` bound to the full anonymize -> prompt -> LLM -> de-anonymize
# pipeline so it can be reused; building it does not invoke anything.
chain = chain | llm | (lambda ai_message: anonymizer.deanonymize(ai_message))


╔════════════════════════════════════════╗
║ Anonymized Email:                      ║
╚════════════════════════════════════════╝
Rewrite this text into an official, short email, keep names (e.g. do not use Mr. or Mrs.):


Subject: $50M Deal with Tesla – Update

Hi Laura Bush,
Quick update on the $50M contract with Tesla. Valerie Gray and I had a productive call with Elon 1993-05-29.
We're ironing out the final details and expect to close the deal by 1976-07-31.
Let's touch base 1976-04-24 at 1992-11-27 to discuss the next steps.
You can reach me at 001-486-537-9402x654 if you have any questions.
Best, Noah Rhodes.

╔════════════════════════════════════════╗
║ Anonymized Llama3 Response:            ║
╚════════════════════════════════════════╝
Here is a rewritten version of the text in the form of an official email:

Subject: Update on $50M Deal with Tesla

Dear Laura Bush,

I am writing to provide an update on our $50M contract with Tesla. I had a productive call yesterday with Elon Mus