In [None]:
%pip install presidio-analyzer presidio-anonymizer

In [4]:
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig

# Initialize the Presidio analyzer and anonymizer engines
analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()

# Example text containing PII 
text_to_analyze = (
    "My name is John Doe and my credit card number is 1234-5678-9101-1121. "
    "Feel free to contact me at john.doe@example.com or call 555-123-4567."
)

# Analyze the text for PII entities
results = analyzer.analyze(
    text=text_to_analyze,
    language='en'
    # Removed all_fields=True as it's not a valid parameter
)

# Print detection results
for result in results:
    print(f"Detected entity: {result.entity_type}, Score: {result.score}, Position: {result.start}-{result.end}")

# Anonymize the detected PII entities in the text
anonymized_result = anonymizer.anonymize(
    text=text_to_analyze,
    analyzer_results=results,
    operators={
        "DEFAULT": OperatorConfig("mask", {"masking_char": "*", "chars_to_mask": 0, "masking_length": 4})
    }
)

# Output the anonymized text
print("\nAnonymized text:")
print(anonymized_result.text)

Detected entity: EMAIL_ADDRESS, Score: 1.0, Position: 97-117
Detected entity: PERSON, Score: 0.85, Position: 11-19
Detected entity: URL, Score: 0.5, Position: 97-104
Detected entity: URL, Score: 0.5, Position: 106-117
Detected entity: PHONE_NUMBER, Score: 0.4, Position: 126-138
Detected entity: IN_PAN, Score: 0.05, Position: 49-59


InvalidParamError: Expected parameter from_end

In [8]:
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig

# Initialize the Presidio analyzer and anonymizer engines
analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()

# Example text containing PII 
text_to_analyze = (
    "La clienta necesita ayuda para configurar Amazon Prime, pero no puede acceder a la promoción porque no recuerda su contraseña y no sabe cómo hacerlo. Aunque menciona la configuración en el móvil, el problema principal radica en el acceso a la promoción debido a las credenciales. La descripción de la categoría 'Problemas de accesos y credenciales' menciona explícitamente 'problemas con su cuenta, credenciales, claves, contraseñas o datos de acceso', lo que se ajusta perfectamente a la situación de Rosario. "
    # "Feel free to contact me at john.doe@example.com or call 555-123-4567."
)

# Analyze the text for PII entities
results = analyzer.analyze(
    text=text_to_analyze,
    language='en'
)

# Print detection results
for result in results:
    print(f"Detected entity: {result.entity_type}, Score: {result.score}, Position: {result.start}-{result.end}")

# Anonymize the detected PII entities in the text
anonymized_result = anonymizer.anonymize(
    text=text_to_analyze,
    analyzer_results=results,
    operators={
        "PERSON": OperatorConfig("replace", {"new_value": "<PERSON>"}),
        "EMAIL_ADDRESS": OperatorConfig("replace", {"new_value": "<EMAIL>"}),
        "PHONE_NUMBER": OperatorConfig("replace", {"new_value": "<PHONE>"}),
        "CREDIT_CARD": OperatorConfig("replace", {"new_value": "<CREDIT_CARD>"}),
    }
)

# Output the anonymized text
print("\nAnonymized text:")
print(anonymized_result.text)

Detected entity: LOCATION, Score: 0.85, Position: 0-2
Detected entity: LOCATION, Score: 0.85, Position: 80-82
Detected entity: PERSON, Score: 0.85, Position: 150-194
Detected entity: LOCATION, Score: 0.85, Position: 240-242
Detected entity: LOCATION, Score: 0.85, Position: 262-265
Detected entity: PERSON, Score: 0.85, Position: 280-312
Detected entity: PERSON, Score: 0.85, Position: 453-469

Anonymized text:
<LOCATION> clienta necesita ayuda para configurar Amazon Prime, pero no puede acceder a <LOCATION> promoción porque no recuerda su contraseña y no sabe cómo hacerlo. <PERSON>, el problema principal radica en el acceso a <LOCATION> promoción debido a <LOCATION> credenciales. <PERSON>Problemas de accesos y credenciales' menciona explícitamente 'problemas con su cuenta, credenciales, claves, contraseñas o datos de acceso', <PERSON> perfectamente a la situación de Rosario. 


In [12]:
from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
from presidio_analyzer.nlp_engine import NlpEngineProvider

# Create configuration containing engine name and models
configuration = {
    "nlp_engine_name": "spacy",
    "models": [
        {"lang_code": "es", "model_name": "es_core_news_md"},  # Spanish model
    ],
}

# Create NLP engine based on configuration
provider = NlpEngineProvider(nlp_configuration=configuration)
nlp_engine_spanish = provider.create_engine()

# Initialize the registry with Spanish-specific recognizers
registry = RecognizerRegistry()

# Create analyzer with Spanish configuration
analyzer = AnalyzerEngine(
    nlp_engine=nlp_engine_spanish,
    supported_languages=["es"]
)

# Example text in Spanish
# Example text in Spanish
text_to_analyze = (
    "Buenos días, soy María del servicio al cliente. ¿En qué puedo ayudarle? "
    "Hola, me llamo Antonio García Pérez. Necesito ayuda con mi cuenta. "
    "Por supuesto, ¿me puede dar su DNI? "
    "Sí, es 45678912K. "
    "¿Y su número de teléfono? "
    "Mi móvil es 666-555-444. "
    "Gracias. ¿Y su dirección actual? "
    "Vivo en Calle Mayor 123, Madrid, código postal 28001. "
    "¿Y su correo electrónico? "
    "Es antonio.garcia@gmail.com. "
    "Perfecto, ya puedo ver su cuenta en el sistema."
)

# Analyze the text with Spanish language setting
results = analyzer.analyze(
    text=text_to_analyze,
    language="es"
)

# Print detection results
for result in results:
    print(f"Detected entity: {result.entity_type}, Score: {result.score}, Position: {result.start}-{result.end}")

# Initialize anonymizer
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig

anonymizer = AnonymizerEngine()

# Anonymize the detected PII entities
anonymized_result = anonymizer.anonymize(
    text=text_to_analyze,
    analyzer_results=results,
    operators={
        "PERSON": OperatorConfig("replace", {"new_value": "<PERSONA>"}),
        "EMAIL_ADDRESS": OperatorConfig("replace", {"new_value": "<EMAIL>"}),
        "PHONE_NUMBER": OperatorConfig("replace", {"new_value": "<TELÉFONO>"}),
        "DEFAULT": OperatorConfig("replace", {"new_value": "<PII>"})
    }
)

# Output the anonymized text
print("\nAnonymized text:")
print(anonymized_result.text)

Detected entity: PERSON, Score: 0.85, Position: 1333-1347
Detected entity: LOCATION, Score: 0.85, Position: 1389-1393
Detected entity: PERSON, Score: 0.85, Position: 1449-1456
Detected entity: PERSON, Score: 0.85, Position: 1492-1499
Detected entity: PERSON, Score: 0.85, Position: 1936-1943
Detected entity: LOCATION, Score: 0.85, Position: 2025-2032
Detected entity: PERSON, Score: 0.85, Position: 2034-2047
Detected entity: LOCATION, Score: 0.85, Position: 2050-2057
Detected entity: PERSON, Score: 0.85, Position: 2125-2135
Detected entity: LOCATION, Score: 0.85, Position: 2254-2261
Detected entity: LOCATION, Score: 0.85, Position: 2263-2272
Detected entity: LOCATION, Score: 0.85, Position: 2310-2317
Detected entity: LOCATION, Score: 0.85, Position: 2354-2361
Detected entity: LOCATION, Score: 0.85, Position: 2797-2808
Detected entity: LOCATION, Score: 0.85, Position: 2818-2825
Detected entity: LOCATION, Score: 0.85, Position: 2827-2836
Detected entity: PERSON, Score: 0.85, Position: 3024