In [None]:
!pip install llama-index llama-index-postprocessor-presidio
!python -m spacy download en_core_web_lg

Collecting llama-index
  Downloading llama_index-0.10.28-py3-none-any.whl (6.9 kB)
Collecting llama-index-postprocessor-presidio
  Downloading llama_index_postprocessor_presidio-0.1.1-py3-none-any.whl (2.9 kB)
Collecting llama-index-agent-openai<0.3.0,>=0.1.4 (from llama-index)
  Downloading llama_index_agent_openai-0.2.2-py3-none-any.whl (12 kB)
Collecting llama-index-cli<0.2.0,>=0.1.2 (from llama-index)
  Downloading llama_index_cli-0.1.11-py3-none-any.whl (26 kB)
Collecting llama-index-core<0.11.0,>=0.10.28 (from llama-index)
  Downloading llama_index_core-0.10.28-py3-none-any.whl (15.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting llama-index-embeddings-openai<0.2.0,>=0.1.5 (from llama-index)
  Downloading llama_index_embeddings_openai-0.1.7-py3-none-any.whl (6.0 kB)
Collecting llama-index-indices-managed-llama-cloud<0.2.0,>=0.1.2 (from llama-index)
  Downloading llama_index_indices_mana

# PII anonymization with Presidio
For this example I just used OAuth.

```
# This is formatted as code
```



In [None]:
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine, OperatorConfig
from presidio_anonymizer.operators import Operator, OperatorType

from typing import Dict
from pprint import pprint
import requests

In [None]:
def generate_synonym_with_gemini(prompt, endpoint="gemini_endpoint", timeout=30):
    """Send ``prompt`` to the Gemini endpoint and return the generated synonym.

    Args:
        prompt: Instruction text sent as the request body.
        endpoint: Address to POST to. The default is the placeholder string
            this notebook ships with; pass a real URL to get a response.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body with surrounding whitespace removed, or ``None``
        when the request fails, the status code is not 200, or the body is
        empty.
    """
    # Encode explicitly: a str body is otherwise encoded by the HTTP layer
    # with a Latin-1 codec and fails on characters outside it.
    payload = prompt.encode("utf-8") if isinstance(prompt, str) else prompt

    try:
        # Without a timeout a stalled server would block the notebook forever.
        response = requests.post(endpoint, data=payload, timeout=timeout)
    except requests.RequestException:
        # Connection problems, invalid URLs and timeouts all count as
        # "no synonym available" for the caller.
        return None

    if response.status_code != 200:
        return None

    # Assumes the service returns the synonym as the plain response body.
    synonym = response.text.strip()
    return synonym or None

def anonymize_with_gemini(text, analyzer_results, operators):
    """Replace detected entities in ``text`` with Gemini-generated synonyms.

    Args:
        text: The string the analyzer results refer to.
        analyzer_results: Iterable of results exposing ``entity_type``,
            ``start`` and ``end``; the offsets index into ``text``.
        operators: Mapping of entity type to operator configuration. Only
            entity types present as keys are rewritten.

    Returns:
        A copy of ``text`` in which every selected entity for which a synonym
        was obtained has been replaced. Entities without a synonym are left
        as they are.
    """
    source = text

    # Work from the end of the text towards the start: a synonym of a
    # different length then never shifts the offsets of spans still to come.
    ordered = sorted(
        analyzer_results, key=lambda r: (r.start, r.end), reverse=True
    )

    # Start offset of the leftmost span replaced so far. A span reaching past
    # it overlaps text that has already been rewritten and is skipped.
    replaced_from = len(source)

    for result in ordered:
        if result.end > replaced_from:
            continue
        if result.entity_type not in operators:
            continue

        operator_config = operators[result.entity_type]
        # NOTE(review): presidio's OperatorConfig appears to expose only
        # operator_name/params, so a config without an ``operator_type``
        # attribute is treated as an anonymizer - confirm this is intended.
        operator_type = getattr(
            operator_config, "operator_type", OperatorType.Anonymize
        )
        if operator_type != OperatorType.Anonymize:
            continue

        original_value = source[result.start:result.end]
        prompt = f"Generate a synonym for this word/phrase: {original_value}"
        synonym = generate_synonym_with_gemini(prompt)

        if synonym:
            text = text[:result.start] + synonym + text[result.end:]
            replaced_from = result.start

    return text


In [None]:
class InstanceCounterAnonymizer(Operator):
    """
    Operator that swaps each entity value for a numbered placeholder.

    The first distinct value recorded for an entity type gets index 0, the
    next distinct value the following index, and so on. A value that was
    already recorded gets the placeholder it was given the first time.
    """

    REPLACING_FORMAT = "<{entity_type}_{index}>"

    def operate(self, text: str, params: Dict = None) -> str:
        """Return the placeholder for ``text`` and record it in the mapping.

        ``params`` must contain ``entity_type`` and ``entity_mapping``; the
        mapping is updated in place so the caller can read it afterwards.
        """

        kind: str = params["entity_type"]

        # Nested dict: entity type -> {original value -> placeholder}
        mapping: Dict[str, Dict[str, str]] = params["entity_mapping"]

        known = mapping.get(kind)
        if known:
            if text in known:
                # Same value as before: reuse its placeholder.
                return known[text]
            next_index = self._get_last_index(known) + 1
        else:
            # Nothing recorded for this entity type yet: start a fresh table.
            mapping[kind] = {}
            next_index = 0

        placeholder = self.REPLACING_FORMAT.format(
            entity_type=kind, index=next_index
        )
        mapping[kind][text] = placeholder
        return placeholder

    @staticmethod
    def _get_last_index(entity_mapping_for_type: Dict) -> int:
        """Return the highest index among the placeholders already issued."""

        # A placeholder looks like "<TYPE_7>": keep what follows the last
        # underscore and drop the closing ">".
        return max(
            int(placeholder.rsplit("_", 1)[-1][:-1])
            for placeholder in entity_mapping_for_type.values()
        )

    def validate(self, params: Dict = None) -> None:
        """Raise ``ValueError`` if a required parameter is missing."""

        required = (
            ("entity_mapping", "An input Dict called `entity_mapping` is required."),
            ("entity_type", "An entity_type param is required."),
        )
        for key, message in required:
            if key not in params:
                raise ValueError(message)

    def operator_name(self) -> str:
        """Name used to select this operator in an ``OperatorConfig``."""
        return "entity_counter"

    def operator_type(self) -> OperatorType:
        """This operator anonymizes (as opposed to de-anonymizing)."""
        return OperatorType.Anonymize

In [None]:
# Usage sketch for the Gemini-based helper (not executed in this cell).
# Note that anonymize_with_gemini takes the text, the analyzer results and an
# operators mapping:
# anonymized_data = anonymize_with_gemini(text, analyzer_results, operators)
# print(anonymized_data)



# Sample input for the analyzer: two therapy-note excerpts containing names,
# dates, an organisation and a location. Later cells reuse `text` together
# with the character offsets found below, so it must not be edited between
# the analysis and the anonymization steps.
text = """
Therapist Notes:
Patient Name: Sarah Johnson
DOB: 05/12/1985
Session Date: 04/02/2024
Sarah discussed her ongoing struggles with anxiety and depression. She mentioned that her symptoms have worsened since losing her job at TechCorp Inc. last month. Sarah revealed that she has been having suicidal thoughts and has considered overdosing on her prescribed Xanax medication. She expressed feeling hopeless about her future and her ability to provide for her two children, Emily (age 8) and Jacob (age 5). Sarah also shared that her mother, Mary Johnson, was recently diagnosed with breast cancer, which has added to her stress and feelings of overwhelm.

Session Transcript:
Therapist: Good morning, Michael. How have you been feeling since our last session?
Michael: Not great, to be honest. I've been really struggling with my PTSD symptoms lately. The nightmares about my deployment in Afghanistan have been more frequent and intense. I keep reliving the IED explosion that killed my best friend, Chris Thompson. It happened on August 15, 2019, and I can't seem to shake the guilt and the memory of seeing his body torn apart. I've been self-medicating with alcohol more often, usually drinking a fifth of vodka each night just to fall asleep. My wife, Jessica, is really worried about me, and I'm scared that my drinking is going to ruin our marriage. I don't know how much longer I can keep going like this.
"""

# Uncomment to inspect the raw input before analysis.
#print("original text:")
#pprint(text)

# Build an analyzer with its default configuration and scan the text as English.
analyzer = AnalyzerEngine()
# Each result carries an entity type, start/end offsets into `text` and a score
# (see the printed output below).
analyzer_results = analyzer.analyze(text=text, language="en")
print("analyzer results:")
print(analyzer_results)



analyzer results:
[type: DATE_TIME, start: 76, end: 86, score: 0.95, type: PERSON, start: 32, end: 45, score: 0.85, type: DATE_TIME, start: 51, end: 87, score: 0.85, type: PERSON, start: 87, end: 92, score: 0.85, type: DATE_TIME, start: 238, end: 248, score: 0.85, type: PERSON, start: 250, end: 255, score: 0.85, type: PERSON, start: 471, end: 476, score: 0.85, type: PERSON, start: 489, end: 494, score: 0.85, type: DATE_TIME, start: 496, end: 501, score: 0.85, type: PERSON, start: 504, end: 509, score: 0.85, type: PERSON, start: 539, end: 551, score: 0.85, type: PERSON, start: 699, end: 706, score: 0.85, type: PERSON, start: 758, end: 765, score: 0.85, type: LOCATION, start: 888, end: 899, score: 0.85, type: PERSON, start: 999, end: 1013, score: 0.85, type: DATE_TIME, start: 1030, end: 1045, score: 0.85, type: PERSON, start: 1255, end: 1262, score: 0.85, type: IN_PAN, start: 142, end: 152, score: 0.05, type: IN_PAN, start: 316, end: 326, score: 0.05, type: IN_PAN, start: 327, end: 337, 

In [None]:
# CUSTOMIZE WHAT INFO IS MASKED HERE

class RedactionAnonymizer(Operator):
    """Anonymizer that replaces every entity value with ``[REDACTED]``.

    Besides ``operate`` it implements ``validate``, ``operator_name`` and
    ``operator_type``, the same set of methods InstanceCounterAnonymizer
    provides, so the class is concrete and can be instantiated.
    """

    def operate(self, text: str, params: Dict = None) -> str:
        """Return the fixed redaction marker; ``text`` and ``params`` are unused."""
        return "[REDACTED]"

    def validate(self, params: Dict = None) -> None:
        """Nothing to check: this operator takes no parameters."""

    def operator_name(self) -> str:
        """Name used to select this operator in an ``OperatorConfig``."""
        return "redaction"

    def operator_type(self) -> OperatorType:
        """This operator anonymizes (as opposed to de-anonymizing)."""
        return OperatorType.Anonymize

class PlaceholderAnonymizer(Operator):
    """Anonymizer that replaces every entity value with ``[<placeholder>]``.

    The placeholder text comes from the ``placeholder`` entry of the operator
    params. Besides ``operate`` the class implements ``validate``,
    ``operator_name`` and ``operator_type``, the same set of methods
    InstanceCounterAnonymizer provides, so it is concrete and can be
    instantiated.
    """

    def operate(self, text: str, params: Dict = None) -> str:
        """Return the configured placeholder wrapped in square brackets."""
        return f"[{params['placeholder']}]"

    def validate(self, params: Dict = None) -> None:
        """Raise ``ValueError`` when the ``placeholder`` param is missing."""
        if not params or "placeholder" not in params:
            raise ValueError("A `placeholder` param is required.")

    def operator_name(self) -> str:
        """Name used to select this operator in an ``OperatorConfig``."""
        return "placeholder"

    def operator_type(self) -> OperatorType:
        """This operator anonymizes (as opposed to de-anonymizing)."""
        return OperatorType.Anonymize

In [None]:
# Create the anonymizer engine first; the custom operators are registered on it.
anonymizer_engine = AnonymizerEngine()

# Register the custom operators. add_anonymizer is given the operator classes
# themselves (not instances), the same way for all three.
anonymizer_engine.add_anonymizer(RedactionAnonymizer)
anonymizer_engine.add_anonymizer(PlaceholderAnonymizer)
anonymizer_engine.add_anonymizer(InstanceCounterAnonymizer)

# Filled in by the "entity_counter" operator while anonymizing:
# entity type -> {original value -> placeholder}
entity_mapping = dict()

# Anonymize the text, applying the instance-counter operator to every entity type.
anonymized_result = anonymizer_engine.anonymize(
    text,
    analyzer_results,
    {
        "DEFAULT": OperatorConfig(
            "entity_counter", {"entity_mapping": entity_mapping}
        )
    },
)

# Uncomment to inspect the anonymized text and the recorded mapping.
#print(anonymized_result.text)

#pprint(entity_mapping, indent=2)

In [None]:

# Rewrite the PERSON entities found by the analyzer with Gemini-generated
# synonyms. The offsets in `analyzer_results` refer to `text`, so that exact
# string is what gets passed in.
private_data = text
# Entity types to rewrite; types not listed here are left untouched.
gemini_operators = {"PERSON": OperatorConfig("replace")}
anonymized_data = anonymize_with_gemini(
    private_data, analyzer_results, gemini_operators
)
print(anonymized_data)
