In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
from google import genai
from google.genai import types

# The client gets the API key from the environment variable `GEMINI_API_KEY`.
client = genai.Client()
contents = """
 Intput Text: 
 My name is Degaga wolde. You can call me on 0947381036 or email me on degagawolde@gmail.com."""

# Optimized System Instruction: Sets the role, constraints, and output format.
optimized_system_instruction = """
You are an expert PII identification tool for legal documents. Your task is to analyze the 
provided text and extract all instances of Personally Identifiable Information (PII). 

PII Categories to identify:
1. HUMAN_NAME (including partial names, nicknames)
2. COMPANY_NAME
3. DATE
4. ADDRESS (street, city, postal codes)
5. EMAIL_ADDRESS
6. PHONE_NUMBER

Do not generate any conversational text, explanations, or analysis. For each entity, 
return the exact text, the assigned type, the character start index, and the character 
end index (exclusive). Do not miss partial matches or embedded entities.
Output must be a single JSON object. The keys must be the PII categories, 
and the value must be a list of all detected instances. 
If a category is not found, its list should be empty.
"""
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=contents,
    config=types.GenerateContentConfig(
        # 1. Use JSON mode for reliable structured output
        response_mime_type="application/json",
        # 2. Use the detailed, optimized system instruction
        system_instruction=optimized_system_instruction,
        # 3. Increase temperature (optional, but sometimes helps with classification)
        # temperature=0.2
        # 4. Remove thinking_config (it's not needed by default)
    ),
)
print(response.text)

{
  "HUMAN_NAME": [
    {
      "text": "Degaga wolde",
      "start_char": 11,
      "end_char": 23
    }
  ],
  "COMPANY_NAME": [],
  "DATE": [],
  "ADDRESS": [],
  "EMAIL_ADDRESS": [
    {
      "text": "degagawolde@gmail.com",
      "start_char": 71,
      "end_char": 92
    }
  ],
  "PHONE_NUMBER": [
    {
      "text": "0947381036",
      "start_char": 42,
      "end_char": 52
    }
  ]
}
