In [None]:
import os
import json
import uuid
from typing import List, Optional, Literal
from pydantic import BaseModel, UUID4, HttpUrl, Field, ValidationError
from datetime import date
from enum import Enum

from dotenv import load_dotenv
import os
load_dotenv()
import google.generativeai as genai

In [24]:
# Import the Gemini SDK plus the config/safety types used when building the model.
# A missing package is reported by raising ImportError (with an install hint)
# instead of calling exit(): inside a Jupyter kernel exit() asks the kernel itself
# to shut down, rather than simply failing this cell.
try:
    import google.generativeai as genai
    from google.generativeai.types import GenerationConfig, HarmCategory, HarmBlockThreshold
except ImportError as import_error:
    raise ImportError(
        "google-generativeai library not found. Please install it using 'pip install google-generativeai'"
    ) from import_error

In [None]:
# --- Enums based on TypeScript literal types ---
class Gender(str, Enum):
    """Allowed values for `FamilyMember.gender`.

    The `str` mix-in makes each member compare equal to its plain string value.
    """
    male = "male"
    female = "female"
    other = "other"
    unknown = "unknown"  # used as the default on FamilyMember

class RelationshipType(str, Enum):
    """Allowed values for `Relationship.type`."""
    parent = "parent"
    spouse = "spouse"
    sibling = "sibling"
    child = "child"
    grandparent = "grandparent"
    grandchild = "grandchild"
    friend = "friend"
    colleague = "colleague"
    other = "other"
    # 'father' and 'mother' are kept from the original type list; the prompt text
    # instructs the LLM to consistently use 'parent' for parent-child links instead.
    father = "father"
    mother = "mother"


class AllergySeverity(str, Enum):
    """Allowed values for `Allergy.severity`."""
    mild = "mild"
    moderate = "moderate"
    severe = "severe"

class SmokingStatus(str, Enum):
    """Allowed values for `Lifestyle.smoking`."""
    never = "never"
    former = "former"
    current = "current"
    unknown = "unknown"  # used as the default on Lifestyle

class ThemeType(str, Enum):
    """Allowed values for `FamilyTreeData.theme`."""
    light = "light"  # used as the default on FamilyTreeData
    dark = "dark"

# --- Pydantic Models based on TypeScript Interfaces ---

class Source(BaseModel):
    """A citation attached to a family member (entries of `FamilyMember.sources`).

    Only `title` is required.
    """
    id: UUID4 = Field(default_factory=uuid.uuid4)  # generated when the input omits it
    title: str
    url: Optional[HttpUrl] = None
    text: Optional[str] = None
    # Name of the member field this source supports (the prompt's example uses "birthDate").
    appliesToField: Optional[str] = None

class HealthCondition(BaseModel):
    """A health condition recorded for a member (entries of `FamilyMember.conditions`).

    Only `name` is required.
    """
    id: UUID4 = Field(default_factory=uuid.uuid4)  # generated when the input omits it
    name: str
    diagnosisDate: Optional[date] = None
    notes: Optional[str] = None

class Allergy(BaseModel):
    """An allergy recorded for a member (entries of `FamilyMember.allergies`).

    Both `name` and `severity` are required.
    """
    id: UUID4 = Field(default_factory=uuid.uuid4)  # generated when the input omits it
    name: str
    severity: AllergySeverity

class Medication(BaseModel):
    """A medication recorded for a member (entries of `FamilyMember.medications`).

    `name` and `isCurrent` are required; `dosage` is optional free text.
    """
    id: UUID4 = Field(default_factory=uuid.uuid4)  # generated when the input omits it
    name: str
    dosage: Optional[str] = None
    isCurrent: bool  # no default: the input must state it explicitly

class VitalOrScreening(BaseModel):
    """A dated measurement or screening result (entries of `FamilyMember.vitals`).

    `type`, `value` and `date` are all required; `value` is kept as free text.
    """
    id: UUID4 = Field(default_factory=uuid.uuid4)  # generated when the input omits it
    type: str
    value: str
    # NOTE(review): this field shares its name with the imported `date` type. It is
    # declared without a default here; presumably adding one would rebind `date`
    # inside the class body and interfere with the annotation -- confirm before changing.
    date: date

class Lifestyle(BaseModel):
    """Lifestyle notes for a member (`FamilyMember.lifestyle`); every field has a default."""
    smoking: SmokingStatus = SmokingStatus.unknown
    dietNotes: Optional[str] = None
    exerciseNotes: Optional[str] = None

class CustomTimelineEvent(BaseModel):
    """A user-defined dated event (entries of `FamilyMember.customTimelineEvents`).

    `date` and `description` are required; `title` and `icon` are optional.
    """
    id: UUID4 = Field(default_factory=uuid.uuid4)  # generated when the input omits it
    # NOTE(review): this field shares its name with the imported `date` type. It is
    # declared without a default here; presumably adding one would rebind `date`
    # inside the class body -- confirm before changing.
    date: date
    title: Optional[str] = None
    description: str
    icon: Optional[str] = None  # free-text icon name

class FamilyMember(BaseModel):
    """One person in the family tree, with biographical and health details.

    Only `firstName` and `lastName` are required; every other field has a default.
    """
    # The post-processing step replaces the LLM's temporary id with a fresh UUID
    # string before validation, so this default is only a fallback.
    id: UUID4 = Field(default_factory=uuid.uuid4)
    firstName: str
    lastName: str
    maidenName: Optional[str] = None
    birthDate: Optional[date] = None
    deathDate: Optional[date] = None
    causeOfDeath: Optional[str] = None
    gender: Gender = Gender.unknown
    photoUrl: Optional[HttpUrl] = None
    birthPlace: Optional[str] = None
    deathPlace: Optional[str] = None
    occupation: Optional[str] = None
    bio: Optional[str] = None
    isPrivate: Optional[bool] = False
    # List fields use default_factory so each instance gets its own empty list.
    conditions: List[HealthCondition] = Field(default_factory=list)
    allergies: List[Allergy] = Field(default_factory=list)
    medications: List[Medication] = Field(default_factory=list)
    # When omitted, defaults to a Lifestyle whose smoking status is 'unknown'.
    lifestyle: Optional[Lifestyle] = Field(default_factory=lambda: Lifestyle(smoking=SmokingStatus.unknown))
    vitals: List[VitalOrScreening] = Field(default_factory=list)
    customTimelineEvents: List[CustomTimelineEvent] = Field(default_factory=list)
    sources: List[Source] = Field(default_factory=list)
    # Optional integer level in the tree (the sample instructions use 0 for the
    # parents and 1 for their children).
    generation: Optional[int] = None

class Relationship(BaseModel):
    """A typed link between two family members, referenced by their UUIDs.

    The JSON keys are "from" and "to"; they are exposed as `from_member` and
    `to_member` because `from` is a reserved word in Python. The prompt asks the
    LLM to put the parent in "from" and the child in "to" for 'parent' links.
    """
    id: UUID4 = Field(default_factory=uuid.uuid4)  # generated when the input omits it
    from_member: UUID4 = Field(alias="from")  # read from the "from" key
    to_member: UUID4 = Field(alias="to")  # read from the "to" key
    type: RelationshipType
    startDate: Optional[date] = None
    endDate: Optional[date] = None
    details: Optional[str] = None

class FamilyTreeData(BaseModel):
    """Top-level document: all members, the relationships between them, and a display theme."""
    members: List[FamilyMember] = Field(default_factory=list)
    relationships: List[Relationship] = Field(default_factory=list)
    theme: ThemeType = ThemeType.light


In [26]:
# --- Helper that returns the hand-written JSON structure description embedded in the prompt ---
def get_schema_prompt_string():
    """Return the fixed instruction text describing the JSON the LLM must produce.

    The text is a hand-written description of the expected structure (members,
    relationships, theme) together with the rules for "id" placeholders. It takes
    no input and always returns the same string.

    Returns:
        str: the schema/instruction section to place at the top of the prompt.
    """
    # Plain (non-f) string: nothing is interpolated, so the braces are written literally.
    schema_prompt = """
    You MUST strictly output a single JSON object. Do NOT include any markdown like ```json or ```.
    The JSON object must conform to the following structure.

    Regarding "id" fields:
    - For each object in the "members" array (e.g., `members[0].id`), you MUST provide a unique temporary string placeholder (e.g., "temp_member_001", "person_alpha"). This placeholder will be used to link relationships.
    - In the "relationships" array, the "from" and "to" fields MUST refer to these temporary string placeholders you assigned to the members.
    - For ALL OTHER "id" fields (e.g., `members[0].conditions[0].id`, `relationships[0].id`, `members[0].sources[0].id`, etc.), you should OMIT the "id" field entirely or set its value to `null`. These IDs will be automatically generated by the system later.

    Structure:
    {
      "members": [
        {
          "id": "string (Unique temporary placeholder, e.g., 'temp_member_1')",
          "firstName": "string",
          "lastName": "string",
          "maidenName": "string (optional)",
          "birthDate": "string (YYYY-MM-DD format, optional)",
          "deathDate": "string (YYYY-MM-DD format, optional)",
          "causeOfDeath": "string (optional)",
          "gender": "string ('male', 'female', 'other', or 'unknown')",
          "photoUrl": "string (URL format, optional)",
          "birthPlace": "string (optional)",
          "deathPlace": "string (optional)",
          "occupation": "string (optional)",
          "bio": "string (optional)",
          "isPrivate": "boolean (optional, defaults to false)",
          "conditions": [
            {
              // "id" field OMITTED or null here
              "name": "string (e.g., 'Malaria', 'Viral Fever', 'Leg Injury from Accident')",
              "diagnosisDate": "string (YYYY-MM-DD format, optional)",
              "notes": "string (optional, e.g., 'Surgery of legs')"
            }
          ],
          "allergies": [
            // Example: { "name": "Peanuts", "severity": "severe" } // "id" field OMITTED or null
          ],
          "medications": [
            // Example: { "name": "Aspirin", "dosage": "100mg", "isCurrent": true } // "id" field OMITTED or null
          ],
          "lifestyle": {
            "smoking": "string ('never', 'former', 'current', or 'unknown')",
            "dietNotes": "string (optional)",
            "exerciseNotes": "string (optional)"
          },
          "vitals": [
            // Example: { "type": "Blood Pressure", "value": "120/80", "date": "YYYY-MM-DD"} // "id" field OMITTED or null
          ],
          "customTimelineEvents": [
            // Example: { "date": "YYYY-MM-DD", "description": "Graduated University", "title": "Graduation" } // "id" field OMITTED or null
          ],
          "sources": [
            // Example: { "title": "Birth Certificate", "url": "http://example.com/cert.pdf", "text": "Official birth record.", "appliesToField": "birthDate" } // "id" field OMITTED or null
          ],
          "generation": "integer (optional, e.g., 1 for Anurag, 0 for parents if Anurag is root)"
        }
      ],
      "relationships": [
        {
          // "id" field OMITTED or null here
          "from": "string (Must match a temporary placeholder 'id' from a member)",
          "to": "string (Must match a temporary placeholder 'id' from a member)",
          "type": "string ('parent', 'spouse', 'child', etc.)",
          "startDate": "string (YYYY-MM-DD format, optional)",
          "endDate": "string (YYYY-MM-DD format, optional)",
          "details": "string (optional)"
        }
      ],
      "theme": "string ('light' or 'dark', defaults to 'light')"
    }

    Ensure all specified fields are present in each object (unless explicitly stated they can be omitted like certain 'id' fields), using null for optional fields if no information is available, or empty lists for list fields.
    Infer dates as best as possible. For example, "June 2024" could be "2024-06-01" if a specific day isn't mentioned.
    For relationships, consistently use the 'parent' type where the 'from' member is the parent and 'to' member is the child.
    Pay close attention to field names, for example, use "appliesToField" in "sources", not "field".
    """
    return schema_prompt


In [None]:
# --- 1. Configure Gemini Client ------------------------------------------
# The API key is read from the GOOGLE_API_KEY environment variable (load_dotenv()
# in the imports cell loads it from a local .env file) so that no secret is stored
# in the notebook. SECURITY NOTE: this cell previously embedded a literal API key;
# that key must be treated as leaked and revoked.
try:
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY environment variable not set. Please set it to your Gemini API key.")
    genai.configure(api_key=api_key)

    # Request a JSON response body instead of free text.
    generation_config = GenerationConfig(
        response_mime_type="application/json",
    )
    model = genai.GenerativeModel(
        "gemini-2.0-flash",
        generation_config=generation_config,
        # Block content rated medium or above in each of the four harm categories.
        safety_settings={
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        }
    )
except Exception as e:
    print(f"Error configuring Gemini client: {e}")
    # Re-raise instead of exit(): in a Jupyter kernel exit() asks the kernel to shut
    # down, while a raised exception just fails this cell and halts "Run All".
    raise

In [28]:
# --- 2. Natural-language instructions -------------------------------------
# Free-text description of the family to extract. It is inserted verbatim into the
# prompt after the schema description. This sample describes two parents
# (generation 0), their marriage, and two children (generation 1).
nl_instructions = """
Please create a family tree. The main theme for the display should be light.

Let's start with my father, Rajesh Sharma. He was born on March 15, 1970, in Delhi, India. He is now a Retired Civil Engineer. A little bio for him: "Loves gardening and reading history." For his lifestyle, he is a former smoker, having quit back in 2010. He maintains a balanced, home-cooked diet and makes sure to walk for 30 minutes every day.
Regarding his health, Rajesh was diagnosed with Type 2 Diabetes on June 20, 2018; the note for this is "Managed with diet and Metformin." He currently takes Metformin 500mg daily. A recent vital sign recorded on May 1, 2025, was his blood pressure at 130/80 mmHg. A significant custom timeline event for him was when he "Retired from government service" on March 31, 2023; we can use a 'Briefcase' icon for this. He belongs to generation 0.

Next is my mother, Priya Sharma. Her maiden name was Gupta. She was born on August 22, 1975, in Lucknow, India. She works as a Part-time Yoga Instructor. Her bio is: "Passionate about holistic health." Priya has never smoked. Her diet is vegetarian, and she enjoys organic food. For exercise, she practices yoga daily and teaches three times a week.
Health-wise, Priya was diagnosed with Hypothyroidism on February 10, 2015, and the note says she is "Taking Eltroxin." Her current medication is Eltroxin 50mcg daily. She has an allergy to dust mites, which is of moderate severity. Her photo can be found at "https://example.com/priya_sharma.jpg". I have a source for her occupation: the title is "Yoga Instructor Certification", the URL is "https://example.com/yoga_cert.pdf", and the text is "Certified by XYZ Yoga Institute in 2012". This source applies to the 'occupation' field. She is also generation 0.

Rajesh and Priya are married. They got married on November 25, 1998. A detail about their relationship: "They had a traditional Indian wedding in Delhi." This is a spouse relationship.

Now for their children, who are generation 1.
First, their son, Rohan Sharma. He was born on July 10, 2000, in Mumbai, India, and his gender is male. He is a Software Developer at "TechSolutions Inc.". His bio: "Tech enthusiast and avid gamer." Rohan is a non-smoker. His diet note is "Often eats out," and for exercise, "Plays badminton on weekends."
In terms of health, Rohan had Chickenpox way back on April 1, 2005, and "Recovered fully." He has a severe allergy to penicillin. He once used Amoxicillin at a dosage of "250mg thrice daily for 7 days" for a past infection, but this is not a current medication. A vital for him is his height, which was 175cm, recorded on January 10, 2024. Rohan is Rajesh's and Priya's child.

Their second child is a daughter, Meera Sharma. She was born on September 5, 2004, in Mumbai, India, and her gender is female. Meera is currently a University Student studying Arts. Her bio is: "Loves painting and poetry. Her profile should be private." So, please set isPrivate to true for her. She has never smoked. Her diet consists of "Prefers simple home-cooked meals," and she "Enjoys cycling" for exercise.
Meera was diagnosed with Iron Deficiency Anemia on November 15, 2023, with a note "Taking iron supplements." She is currently taking "Ferrous Sulphate 200mg once daily." She has a mild allergy to cats. For her vitals, her last flu vaccine was on October 20, 2024; the type is "Vaccination" and the value is "Influenza Vaccine". A custom timeline event for Meera is that she "Graduated High School with Honors" on May 20, 2023, and we can use a 'GraduationCap' icon. Meera is also Rajesh's and Priya's child.
"""

In [29]:
# --- 3. Build a prompt that enforces JSON output -------------------------
# Layout: schema description first, then the extraction task, then the user's
# free-text instructions. The empty strings at both ends of the list produce the
# leading and trailing newline of the final prompt.
schema_description = get_schema_prompt_string()
prompt = "\n".join(
    [
        "",
        schema_description,
        "",
        "Based on the following instructions, extract the information and generate a single JSON object strictly conforming to the schema described above.",
        'Pay special attention to the "id" field generation rules and field names (e.g., "appliesToField") as mentioned in the schema description.',
        "",
        "Instructions:",
        nl_instructions,
        "",
    ]
)


In [30]:
# --- 4. Call Gemini and get JSON -----------------------------------------
# Flow of this cell: send the prompt, parse the reply as JSON, swap the LLM's
# temporary member ids for real UUIDs, validate with Pydantic, and write the
# validated tree to disk. Each failure mode gets its own diagnostic printout.

# Numeric finish_reason value that the check below treats as a normal "STOP".
FINISH_REASON_STOP = 1

# Member list fields whose items receive auto-generated ids from Pydantic.
NESTED_LIST_KEYS = ("conditions", "allergies", "medications", "vitals", "customTimelineEvents", "sources")


def _strip_nested_ids(member_data):
    """Remove LLM-supplied "id" keys from a member's nested list items (in place).

    Pydantic generates these ids through default_factory, so anything the LLM put
    there (against instructions) is dropped. Also renames a "field" key on source
    items to "appliesToField" when the latter is absent.

    Args:
        member_data: dict for one member, as parsed from the LLM's JSON.
    """
    for sub_list_key in NESTED_LIST_KEYS:
        # `or []` also covers an explicit JSON null, which would otherwise raise
        # TypeError here; the null itself is left in place for Pydantic to report.
        for item in member_data.get(sub_list_key) or []:
            if not isinstance(item, dict):
                continue  # malformed entry: leave it for Pydantic to report
            item.pop("id", None)
            if sub_list_key == "sources" and "field" in item and "appliesToField" not in item:
                item["appliesToField"] = item.pop("field")


def _assign_member_uuids(llm_data):
    """Give every member a fresh UUID string and clean its nested items (in place).

    Args:
        llm_data: dict parsed from the LLM's JSON output.

    Returns:
        dict mapping each temporary member id supplied by the LLM to the UUID
        string that replaced it.
    """
    member_id_map = {}
    for member_data in llm_data.get("members") or []:
        llm_member_id = member_data.get("id")
        new_member_uuid = str(uuid.uuid4())

        if llm_member_id:
            if llm_member_id in member_id_map:
                # Without this warning the earlier mapping is overwritten silently and
                # relationships end up attached to the wrong person.
                print(f"Warning: Temporary member ID '{llm_member_id}' is used by more than one member. Relationships referencing it will point to the last such member.")
            member_id_map[llm_member_id] = new_member_uuid
        else:
            # The LLM did not supply a temporary id, so no relationship can reference
            # this member through the map; it still receives a valid UUID.
            print(f"Warning: Member '{member_data.get('firstName')}' did not have a temporary ID from LLM. Assigning new UUID directly.")

        member_data["id"] = new_member_uuid
        _strip_nested_ids(member_data)
    return member_id_map


def _remap_relationship_ids(llm_data, member_id_map):
    """Rewrite each relationship's "from"/"to" from temporary ids to UUIDs (in place).

    Any "id" key on a relationship is removed so that Pydantic generates it.
    References that are missing from the map are left unchanged and reported.

    Args:
        llm_data: dict parsed from the LLM's JSON output.
        member_id_map: mapping returned by `_assign_member_uuids`.
    """
    # The tail keeps the two original warning texts byte-identical.
    endpoints = (("from", " if it's not a valid UUID already."), ("to", "."))
    for rel_data in llm_data.get("relationships") or []:
        rel_data.pop("id", None)
        for endpoint, message_tail in endpoints:
            llm_ref = rel_data.get(endpoint)
            if not llm_ref:
                continue
            if llm_ref in member_id_map:
                rel_data[endpoint] = member_id_map[llm_ref]
            else:
                print(f"Error/Warning: Relationship '{endpoint}' ID '{llm_ref}' not found in member ID map. Pydantic validation might fail for this relationship{message_tail}")


print("--- Sending prompt to Gemini ---")
# print(f"Prompt being sent:\n{prompt}\n") # Uncomment for debugging the prompt

output_filename = "family_tree_output.json"
# Reset per-run state up front so the error handlers never report values left over
# from an earlier execution of this cell.
raw_json_text = ""
response = None
llm_data = None

try:
    response = model.generate_content(prompt)
    raw_json_text = response.text

    print("\n--- Raw JSON from Model ---")
    print(raw_json_text)

    # --- 5. Post-process LLM output to fix/generate UUIDs and map relationships ---
    llm_data = json.loads(raw_json_text)
    member_id_map = _assign_member_uuids(llm_data)
    _remap_relationship_ids(llm_data, member_id_map)

    # --- Validate with Pydantic using the processed data ---
    parsed_tree = FamilyTreeData.model_validate(llm_data)

    # by_alias=True writes the relationship endpoints under their "from"/"to" keys.
    final_json_output_string = parsed_tree.model_dump_json(indent=2, by_alias=True)

    # --- 6. Save the final response to a JSON file ---
    with open(output_filename, 'w', encoding='utf-8') as f:
        f.write(final_json_output_string)
    print(f"\n--- Successfully validated and saved the response to {output_filename} ---")

except ValidationError as e:
    print("\n--- Pydantic Validation Error ---")
    print(e.json(indent=2))
    print("\n--- Processed LLM Data (before Pydantic validation) that caused error was ---")
    if llm_data is not None:
        print(json.dumps(llm_data, indent=2))
    else:
        print("llm_data was not available (error likely occurred before or during its creation).")
    print("\n--- Raw problematic JSON from model was ---")
    print(raw_json_text)

except json.JSONDecodeError as e:
    print("\n--- JSON Decode Error: Failed to parse LLM output as JSON. ---")
    print(f"Error: {e}")
    print("Content that failed to parse:")
    print(raw_json_text)

except Exception as e:
    print("\n--- An unexpected error occurred ---")
    print(f"Error type: {type(e).__name__}")
    print(f"Error: {e}")
    if raw_json_text:
        print("\n--- Raw response text (if available) was ---")
        print(raw_json_text)
    if response is not None:
        # Surface the model-side diagnostics for this run's response, if any.
        if hasattr(response, 'prompt_feedback'):
            print(f"Prompt Feedback: {response.prompt_feedback}")
        for candidate in getattr(response, 'candidates', None) or []:
            if candidate.finish_reason != FINISH_REASON_STOP:
                print(f"Candidate Finish Reason: {candidate.finish_reason}")
                print(f"Candidate Safety Ratings: {candidate.safety_ratings}")


--- Sending prompt to Gemini ---

--- Raw JSON from Model ---
{
  "members": [
    {
      "id": "temp_member_rajesh",
      "firstName": "Rajesh",
      "lastName": "Sharma",
      "maidenName": null,
      "birthDate": "1970-03-15",
      "deathDate": null,
      "causeOfDeath": null,
      "gender": "male",
      "photoUrl": null,
      "birthPlace": "Delhi, India",
      "deathPlace": null,
      "occupation": "Retired Civil Engineer",
      "bio": "Loves gardening and reading history.",
      "isPrivate": false,
      "conditions": [
        {
          "name": "Type 2 Diabetes",
          "diagnosisDate": "2018-06-20",
          "notes": "Managed with diet and Metformin"
        }
      ],
      "allergies": [],
      "medications": [
        {
          "name": "Metformin",
          "dosage": "500mg daily",
          "isCurrent": true
        }
      ],
      "lifestyle": {
        "smoking": "former",
        "dietNotes": "Maintains a balanced, home-cooked diet",
        "exer