In [1]:
pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
from typing import Annotated
from typing import Literal
from typing_extensions import TypedDict

from langgraph.prebuilt import create_react_agent
from langgraph.graph import StateGraph, MessagesState, START, END
from langchain_openai import ChatOpenAI
from langgraph.types import Command
from IPython.display import Image,display

# Chat model shared by the agent nodes defined further down; both of them call
# `model.invoke(prompt)` and json.loads() the reply.
_CHAT_MODEL_SETTINGS = {
    "model": "gpt-3.5-turbo",
    "temperature": 0,
    "max_tokens": 4000,
}
model = ChatOpenAI(**_CHAT_MODEL_SETTINGS)

In [3]:
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

PDF_FOLDER = "./docs"
# NOTE(review): agent_2 slices the retrieved documents with docs[:3], but with
# TOP_K = 1 the retriever only ever returns one chunk -- confirm which is intended.
CHUNK_SIZE, CHUNK_OVERLAP, TOP_K = 1000, 200, 1
VECTORSTORE_CACHE = "./vectorstore_cache"

# One embeddings client serves both the cached and the freshly built store.
embeddings = OpenAIEmbeddings()

# --- Load or build vectorstore ---
# Only trust the cache when the directory actually contains something; an empty
# directory left behind by an aborted build would otherwise load as an empty store.
if os.path.isdir(VECTORSTORE_CACHE) and os.listdir(VECTORSTORE_CACHE):
    # Load cached vectorstore
    vectorstore = Chroma(persist_directory=VECTORSTORE_CACHE, embedding_function=embeddings)
    print("[INFO] Loaded cached vectorstore.")
else:
    # Read all PDFs in folder. Sorting makes the concatenation order (and thus
    # the chunk boundaries) reproducible, since os.listdir order is arbitrary.
    docs = []
    for filename in sorted(os.listdir(PDF_FOLDER)):
        if not filename.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(PDF_FOLDER, filename)
        # extract_text() may return None for pages without a text layer.
        text = "\n".join(page.extract_text() or "" for page in PdfReader(pdf_path).pages)
        if text.strip():
            docs.append(text)

    full_text = "\n".join(docs)
    print(f"[INFO] Total PDFs read: {len(docs)}")

    # Split into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP
    )
    chunks = text_splitter.split_text(full_text)
    print(f"[INFO] Total chunks created: {len(chunks)}")

    # Fail before touching the cache directory: indexing nothing would either
    # error inside the vector store or persist an empty cache that every later
    # run silently reuses.
    if not chunks:
        raise ValueError(
            f"No extractable text found in PDFs under {PDF_FOLDER!r}; nothing to index."
        )

    # Build vectorstore and persist
    vectorstore = Chroma.from_texts(chunks, embeddings, persist_directory=VECTORSTORE_CACHE)
    # NOTE(review): newer Chroma releases persist automatically and may warn here -- confirm.
    vectorstore.persist()
    print("[INFO] Vectorstore created and cached.")

# --- Create retriever ---
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
print(f"[INFO] Retriever ready. Top-{TOP_K} chunks per query.")


  vectorstore = Chroma(persist_directory=VECTORSTORE_CACHE, embedding_function=embeddings)


[INFO] Loaded cached vectorstore.
[INFO] Retriever ready. Top-1 chunks per query.


In [4]:
from typing import TypedDict, List, Dict, Any, Literal
from langchain_openai import ChatOpenAI
from langgraph.types import Command
from langgraph.graph import StateGraph, MessagesState, START, END
from langchain.prompts import PromptTemplate
import json

class PersonFeatures(TypedDict):
    """Flat client profile returned by ``extract_features``."""
    age: int                  # whole years computed from the client's DOB; 0 when no DOB could be parsed
    marital: str              # marital status description; "Unknown" when absent
    dependents: int           # number of child birth records in the source payload
    children_ages: List[int]  # ages (whole years) of the children whose DOB could be parsed
    income: float             # amount taken from the last entry of the NOA history; 0.0 when there is none
    cpf_balance: float        # sum of the oa / sa / ma / ra balances
    housing_type: str         # housing type description; "" when absent
    has_dependents: bool      # convenience flag for whether the client has dependents

class LifeStage(TypedDict):
    """One projected life stage, in the shape agent_1's prompt asks the model to emit."""
    stage_name: str           # label for the stage
    age_range: str            # age span as text; the recorded output uses the form "27-35"
    features: Dict[str, Any]  # free-form projected attributes; the keys are not validated anywhere in this notebook

class ShieldPlanRecommendation(TypedDict):
    """One plan recommendation; the field names mirror the list in agent_2's prompt.

    The values come from parsed model output and are not checked against these
    annotations at runtime.
    """
    plan_name: str
    hospital_class: str
    annual_coverage_limit: str
    panel_doctors: str
    waiting_time: str
    access_to_treatment: str
    out_of_pocket_cost: Dict[str, Any]  # free-form cost breakdown
    suitable_for: List[str]
    reason: str
    projected_life_stage: List[str]     # presumably the life stage name(s) this plan targets -- TODO confirm

class ClientState(TypedDict):
    """State schema handed to ``StateGraph``; every node reads from and updates these keys."""
    sections: List[Dict[str, Any]]  # input feature dicts; the notebook passes a one-element list
    life_stages: List[LifeStage]    # written by agent_1; empty until it has produced stages
    # NOTE(review): agent_2 stores a dict of the form {"ShieldPlanRecommendation": [...]}
    # under this key rather than a bare list, so the annotation and the runtime value disagree.
    shield_plan_recommendations: List[ShieldPlanRecommendation]
    summary: str                    # initialised to "" and not written by any node in this notebook


In [5]:
def supervisor(state: dict) -> Command[Literal["agent_1", "agent_2", END]]:
    """Route to the first agent whose output is still missing, or finish.

    The state keys are checked in pipeline order: a missing or empty
    ``life_stages`` sends control to ``agent_1``; otherwise a missing or empty
    ``shield_plan_recommendations`` sends it to ``agent_2``; otherwise the
    graph ends.
    """
    pending_outputs = (
        ("life_stages", "agent_1"),
        ("shield_plan_recommendations", "agent_2"),
    )
    for state_key, producer in pending_outputs:
        # A missing key and an empty container are treated the same way.
        if not state.get(state_key):
            return Command(goto=producer)
    return Command(goto=END)

import json

def agent_1(state: dict) -> Command[Literal["supervisor", END]]:
    """Ask the model to project the client's life stages and store them in the state.

    Reads ``state["sections"]``, prompts the model for a JSON array of life-stage
    objects, and keeps only the dict entries of that array.

    Returns:
        ``Command`` updating ``life_stages``. Control goes back to ``supervisor``
        when at least one stage was parsed. When nothing usable came back the
        graph is ended instead: ``supervisor`` dispatches ``agent_1`` whenever
        ``life_stages`` is empty, so returning there with an empty list would
        repeat the same failing call until LangGraph's recursion limit aborts
        the run.
    """
    sections = state.get("sections", [])  # <-- corrected key
    prompt = f"""
    Based on the following client features:
    {json.dumps(sections)}

    Generate up to 5 life stages for this client, spanning from their current age to retirement. Each life stage should reflect realistic changes in marital status, dependents, income, career progression, and other relevant personal or financial circumstances. Output only structured JSON objects with stage_name, age_range, and features.

    Output ONLY a JSON array of  LifeStage objects with growth in mind.
    Each LifeStage object must include:
    - stage_name
    - age_range
    - features (dict)
    """
    life_stages = []
    # Bound before the try so the except handler can print it even when
    # model.invoke() itself is what raised.
    response = ""
    try:
        response = model.invoke(prompt).content.strip()

        # Chat models often wrap JSON in a Markdown fence (```json ... ```);
        # peel it off so json.loads sees only the payload.
        payload = response
        if payload.startswith("```"):
            payload = payload.split("\n", 1)[1] if "\n" in payload else ""
            payload = payload.rstrip()
            if payload.endswith("```"):
                payload = payload[:-3]
            payload = payload.strip()

        if payload:
            parsed = json.loads(payload)
            # Anything other than a list of objects is treated as "no stages".
            if isinstance(parsed, list):
                life_stages = [s for s in parsed if isinstance(s, dict)]
    except Exception as e:
        # Best effort: report the failure and fall through with an empty list.
        print("Agent_1 JSON error:", e, "Raw response:", response)

    if not life_stages:
        print("Agent_1 produced no life stages; ending the run.")
        return Command(goto=END, update={"life_stages": life_stages})
    return Command(goto="supervisor", update={"life_stages": life_stages})


def agent_2(state: dict) -> Command[Literal["supervisor"]]:
    """Recommend shield plans for the projected life stages.

    Retrieves knowledge-base text for the serialized life stages, prompts the
    model for a JSON array of recommendation objects, and stores the result as
    ``{"ShieldPlanRecommendation": [...]}`` under ``shield_plan_recommendations``.

    The wrapper dict is non-empty even when the inner list is empty, so
    ``supervisor``'s emptiness check treats this step as done and ends the
    graph rather than calling ``agent_2`` again after a failure.

    Returns:
        ``Command`` routing back to ``supervisor`` with the state update.
    """
    life_stages = state.get("life_stages", [])
    if not life_stages:
        return Command(goto="supervisor", update={"shield_plan_recommendations": {"ShieldPlanRecommendation": []}})

    query_text = json.dumps(life_stages)
    docs = retriever.invoke(query_text)  # returns list of Document objects
    knowledge_text = "\n".join(d.page_content for d in docs[:3])
    prompt = f"""
    Using the knowledge base below and or online, recommend any PRUShield plans and PRUExtra Copay supplementary plans tailored to the client’s life stages.

    Knowledge Base:
    {knowledge_text}

    Client life stages:
    {json.dumps(life_stages)}

    Instructions:
    Generate a JSON array of ShieldPlanRecommendation objects for each life stage objects.
    Each object must include the following fields:
    - plan_name
    - hospital_class
    - annual_coverage_limit
    - panel_doctors
    - waiting_time
    - access_to_treatment
    - out_of_pocket_cost
    - suitable_for
    - reason
    - projected_life_stage
    Consider each life stage carefully and provide recommendations tailored to the client’s age, marital status, dependents, income, and other relevant features.
    Output strictly valid JSON. Do not include any explanation, comments, or text outside the JSON array.
    """

    plans = []
    # Bound before the try so the except handler can always print it.
    response = ""
    try:
        response = model.invoke(prompt).content.strip()

        # Peel off a Markdown fence (```json ... ```) if the model added one.
        payload = response
        if payload.startswith("```"):
            payload = payload.split("\n", 1)[1] if "\n" in payload else ""
            payload = payload.rstrip()
            if payload.endswith("```"):
                payload = payload[:-3]
            payload = payload.strip()

        parsed = json.loads(payload)
        # A lone object is accepted as a one-item array; anything that is not
        # a list of objects yields no recommendations.
        if isinstance(parsed, dict):
            parsed = [parsed]
        if isinstance(parsed, list):
            plans = [p for p in parsed if isinstance(p, dict)]
    except Exception as e:
        # Best effort: report the failure and store an empty recommendation list.
        print("Agent_2 JSON error:", e, "Raw response:", response)

    # wrap in expected dict key
    recommendations = {"ShieldPlanRecommendation": plans}
    return Command(goto="supervisor", update={"shield_plan_recommendations": recommendations})


In [6]:
# Assemble the graph: three nodes and a single static edge into the supervisor.
# All other transitions come from the Command objects the nodes return.
builder = StateGraph(ClientState)
for node_fn in (supervisor, agent_1, agent_2):
    # Passing the bare function presumably registers the node under the
    # function's name, which is what the "supervisor" edge below refers to.
    builder.add_node(node_fn)
builder.add_edge(START, "supervisor")
supervisor_system = builder.compile()


In [7]:
from typing import TypedDict, List, Dict, Any
from datetime import datetime, date
import requests

def _unwrap_value(raw):
    """Return ``raw["value"]`` for a ``{"value": ...}`` wrapper, otherwise ``raw`` itself."""
    if isinstance(raw, dict):
        return raw.get("value")
    return raw


def _age_in_years(dob_str, today):
    """Whole years from a ``YYYY-MM-DD`` string to ``today``; ``None`` if it cannot be parsed."""
    try:
        dob = datetime.strptime(dob_str, "%Y-%m-%d")
    except (TypeError, ValueError):
        return None
    # Subtract one when this year's birthday has not happened yet.
    return today.year - dob.year - ((today.month, today.day) < (dob.month, dob.day))


def _as_float(raw):
    """Coerce ``raw`` to float, treating missing or non-numeric values as 0.0."""
    try:
        return float(raw)
    except (TypeError, ValueError):
        return 0.0


def _section(data, key):
    """Return ``data[key]`` when it is a dict, otherwise an empty dict."""
    value = data.get(key)
    return value if isinstance(value, dict) else {}


def extract_features(data: dict) -> PersonFeatures:
    """Flatten a person payload into the ``PersonFeatures`` dict used by the graph.

    Missing, null, or malformed sections fall back to neutral defaults instead
    of raising, so a partial payload still yields a complete feature dict.

    Args:
        data: Person record as a dict. Fields are expected as ``{"value": ...}``
            or ``{"desc": ...}`` wrappers; a child's ``dob`` may also be a
            plain ``YYYY-MM-DD`` string.

    Returns:
        Dict with ``age``, ``marital``, ``dependents``, ``children_ages``,
        ``income``, ``cpf_balance``, ``housing_type`` and ``has_dependents``.
    """
    today = date.today()

    # Age: 0 when the DOB is absent or not in YYYY-MM-DD form.
    age = _age_in_years(_unwrap_value(data.get("dob")), today) or 0

    # Children ages: records whose DOB cannot be parsed are skipped here but
    # still count towards `dependents` below.
    children = data.get("childrenbirthrecords") or []
    children_ages = []
    for child in children:
        if not isinstance(child, dict):
            continue
        child_age = _age_in_years(_unwrap_value(child.get("dob")), today)
        if child_age is not None:
            children_ages.append(child_age)

    # Income: amount of the last NOA entry.
    # NOTE(review): assumes the last list entry is the most recent assessment -- confirm ordering.
    noas = _section(data, "noahistory").get("noas") or []
    latest_noa = noas[-1] if noas else None
    income = _as_float(_unwrap_value(latest_noa.get("amount"))) if isinstance(latest_noa, dict) else 0.0

    # CPF: sum of the four account balances; absent accounts contribute 0.
    cpf = _section(data, "cpfbalances")
    cpf_balance = sum(_as_float(_unwrap_value(cpf.get(k))) for k in ("oa", "sa", "ma", "ra"))

    # Marital and housing
    marital = _section(data, "marital").get("desc", "Unknown")
    housing_type = _section(data, "hdbtype").get("desc", "")

    return {
        "age": age,
        "marital": marital,
        "dependents": len(children),
        "children_ages": children_ages,
        "income": income,
        "cpf_balance": cpf_balance,
        "housing_type": housing_type,
        # Derived from the same list as `dependents` so the two never disagree.
        "has_dependents": bool(children),
    }

# --- Fetch sample data and build client state ---
url = "https://sandbox.api.myinfo.gov.sg/com/v4/person-sample/S9812381D"
# Bound the wait and surface HTTP errors here, rather than hanging the kernel
# or failing later inside .json() / extract_features on an error page.
sample_response = requests.get(url, timeout=30)
sample_response.raise_for_status()
data = sample_response.json()
features = extract_features(data)

# --- Initialize ClientState ---
client_state: ClientState = {
    "sections": [features],  # wrap features in a list
    "life_stages": [],       # populate from Agent_1
    "shield_plan_recommendations": [],  # populate from Agent_2
    "summary": ""
}
print(json.dumps(client_state, indent=4))

{
    "sections": [
        {
            "age": 27,
            "marital": "MARRIED",
            "dependents": 1,
            "children_ages": [
                7
            ],
            "income": 53700.0,
            "cpf_balance": 94655.25,
            "housing_type": "3-ROOM FLAT (HDB)",
            "has_dependents": true
        }
    ],
    "life_stages": [],
    "shield_plan_recommendations": [],
    "summary": ""
}


In [8]:
# Run the compiled graph on the initial state and pretty-print the final state.
finalresult = supervisor_system.invoke(client_state)
final_json = json.dumps(finalresult, indent=4)
print(final_json)

{
    "sections": [
        {
            "age": 27,
            "marital": "MARRIED",
            "dependents": 1,
            "children_ages": [
                7
            ],
            "income": 53700.0,
            "cpf_balance": 94655.25,
            "housing_type": "3-ROOM FLAT (HDB)",
            "has_dependents": true
        }
    ],
    "life_stages": [
        {
            "stage_name": "Early Career",
            "age_range": "27-35",
            "features": {
                "marital": "MARRIED",
                "dependents": 1,
                "children_ages": [
                    7
                ],
                "income": 60000.0,
                "cpf_balance": 100000.0,
                "housing_type": "3-ROOM FLAT (HDB)",
                "has_dependents": true
            }
        },
        {
            "stage_name": "Mid Career",
            "age_range": "36-45",
            "features": {
                "marital": "MARRIED",
                "dependents": 

In [9]:
import json

# Save the final graph state as compact JSON in result.json.
with open("result.json", "w") as result_file:
    result_file.write(json.dumps(finalresult))


In [None]:
# Launch the Streamlit app defined in app.py (not part of this notebook).
# NOTE(review): the server appears to keep running, so this cell presumably
# occupies the kernel until it is interrupted -- confirm.
!streamlit run app.py


  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://192.168.0.92:8501

