<hr style="border:30px solid Firebrick "> </hr>
<hr style="border:2px solid Firebrick "> </hr>

# Agentic Workflow Automation for Northwestern Memorial Hospital
**Author:** Atef Bader, PhD

**Last Edit:** 12/17/2024



## Goals

- Automate call/inquiry processing using LangGraph/LangChain with OpenAI
- Use OpenAI to route each user question to the agent representing the appropriate Northwestern Memorial Hospital department, and to answer it

<hr style="border:2px solid Firebrick "> </hr>


<img src="attachment:6925f10a-1fae-4385-a348-d427e8a93cf0.png" align="center" width="500"/>


<hr style="border:5px solid orange "> </hr>


In [None]:
# Disabled setup cell: the install commands below are wrapped in a string
# literal, so nothing is installed when this cell runs.
'''%%capture --no-stderr
%pip install uv
%uv pip install chromadb==0.4.22
%uv pip install tiktoken==0.9.0
%uv pip install langchain==0.3.20
%uv pip install langchain-community==0.3.10
%uv pip install langchain-openai==0.3.1
%uv pip install langchainhub
%uv pip install langchain-text-splitters==0.3.6
%uv pip install langgraph==0.3.1
%uv pip install openai==1.65.3
%uv pip install PyMuPDF==1.25.3
%uv pip install pypdf==5.3.1
%uv pip install pillow==11.1.0
%uv pip install beautifulsoup4==4.13.3
%uv pip install  mermaid_cli
%uv pip install grandalf'''
    
  

In [None]:
from IPython.display import Image as IPImage
from IPython.display import Image, display

from typing import TypedDict, Optional, List, Dict, Any, Annotated, Tuple, Optional, Literal, Callable
from typing_extensions import TypedDict
import operator

from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import MessagesState
from langgraph.graph import StateGraph, START, END

from langgraph.prebuilt import tools_condition, ToolNode
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, BaseMessage
from langchain_openai import ChatOpenAI

from langgraph.graph.message import add_messages

### NEW
from langchain_core.tools import StructuredTool
from langchain.agents import create_agent
from datetime import date, datetime, timedelta
import pandas as pd
from __future__ import annotations
from langchain_core.tools import tool
import os
from pathlib import Path
from dotenv import load_dotenv, find_dotenv
import csv
import re
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
import json
from langchain_core.documents import Document

# Load variables from the nearest .env file into the process environment.
load_dotenv(find_dotenv())

# Credentials / tracing settings read from the environment (None when unset).
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT = os.getenv("LANGCHAIN_PROJECT")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# True only when the variable is exactly the lowercase string "true".
LANGCHAIN_TRACING_V2 = os.getenv("LANGCHAIN_TRACING_V2") == "true"

# All directories are resolved relative to the current working directory.
INPUT_DIR = Path.cwd() / "Input"
OUTPUT_DIR = Path.cwd() / "Output"

KB_DIR = Path.cwd() / "kb"

# CSV data sources consumed by the scheduling and test-review tools.
CARDIOLOGY_SCHEDULE_CSV = KB_DIR / "cardiology_appointment_slots.csv"
PRIMARY_CARE_RESULTS_CSV = KB_DIR / "primary_care_test_results.csv"

# Per-department JSONL knowledge bases (one JSON object per line).
CARDIOLOGY_KB_PATH = "./kb/cardiology_kb.jsonl"
BILLING_KB_PATH = "./kb/billing_kb.jsonl"
ER_AGENT_KB_PATH = "./kb/er_agent_kb.jsonl"
PEDIATRICS_KB_PATH = "./kb/pediatrics_kb.jsonl"
PRIMARY_CARE_KB_PATH = "./kb/primary_care_kb.jsonl"
RADIOLOGY_KB_PATH = "./kb/radiology_kb.jsonl"


# Passed as `rebuild=` to build_or_load_chroma_collection for every department store.
REBUILD_CHROMA = False   # <-- set to False to reuse persisted DB
# Directory where all Chroma collections are persisted.
CHROMA_DIR = "./chroma_kb"
#CHROMA_DIR = "./chroma_kb_rebuild" if REBUILD_CHROMA else "./chroma_kb"


# Echo the tracing configuration so it is visible in the notebook output.
print("LANGCHAIN_PROJECT:", LANGCHAIN_PROJECT)
print("LANGCHAIN_TRACING_V2:", LANGCHAIN_TRACING_V2)

In [None]:
# Disabled alternative to the .env loading above: the helper is wrapped in a
# string literal, so no interactive prompt for OPENAI_API_KEY is shown.
'''import os, getpass

def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"{var}: ")

_set_env("OPENAI_API_KEY")'''

# Declare state dictionary structure

In [None]:
# Requirement 1: Define the structure of agent state for the LangGraph
class InquiryState(TypedDict):
    """Shared LangGraph state carried through the inquiry-routing graph."""

    # Conversation history; `add_messages` is the reducer applied to updates of this key.
    messages: Annotated[List[BaseMessage], add_messages]
    # Fallback inquiry text, used when no HumanMessage is present in `messages`.
    inquiry: str
    # Name of the next node to run; department nodes set this to END.
    next_node: str
    # Final response text; department nodes store it prefixed with "<Dept>:: ".
    response: Optional[str]

    # routing
    intent: Optional[str]
    active_agent: Optional[str]  # which agent currently owns the thread
    last_router_reason: Optional[str]

    # retrieval payload
    retrieved: Optional[List[Dict[str, Any]]]  # JSON-serializable so easy to log/debug. [{id, text, score, meta}, ...]
    retrieval_confidence: Optional[float]

    # patient identifying info
    # Forwarded to department agents by default (see make_department_node passthrough keys).
    patient_id: str

# Functions and structures for handling messages in state

In [None]:
# Canonical names of the department agents.
# NOTE(review): presumably the set of valid routing targets — not referenced in the visible code; confirm against the router.
ROUTABLE_AGENTS = {
    "ER",
    "Radiology",
    "PrimaryCare",
    "Cardiology",
    "Pediatrics",
    "BillingInsurance",
}

# Maps a lowercase, letters-only intent key (as produced by _normalize_intent)
# to its canonical label.
INTENT_MAP = {
    "greeting": "Greeting",
    "generalinquiry": "GeneralInquiry",
    "er": "ER",
    "radiology": "Radiology",
    "primarycare": "PrimaryCare",
    "cardiology": "Cardiology",
    "pediatrics": "Pediatrics",
    "billinginsurance": "BillingInsurance",
}

# The two labels _classify_transition can return.
TransitionLabel = Literal["NEW_TOPIC", "CONTINUATION"]


def _normalize_intent(intent: Optional[str]) -> Optional[str]:
    """Map a free-form intent label onto its canonical INTENT_MAP value.

    The label is lowercased and stripped of every non-letter character before
    lookup, so "Primary Care", "primary_care" and "PRIMARYCARE" all resolve to
    the same entry. Unknown labels are returned whitespace-trimmed but
    otherwise unchanged; empty or missing labels yield None.
    """
    if not intent:
        return None

    lookup_key = re.sub(r"[^a-z]", "", intent.lower())
    if lookup_key in INTENT_MAP:
        return INTENT_MAP[lookup_key]
    return intent.strip()


def _latest_user_inquiry(state: InquiryState) -> str:
    """Return the text of the most recent human message in the state.

    String content is returned as-is. List content is reduced to its
    ``{"type": "text"}`` parts, joined with single spaces and trimmed. Human
    messages whose content is neither a string nor a list are skipped. When no
    usable human message exists, the state's ``inquiry`` value (default "") is
    returned.
    """
    history = state.get("messages") or []

    for message in reversed(history):
        if not isinstance(message, HumanMessage):
            continue

        content = message.content
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for part in content:
                # Only dict parts explicitly typed as text contribute.
                if isinstance(part, dict) and part.get("type") == "text":
                    pieces.append(part.get("text", ""))
            return " ".join(pieces).strip()

    return state.get("inquiry", "")


def _latest_prior_exchange(state: InquiryState) -> tuple[str, str]:
    """Return ``(latest user text, assistant text that preceded it)``.

    The assistant text is taken from the nearest AIMessage that is older than
    the most recent HumanMessage; it is "" when there is no such message.
    Non-string assistant content is converted with ``str()``.
    """
    history = state.get("messages") or []
    latest_user = _latest_user_inquiry(state)

    prior_ai = ""
    newest_first = reversed(history)

    # Advance the shared iterator past the most recent human message...
    for message in newest_first:
        if isinstance(message, HumanMessage):
            break

    # ...then keep walking back in time until an assistant message shows up.
    for message in newest_first:
        if isinstance(message, AIMessage):
            content = message.content
            prior_ai = content if isinstance(content, str) else str(content)
            break

    return latest_user, prior_ai


def _classify_transition(state: InquiryState, llm: ChatOpenAI) -> TransitionLabel:
    """Ask the LLM whether the latest user message continues the prior exchange.

    Returns "CONTINUATION" when the model's reply contains that token,
    otherwise "NEW_TOPIC" (also the default for unrecognized replies, so the
    router prefers reclassification over sticky routing).
    """
    latest_user, prior_ai = _latest_prior_exchange(state)
    active_agent = state.get("active_agent") or "None"

    instructions = (
        "You classify whether the latest user message is a continuation of the prior exchange "
        "or a clearly new topic or department request. "
        "Return exactly one token: NEW_TOPIC or CONTINUATION."
    )
    exchange_summary = (
        f"Active agent: {active_agent}\n"
        f"Prior assistant message: {prior_ai or '(none)'}\n"
        f"Latest user message: {latest_user}\n\n"
        "Return NEW_TOPIC or CONTINUATION only."
    )
    prompt = [
        SystemMessage(content=instructions),
        HumanMessage(content=[{"type": "text", "text": exchange_summary}]),
    ]

    verdict = llm.invoke(prompt).content.strip().upper()

    # CONTINUATION is checked first, so it wins if the reply contains both tokens.
    for label in ("CONTINUATION", "NEW_TOPIC"):
        if label in verdict:
            return label

    # Safe default: allow reclassification/handoff over sticky routing.
    return "NEW_TOPIC"

def _extract_agent_text(result: dict) -> str:
    """Pull the assistant's final text out of an agent invocation result.

    Supports AgentExecutor-style results (``{"output": ...}``) and
    LangGraph-style results (``{"messages": [...]}``). Non-dict results are
    stringified. Returns "" when no non-empty assistant text can be found.
    """
    if not isinstance(result, dict):
        return str(result)

    # AgentExecutor style: a non-blank "output" string wins outright.
    direct = result.get("output")
    if isinstance(direct, str):
        direct = direct.strip()
        if direct:
            return direct

    # Messages style: scan newest-to-oldest for non-empty assistant text.
    history = result.get("messages") or result.get("message") or []
    if not isinstance(history, list):
        return ""

    for entry in reversed(history):
        if isinstance(entry, AIMessage):
            text = entry.content
        elif isinstance(entry, dict) and (entry.get("role") or entry.get("type")) in ("assistant", "ai"):
            # Some frameworks store messages as plain dicts.
            text = entry.get("content")
        else:
            continue

        if isinstance(text, str) and text.strip():
            return text.strip()

    # Nothing usable found; callers treat the empty string as "no answer".
    return ""


# Creating or loading knowledge base stores

In [None]:
def load_kb_jsonl(path: str, agent_name: str) -> list[Document]:
    """Read a JSONL knowledge base and convert every row into a Document.

    Each non-blank line must be a JSON object with ``id``, ``question`` and
    ``answer`` keys; ``tags`` is optional. The page content is the
    "Q: / A: / Tags:" rendering of the row, and the metadata carries the row
    id, the owning agent name, the tags and the question.
    """
    raw_lines = Path(path).read_text(encoding="utf-8").splitlines()

    docs: list[Document] = []
    for raw in raw_lines:
        if not raw.strip():
            continue  # tolerate blank lines between records

        record = json.loads(raw)
        tags = record.get("tags", [])
        question = record["question"]
        content = f"Q: {question}\nA: {record['answer']}\nTags: {', '.join(tags)}"

        # NOTE(review): tags stay a list in the metadata; some Chroma versions
        # only accept scalar metadata values — confirm against the pinned version.
        metadata = {
            "id": record["id"],
            "agent": agent_name,
            "tags": tags,
            "question": question,
        }
        docs.append(Document(page_content=content, metadata=metadata))

    return docs

# This creates the collection and saves it to disk (persist_directory). Next runs can just load it.
# if changing knowledge base content, delete the ./chroma_kb folder and rebuild to avoid accidentally keeping stale embeddings.

def build_or_load_chroma_collection(
    collection_name: str,
    persist_directory: str,
    documents: list[Document] | None = None,
    rebuild: bool = False,
):
    """Open a persisted Chroma collection, populating it when required.

    Args:
      collection_name: Name of the collection inside the persisted store.
      persist_directory: Directory in which Chroma persists its data.
      documents: Documents to index. Each must carry a unique ``metadata["id"]``,
        which is used as the Chroma document id.
      rebuild: When True and ``documents`` is provided, any existing collection
        content is dropped and re-embedded from ``documents``. Without
        ``documents`` the flag is ignored, so an existing index is never wiped
        with nothing to replace it.

    Returns:
      The Chroma vector store for the collection.
    """
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

    def _open_store():
        return Chroma(
            collection_name=collection_name,
            persist_directory=persist_directory,
            embedding_function=embeddings,
        )

    store = _open_store()
    if not documents:
        return store

    # Honour `rebuild`: previously the flag was accepted but never consulted,
    # so stale embeddings survived a requested rebuild.
    if rebuild and store._collection.count() > 0:
        store.delete_collection()
        # The old handle no longer points at a live collection; reopen it.
        store = _open_store()

    # Embed only into an empty collection so repeated runs reuse persisted data.
    if store._collection.count() == 0:
        ids = [d.metadata["id"] for d in documents]
        store.add_documents(documents, ids=ids)

    return store

In [None]:
# Cardiology
# For every department: load its JSONL knowledge base into Documents, then open
# the matching Chroma collection (all collections share CHROMA_DIR).
cardio_docs = load_kb_jsonl(CARDIOLOGY_KB_PATH, agent_name="Cardiology")
cardiology_store = build_or_load_chroma_collection(
    collection_name="kb_cardiology",
    persist_directory=CHROMA_DIR,
    documents=cardio_docs,
    rebuild=REBUILD_CHROMA,
)

# Billing
billing_docs = load_kb_jsonl(BILLING_KB_PATH, agent_name="BillingInsurance")
billing_store = build_or_load_chroma_collection(
    collection_name="kb_billing",
    persist_directory=CHROMA_DIR,
    documents=billing_docs,
    rebuild=REBUILD_CHROMA,
)

# ER
er_docs = load_kb_jsonl(ER_AGENT_KB_PATH, agent_name="ER")
er_store = build_or_load_chroma_collection(
    collection_name="kb_er",
    persist_directory=CHROMA_DIR,
    documents=er_docs,
    rebuild=REBUILD_CHROMA,
)

# Pediatrics
pediatrics_docs = load_kb_jsonl(PEDIATRICS_KB_PATH, agent_name="Pediatrics")
pediatrics_store = build_or_load_chroma_collection(
    collection_name="kb_pediatrics",
    persist_directory=CHROMA_DIR,
    documents=pediatrics_docs,
    rebuild=REBUILD_CHROMA,
)

# Primary Care
primary_care_docs = load_kb_jsonl(PRIMARY_CARE_KB_PATH, agent_name="PrimaryCare")
primary_care_store = build_or_load_chroma_collection(
    collection_name="kb_primary_care",
    persist_directory=CHROMA_DIR,
    documents=primary_care_docs,
    rebuild=REBUILD_CHROMA,
)

# Radiology
radiology_docs = load_kb_jsonl(RADIOLOGY_KB_PATH, agent_name="Radiology")
radiology_store = build_or_load_chroma_collection(
    collection_name="kb_radiology",
    persist_directory=CHROMA_DIR,
    documents=radiology_docs,
    rebuild=REBUILD_CHROMA,
)

# Retrieval functions

In [None]:
# Keyword overlap scorer

def _tokenize(s: str) -> set[str]:
    """Return the distinct lowercase alphanumeric tokens of *s* longer than two characters."""
    # Anything that is not a lowercase letter, digit or whitespace becomes a separator.
    cleaned = re.sub(r"[^a-z0-9\s]", " ", s.lower())
    return {token for token in cleaned.split() if len(token) > 2}

def keyword_overlap_score(query: str, text: str) -> float:
    """Return the fraction of query tokens that also occur in *text*.

    The result is in [0, 1]; a query with no usable tokens scores 0.0.
    """
    query_terms = _tokenize(query)
    if not query_terms:
        return 0.0

    matched = query_terms.intersection(_tokenize(text))
    return len(matched) / len(query_terms)

### Hybrid retrieval function
Grabs the top `k_vec` candidates from Chroma, then rescores them with a weighted mix of:
- a vector rank-based score (simple, stable across distance metrics)
- a keyword overlap score

Outputs the top `k_final` results plus a confidence value.

In [None]:
def hybrid_retrieve(
    query: str,
    store: Chroma,
    k_vec: int = 8,
    k_final: int = 3,
    alpha: float = 0.75,   # weight on vector ranking
) -> Tuple[List[Dict[str, Any]], float]:
    """
    Retrieve candidates by vector similarity and rerank them with keyword overlap.

    Args:
      query: User query text.
      store: Chroma store to search.
      k_vec: Number of candidates fetched by vector similarity.
      k_final: Number of reranked items to return. Values <= 0 return no items.
      alpha: Weight of the vector-rank score; (1 - alpha) weights keyword overlap.

    Returns: (retrieved_items, confidence)
      retrieved_items: [{id, text, meta, vec_rank, score}, ...] sorted by score, best first
      confidence: float in [0, 1] (rough heuristic)
    """
    # Nothing was requested: skip the embedding call and avoid indexing into an
    # empty result list further down (previously an IndexError).
    if k_final <= 0:
        return [], 0.0

    docs = store.similarity_search(query, k=k_vec)
    if not docs:
        return [], 0.0

    # Vector rank score: best doc ~1.0, worst ~0.0. The max() keeps the
    # single-result case from dividing by zero.
    denom = max(1, len(docs) - 1)

    # Convert to serializable items and score them in one pass.
    items: List[Dict[str, Any]] = []
    for rank, doc in enumerate(docs):
        meta = doc.metadata or {}
        text = doc.page_content
        vec_score = 1.0 - (rank / denom)
        kw_score = keyword_overlap_score(query, text)
        items.append({
            "id": meta.get("id", f"doc_{rank}"),
            "text": text,
            "meta": meta,
            "vec_rank": rank,  # 0 best
            "score": alpha * vec_score + (1 - alpha) * kw_score,
        })

    # sort() is stable, so ties keep their original vector order.
    items.sort(key=lambda item: item["score"], reverse=True)
    top = items[:k_final]

    # Confidence heuristic: best score + gap to 2nd, clamped to [0, 1].
    best = top[0]["score"]
    second = top[1]["score"] if len(top) > 1 else 0.0
    confidence = max(0.0, min(1.0, best * 0.85 + (best - second) * 0.15))

    return top, confidence

In [None]:
# Disabled earlier version of the cardiology retriever tool, kept as a string
# literal so it is never defined; make_retriever_tool builds the live tools.
'''@tool
def retrieve_cardiology(query: str, k_vec: int = 8, k_final: int = 3) -> Dict[str, Any]:
    """
    Retrieve cardiology knowledge-base snippets relevant to the user query.

    Returns snippets formatted as:
      [snippet_id]
      snippet text

    The assistant should cite snippets as [snippet_id].
    """
    retrieved, conf = hybrid_retrieve(query, cardiology_store, k_vec=k_vec, k_final=k_final)

    snippets: List[Dict[str, str]] = []
    for r in (retrieved or [])[:k_final]:
        snippets.append(
            {"id": str(r.get("id", "")), "text": str(r.get("text", ""))}
        )

    retrieved_text = "\n\n".join([f"[{s['id']}]\n{s['text']}" for s in snippets])

    return {
        "query": query,
        "retrieval_confidence": conf,
        "num_snippets": len(snippets),
        "snippets": snippets,
        "retrieved_text": retrieved_text,
    }'''

In [None]:
# -----------------------------
# Reusable retriever tool factory
# -----------------------------

def make_retriever_tool(store, tool_name: str, department_label: str):
    """
    Build a LangChain StructuredTool that searches one department's Chroma store.

    The tool runs hybrid_retrieve(...) against `store` and returns a dict with:
      - department: the given department_label
      - query: the query that was searched
      - retrieval_confidence: heuristic confidence from hybrid_retrieve
      - num_snippets: number of snippets returned
      - snippets: [{id, text}, ...]
      - retrieved_text: the snippets rendered as "[snippet_id]" followed by the
        snippet text, separated by blank lines

    The assistant is expected to cite snippets as [snippet_id].
    """

    def _retrieve(query: str, k_vec: int = 8, k_final: int = 3) -> Dict[str, Any]:
        hits, confidence = hybrid_retrieve(query, store, k_vec=k_vec, k_final=k_final)

        # Keep only the string id/text pair of each hit so the payload stays compact.
        snippets: List[Dict[str, str]] = [
            {"id": str(hit.get("id", "")), "text": str(hit.get("text", ""))}
            for hit in (hits or [])[:k_final]
        ]
        rendered = [f"[{s['id']}]\n{s['text']}" for s in snippets]

        return {
            "department": department_label,
            "query": query,
            "retrieval_confidence": confidence,
            "num_snippets": len(snippets),
            "snippets": snippets,
            "retrieved_text": "\n\n".join(rendered),
        }

    tool_description = (
        f"Retrieve {department_label} knowledge-base snippets relevant to the user query. "
        "Returns snippets formatted as [snippet_id] followed by snippet text. "
        "The assistant should cite snippets as [snippet_id]."
    )
    return StructuredTool.from_function(
        func=_retrieve,
        name=tool_name,
        description=tool_description,
    )


# -----------------------------
# Department-specific tools
# -----------------------------
# One retriever tool per department, each bound to that department's Chroma store.
retrieve_cardiology = make_retriever_tool(
    store=cardiology_store,
    tool_name="retrieve_cardiology",
    department_label="Cardiology",
)

retrieve_billing = make_retriever_tool(
    store=billing_store,
    tool_name="retrieve_billing",
    department_label="Billing",
)

retrieve_er = make_retriever_tool(
    store=er_store,
    tool_name="retrieve_er",
    department_label="Emergency Room",
)

retrieve_pediatrics = make_retriever_tool(
    store=pediatrics_store,
    tool_name="retrieve_pediatrics",
    department_label="Pediatrics",
)

retrieve_primary_care = make_retriever_tool(
    store=primary_care_store,
    tool_name="retrieve_primary_care",
    department_label="Primary Care",
)

retrieve_radiology = make_retriever_tool(
    store=radiology_store,
    tool_name="retrieve_radiology",
    department_label="Radiology",
)

## Cardiology get schedule implementation

In [None]:
def _parse_iso_date(s: str) -> Optional[date]:
    """Parse a ``YYYY-MM-DD`` string into a date; return None if it cannot be parsed."""
    try:
        parsed = datetime.strptime(s, "%Y-%m-%d")
    except Exception:
        # Covers malformed text as well as non-string input.
        return None
    return parsed.date()

def _next_week_range(today: date) -> Tuple[date, date]:
    """
    Return the half-open range [start, end) covering the week after *today*'s week.

    start is the Monday following the current Monday-based week; end is the
    Monday one week after that (exclusive).
    """
    # weekday() is 0 on Monday, so this always lands on the upcoming Monday.
    days_until_next_monday = 7 - today.weekday()
    start = today + timedelta(days=days_until_next_monday)
    return start, start + timedelta(weeks=1)

def _extract_week_hint(question: str) -> str:
    """
    Minimal intent parser for the requested scheduling week.

    Returns "this_week" only when the question contains the phrase
    'this week' (case-insensitive); every other question, including one that
    names no week at all, maps to "next_week".
    """
    normalized = (question or "").lower()
    return "this_week" if "this week" in normalized else "next_week"

def _this_week_range(today: date) -> Tuple[date, date]:
    """Return the half-open range [Monday, next Monday) of the week containing *today*."""
    week_start = today - timedelta(days=today.weekday())
    week_end = week_start + timedelta(weeks=1)
    return week_start, week_end

def get_cardiology_schedule(
    question: str,
    csv_path: str,
    today: Optional[str] = None,
    department: str = "Cardiology",
    only_available: bool = True,
    max_results: int = 12,
) -> Dict[str, Any]:
    """
    Look up appointment slot availability in a CSV schedule and return matching slots.

    Use this when a user asks about appointment availability (e.g.:
    "Do you have any available appointments with a cardiologist next week?").

    Args:
      question: The user question; 'this week' selects the current week,
        anything else selects next week.
      csv_path: Path to the schedule CSV. Required columns: date, department,
        slot_start_time_local, slot_status.
      today: Optional override for today's date in YYYY-MM-DD (helps testing).
      department: Department to filter on, case-insensitive (default Cardiology).
        A falsy value disables the filter.
      only_available: If True, returns AVAILABLE slots only.
      max_results: Max slots to return (keeps tool output small). Negative
        values are treated as 0.

    Returns:
      A dictionary containing the interpreted date range, counts, and a list of
      slots. Blank CSV cells are reported as "" (text) or 0 (duration).

    Raises:
      ValueError: csv_path is empty, `today` is malformed, or required columns
        are missing.
      FileNotFoundError: csv_path does not exist.
    """
    if not csv_path:
        raise ValueError("csv_path is required.")

    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"Schedule CSV not found at: {csv_path}")

    # Determine "today"
    if today:
        today_d = _parse_iso_date(today)
        if not today_d:
            raise ValueError("today must be in YYYY-MM-DD format")
    else:
        today_d = date.today()

    # Determine time window (half-open: start inclusive, end exclusive)
    window = _extract_week_hint(question)
    if window == "this_week":
        start_d, end_d = _this_week_range(today_d)
    else:
        start_d, end_d = _next_week_range(today_d)

    df = pd.read_csv(csv_path)

    # Validate columns
    required = {"date", "department", "slot_start_time_local", "slot_status"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"CSV missing required columns: {sorted(missing)}")

    # Normalize and filter. assign() returns new frames, so no column is ever
    # written onto a filtered slice (avoids pandas chained-assignment warnings).
    df = df.assign(date_parsed=pd.to_datetime(df["date"], errors="coerce").dt.date)
    df = df[df["date_parsed"].notna()]  # drop rows whose date could not be parsed
    df = df[(df["date_parsed"] >= start_d) & (df["date_parsed"] < end_d)]

    if department:
        df = df[df["department"].astype(str).str.lower() == department.lower()]

    if only_available:
        df = df[df["slot_status"].astype(str).str.upper() == "AVAILABLE"]

    # Sort by date/time.
    # NOTE(review): times are compared as strings, which is chronological only
    # for zero-padded 24h values such as "09:00" — confirm the CSV format.
    df = df.assign(slot_time_norm=df["slot_start_time_local"].astype(str))
    df = df.sort_values(["date_parsed", "slot_time_norm"], ascending=True)

    def _cell_text(row, column: str) -> str:
        # Blank CSV cells arrive as NaN; report them as "" instead of "nan".
        value = row.get(column, "")
        return "" if pd.isna(value) else str(value)

    def _cell_minutes(row) -> int:
        # int(NaN) raises, so a blank duration is reported as 0.
        value = row.get("slot_duration_minutes", 0)
        return 0 if pd.isna(value) else int(value or 0)

    # Convert to compact records
    slots: List[Dict[str, Any]] = [
        {
            "date": _cell_text(r, "date"),
            "day_of_week": _cell_text(r, "day_of_week"),
            "start_time": _cell_text(r, "slot_start_time_local"),
            "duration_minutes": _cell_minutes(r),
            "appointment_type": _cell_text(r, "appointment_type"),
            "provider_name": _cell_text(r, "provider_name"),
            "location": _cell_text(r, "location"),
            "status": _cell_text(r, "slot_status"),
        }
        for _, r in df.head(max(0, max_results)).iterrows()
    ]

    return {
        "tool": "get_schedule",
        "question": question,
        "interpreted": {
            "today": today_d.isoformat(),
            "time_window": window,
            "range_start": start_d.isoformat(),
            "range_end_exclusive": end_d.isoformat(),
            "department": department,
            "only_available": only_available,
        },
        "results": {
            "total_matching_slots": int(df.shape[0]),
            "returned_slots": len(slots),
            "slots": slots,
        },
    }

@tool
def get_cardiology_schedule_tool(question: str, today: str | None = None) -> dict:
    """
    Get AVAILABLE cardiology appointment slots from the hospital schedule CSV.

    Covers next week by default, or the current week when the question says "this week".

    Args:
      question: The user’s question about scheduling/availability.
      today: Optional override in YYYY-MM-DD (useful for testing).
    Returns:
      Dict with interpreted date range + matching slots.
    """
    # Thin wrapper over get_cardiology_schedule that pins the CSV location and
    # filters, so the LLM only has to supply the question (and optionally today).
    return get_cardiology_schedule(
        question=question,
        csv_path=str(CARDIOLOGY_SCHEDULE_CSV),  # constant is a Path; callee declares str
        today=today,
        department="Cardiology",
        only_available=True,
        max_results=12,
    )

## Test review status tool implementation for primary care agent

In [None]:
import os
from datetime import datetime, timedelta, date
from typing import Any, Dict, List, Optional, Tuple

import pandas as pd
from langchain.tools import tool


def _parse_dt(s: str) -> Optional[datetime]:
    """Parse a timestamp string into a naive datetime, or return None.

    Accepted layouts, tried in order: ``YYYY-MM-DD HH:MM``,
    ``YYYY-MM-DDTHH:MM:SS`` and ``YYYY-MM-DD`` (a bare date becomes midnight).
    Empty or non-string input yields None.
    """
    if not s or not isinstance(s, str):
        return None

    accepted_formats = ("%Y-%m-%d %H:%M", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d")
    for layout in accepted_formats:
        try:
            return datetime.strptime(s, layout)
        except Exception:
            # This layout did not match; fall through to the next one.
            pass
    return None


def _extract_recency_days(question: str, default_days: int = 14) -> int:
    """
    Minimal intent parser for 'recent' / 'last X days' / 'last week' / 'latest'.

    Precedence, most specific first:
      1. an explicit "last N days" / "past N days" (clamped to at least 1)
      2. "today" -> 1, "yesterday" -> 2
      3. "last week" / "past week" -> 7
      4. anything else, including "recent" / "latest" -> default_days

    An explicit day count is checked first so that a vague word elsewhere in
    the question (e.g. "my recent labs from the last 30 days") cannot override
    the number the caller actually asked for.

    Args:
      question: The user question; None is treated as empty.
      default_days: Window used when the question gives no usable hint.

    Returns:
      Number of days to search back from "now".
    """
    import re

    q = (question or "").lower()

    explicit = re.search(r"(?:last|past)\s+(\d+)\s+day", q)
    if explicit:
        return max(1, int(explicit.group(1)))

    if "today" in q:
        return 1
    if "yesterday" in q:
        return 2
    if "last week" in q or "past week" in q:
        return 7

    # "recent", "latest", "most recent" and unhinted questions share the default window.
    return default_days


def get_primary_care_test_review_status(
    question: str,
    csv_path: str,
    patient_id: str,
    now: Optional[str] = None,
    department: str = "Primary Care",
    lookback_days_default: int = 14,
    max_results: int = 5,
) -> Dict[str, Any]:
    """
    Look up whether a patient's recent test results have been reviewed by their PCP.

    Use this when a user asks things like:
      - "Has my recent test result been reviewed by my primary care doctor?"
      - "Did my PCP look at my labs yet?"
      - "Any update on my bloodwork?"

    Args:
      question: user question (used for recency hints).
      csv_path: path to the test results status CSV. Required columns:
        patient_id, department, test_name, resulted_datetime_local, review_status.
      patient_id: identifier (MRN / patient_id); compared as a trimmed string.
      now: optional override (YYYY-MM-DD or YYYY-MM-DD HH:MM).
      department: department filter, case- and whitespace-insensitive
        (default Primary Care). A blank value disables the filter.
      lookback_days_default: default window for 'recent/latest'.
      max_results: cap returned rows; negative values are treated as 0.

    Returns:
      Dict with interpreted window + latest results + review info. Blank CSV
      cells are reported as "".

    Raises:
      ValueError: csv_path or patient_id is empty, `now` is malformed, or
        required columns are missing.
      FileNotFoundError: csv_path does not exist.
    """
    if not csv_path:
        raise ValueError("csv_path is required.")
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"Test results CSV not found at: {csv_path}")
    if not patient_id:
        raise ValueError("patient_id is required.")

    # Determine "now"
    now_dt = _parse_dt(now) if now else datetime.now()
    if not now_dt:
        raise ValueError("now must be in YYYY-MM-DD or YYYY-MM-DD HH:MM format")

    lookback_days = _extract_recency_days(question, default_days=lookback_days_default)
    start_dt = now_dt - timedelta(days=lookback_days)

    df = pd.read_csv(csv_path)

    # Validate required columns
    required = {
        "patient_id",
        "department",
        "test_name",
        "resulted_datetime_local",
        "review_status",
    }
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"CSV missing required columns: {sorted(missing)}")

    # Normalize types. assign() builds a new frame, so the filters below never
    # write onto a slice.
    df = df.assign(
        patient_id_norm=df["patient_id"].astype(str).str.strip(),
        dept_norm=df["department"].astype(str).str.strip().str.lower(),
        resulted_dt=pd.to_datetime(df["resulted_datetime_local"], errors="coerce"),
    )
    df = df[df["resulted_dt"].notna()]  # drop rows with unparseable timestamps

    # Filter by patient + department + time window
    patient_key = str(patient_id).strip()
    df = df[df["patient_id_norm"] == patient_key]

    # Strip the filter the same way the column was stripped, so "Primary Care "
    # still matches.
    dept_key = (department or "").strip().lower()
    if dept_key:
        df = df[df["dept_norm"] == dept_key]

    df = df[(df["resulted_dt"] >= start_dt) & (df["resulted_dt"] <= now_dt)]

    # Most recent first
    df = df.sort_values(["resulted_dt"], ascending=False)

    def _cell_text(row, column: str) -> str:
        # Blank CSV cells arrive as NaN; report them as "" instead of "nan".
        value = row.get(column, "")
        return "" if pd.isna(value) else str(value)

    # Optional columns are reported only when the CSV provides them.
    optional_columns = [
        column
        for column in (
            "result_status",
            "reviewed_datetime_local",
            "reviewed_by",
            "released_to_patient",
        )
        if column in df.columns
    ]

    results: List[Dict[str, Any]] = []
    for _, r in df.head(max(0, max_results)).iterrows():
        item = {
            "test_name": _cell_text(r, "test_name"),
            "resulted_datetime_local": _cell_text(r, "resulted_datetime_local"),
            "review_status": _cell_text(r, "review_status"),
        }
        for column in optional_columns:
            item[column] = _cell_text(r, column)
        results.append(item)

    # Summarize the "latest" row if one exists
    latest = results[0] if results else None

    now_text = now_dt.strftime("%Y-%m-%d %H:%M")
    return {
        "tool": "get_test_review_status",
        "question": question,
        "interpreted": {
            "patient_id": patient_key,
            "department": department,
            "now": now_text,
            "lookback_days": lookback_days,
            "range_start": start_dt.strftime("%Y-%m-%d %H:%M"),
            "range_end": now_text,
        },
        "results": {
            "total_matching_results": int(df.shape[0]),
            "returned_results": len(results),
            "latest_result": latest,
            "recent_results": results,
        },
    }

@tool
def get_primary_care_test_review_status_tool(question: str, patient_id: str | None = None) -> dict:
    """
    Look up review status for a patient's recent Primary Care test results.

    If patient_id is missing, returns error.code == MISSING_PATIENT_ID.
    """
    # Normalize once: a missing or whitespace-only id collapses to "".
    cleaned_id = str(patient_id).strip() if patient_id else ""

    if cleaned_id:
        return get_primary_care_test_review_status(
            question=question,
            csv_path=PRIMARY_CARE_RESULTS_CSV,
            patient_id=cleaned_id,
            department="Primary Care",
        )

    # No usable id: return a structured error the agent prompt knows how to handle.
    return {
        "tool": "get_test_review_status",
        "error": {
            "code": "MISSING_PATIENT_ID",
            "message": "patient_id is required to look up test result review status."
        }
    }

### Agents and their nodes

In [None]:
# Chat model used by every department agent built with make_department_agent.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Base system prompt; formatted with {dept} and {retrieve_tool_name}.
BASE_DEPT_PROMPT = (
    "You are the {dept} assistant for a hospital call center.\n"
    "Answer the user's inquiry using ONLY the information that you get from your tools.\n"
    "If the answer is not explicitly supported by the context from those tools, say you don't have enough information.\n"
    "When you use information from {retrieve_tool_name}, cite it like [snippet_id].\n"
)

# Appended when an agent has a scheduling tool; formatted with {schedule_tool_name}.
SCHED_PROMPT_ADDON = (
    "If the user asks about scheduling/availability, call {schedule_tool_name}.\n"
    "When answering scheduling questions, summarize the earliest few available slots and ask what day/time the caller prefers.\n"
)

# Add-on for agents with a test-results tool; contains a {test_results_tool_name}
# placeholder that the caller must format before passing it as a prompt add-on.
TEST_RESULTS_PROMPT_ADDON = (
    "If the user asks whether a test/lab result has been reviewed (or asks for test result status), "
    "you MUST call {test_results_tool_name}.\n"
    "This requires a patient_id.\n"
    "If patient_id is not provided in the current conversation context/state, ask the user for their patient ID "
    "and DO NOT call the tool until it is provided.\n"
    "Do not guess patient_id.\n"
)

# Appended to every department prompt by make_department_agent.
TOOL_ERROR_HANDLING_ADDON = (
    "If any tool returns an 'error' object, do NOT fabricate an answer.\n"
    "If error.code is 'MISSING_PATIENT_ID', ask the user for their patient ID.\n"
)

def make_department_agent(
    *,
    dept: str,
    retrieve_tool,
    retrieve_tool_name: str,
    schedule_tool=None,
    schedule_tool_name: str | None = None,
    extra_tools: list | None = None,
    prompt_addons: list[str] | None = None,

):
    """Create a department agent with its tools and assembled system prompt.

    Args:
      dept: Department name inserted into the base prompt.
      retrieve_tool: Knowledge-base retriever tool (always included).
      retrieve_tool_name: Name of the retriever tool, referenced in the prompt.
      schedule_tool: Optional scheduling tool.
      schedule_tool_name: Name of the scheduling tool; the scheduling
        instructions are added only when both the tool and its name are given.
      extra_tools: Additional tools appended after the scheduling tool.
      prompt_addons: Extra prompt fragments, joined with newlines and appended
        last.

    Returns:
      The agent produced by create_agent(...) using the shared `llm`.
    """
    # Tool order: retriever, then scheduler (if any), then extras.
    tools = [
        retrieve_tool,
        *([schedule_tool] if schedule_tool else []),
        *(extra_tools or []),
    ]

    prompt_sections = [
        BASE_DEPT_PROMPT.format(dept=dept, retrieve_tool_name=retrieve_tool_name),
        # Include global error behavior
        TOOL_ERROR_HANDLING_ADDON,
    ]
    if schedule_tool and schedule_tool_name:
        prompt_sections.append(
            SCHED_PROMPT_ADDON.format(schedule_tool_name=schedule_tool_name)
        )
    if prompt_addons:
        prompt_sections.append("\n".join(prompt_addons) + "\n")

    return create_agent(
        model=llm,
        tools=tools,
        system_prompt="".join(prompt_sections),
    )

def make_department_node(*, agent, dept_label: str, max_history: int = 12, passthrough_keys: list[str] | None = None):
    """
    Returns a LangGraph node function that:
      - pulls recent Human/AI messages from state
      - invokes the department agent
      - prefixes response with '<Dept>:: '
      - returns message update in the standardized way
    """

    passthrough_keys = passthrough_keys or ["patient_id"]  # safe default

    def _node(state):
        msgs = [m for m in (state.get("messages") or []) if isinstance(m, (HumanMessage, AIMessage))]
        msgs = msgs[-max_history:]  # keep recent context only

        agent_input = {"messages": msgs}

        # pass-through optional fields if they exist
        for k in passthrough_keys:
            if k in state and state.get(k) not in (None, ""):
                agent_input[k] = state.get(k)        

        result = agent.invoke(agent_input)
        resp = _extract_agent_text(result) or "(Debug: agent returned no assistant text.)"

        final_response = f"{dept_label}:: {resp}"

        return {
            "active_agent": dept_label,
            "response": final_response,
            "next_node": END,
            "messages": [AIMessage(content=final_response)],
        }

    return _node

# --- Department agents --------------------------------------------------------
# Every agent gets its department retrieval tool; extra capabilities are opt-in.

# Cardiology additionally answers scheduling/availability questions.
cardiology_agent = make_department_agent(
    dept="Cardiology",
    retrieve_tool=retrieve_cardiology,
    retrieve_tool_name="retrieve_cardiology",
    schedule_tool=get_cardiology_schedule_tool,
    schedule_tool_name="get_cardiology_schedule_tool",
)

# Radiology: schedule tool optional; include if you have one.
radiology_agent = make_department_agent(
    dept="Radiology", retrieve_tool=retrieve_radiology, retrieve_tool_name="retrieve_radiology"
)

# Primary Care additionally reports whether a test result has been reviewed.
primary_care_agent = make_department_agent(
    dept="Primary Care",
    retrieve_tool=retrieve_primary_care,
    retrieve_tool_name="retrieve_primary_care",
    extra_tools=[get_primary_care_test_review_status_tool],
    prompt_addons=[
        TEST_RESULTS_PROMPT_ADDON.format(test_results_tool_name="get_primary_care_test_review_status_tool")
    ],
)

pediatrics_agent = make_department_agent(
    dept="Pediatrics", retrieve_tool=retrieve_pediatrics, retrieve_tool_name="retrieve_pediatrics"
)

er_agent = make_department_agent(
    dept="Emergency Room", retrieve_tool=retrieve_er, retrieve_tool_name="retrieve_er"
)

# --- Department graph nodes ---------------------------------------------------
# dept_label is used as the response prefix and stored in state["active_agent"].

cardiology_node = make_department_node(agent=cardiology_agent, dept_label="Cardiology")

radiology_node = make_department_node(agent=radiology_agent, dept_label="Radiology")

primary_care_node = make_department_node(
    agent=primary_care_agent,
    dept_label="PrimaryCare",
    passthrough_keys=["patient_id"],
)

pediatrics_node = make_department_node(agent=pediatrics_agent, dept_label="Pediatrics")

# Label can be shorter than the department name if you want.
er_node = make_department_node(agent=er_agent, dept_label="ER")

# Billing & Insurance department: retrieval-only agent.
billing_agent = make_department_agent(
    dept="Billing",
    retrieve_tool=retrieve_billing,
    retrieve_tool_name="retrieve_billing",
)

# The label is written to state["active_agent"] by the node, and the router
# checks that value against its routable agent names and reuses it as the next
# node. It therefore has to match the graph node / intent name
# "BillingInsurance" (as "ER" and "PrimaryCare" do), not the shorter "Billing".
# NOTE(review): assumes _normalize_intent does not already alias "Billing".
billing_node = make_department_node(
    agent=billing_agent,
    dept_label="BillingInsurance",
)

In [None]:
def operator_router(state):
    """Classify the latest user inquiry and decide which node handles this turn.

    The inquiry is classified by an LLM into one of the listed intent values.
    When a routable department already owns the conversation
    (``state["active_agent"]``), a second classification decides whether the
    new message continues that conversation.

    Decision order:
      1. A department intent always wins and is routed to that department.
      2. Greeting/GeneralInquiry during a continuation stays with the owner.
      3. Greeting and GeneralInquiry are answered directly by the operator.
      4. An unmapped intent during a continuation stays with the owner.
      5. Otherwise the user is asked for more detail.

    Args:
        state: Graph state; ``messages`` and ``active_agent`` are read here
            (directly or through the helper functions).

    Returns:
        A partial state update containing ``inquiry``, ``intent``,
        ``active_agent``, ``last_router_reason``, ``next_node`` and
        ``response``; branches that answer directly also add ``messages``.
    """

    # Local client for routing; shadows any module-level ``llm`` on purpose.
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    inquiry = _latest_user_inquiry(state)
    active_agent = _normalize_intent(state.get("active_agent"))

    def _handoff(target, reason):
        # Update that routes the turn to a department node.
        return {
            "inquiry": inquiry,
            "intent": target,
            "active_agent": target,
            "last_router_reason": reason,
            "next_node": target,
            "response": None,
        }

    def _reply(intent, reason, text):
        # Update for turns the operator answers itself; ends the turn.
        return {
            "inquiry": inquiry,
            "intent": intent,
            "active_agent": None,
            "last_router_reason": reason,
            "next_node": END,
            "response": text,
            "messages": [AIMessage(content=text)],
        }

    query = f"""Classify the user's intents based on the following input: '{inquiry}'.
            List of possible intent values: Greeting, GeneralInquiry, ER, Radiology, PrimaryCare, Cardiology, Pediatrics, BillingInsurance
            Return only the intent value of the inquiry identified with no extra text or characters"""

    human_message = HumanMessage(
        content=[
            {"type": "text", "text": query},
        ],
    )

    system_message = SystemMessage(content="You are a helpful assistant tasked with classifying the intent of user's inquiry")

    response = llm.invoke([system_message] + [human_message])
    classified_intent = _normalize_intent(response.content.strip())

    # The transition is only classified when a department currently owns the thread.
    has_owner = active_agent in ROUTABLE_AGENTS
    transition: Optional[TransitionLabel] = None
    if has_owner:
        transition = _classify_transition(state, llm)
    continuing = has_owner and transition == "CONTINUATION"

    # Department routing wins, even during continuation.
    if classified_intent in ROUTABLE_AGENTS:
        if has_owner and classified_intent != active_agent:
            reason = "continuation_but_department_handoff" if transition == "CONTINUATION" else "new_topic_department_handoff"
        elif continuing and classified_intent == active_agent:
            reason = "continuation_to_active_agent"
        else:
            reason = "fresh_intent_classification"
        return _handoff(classified_intent, reason)

    # If user is clearly continuing with an active department, do not let
    # Greeting/GeneralInquiry steal the turn from the owning agent.
    if continuing and classified_intent in {"Greeting", "GeneralInquiry"}:
        return _handoff(active_agent, f"continuation_overrode_{classified_intent.lower()}")

    if classified_intent == "Greeting":
        greeting = "Hello there, This is Northwestern Memorial Hospital, How can I assist you today?"
        return _reply("Greeting", "fresh_intent_classification", greeting)

    if classified_intent == "GeneralInquiry":
        general_response = "For general information about nearby parking, hotels and restaurants, please visit https://www.nm.org/ and navigate to Patients & Visitors link "
        return _reply("GeneralInquiry", "fresh_intent_classification", general_response)

    # If intent is unmapped but transition says continuation, keep owner.
    if continuing:
        return _handoff(active_agent, "continuation_with_unmapped_intent")

    # Fallback: like every other branch, return only the changed keys.
    # Re-emitting the whole state (``**state``) is unnecessary and would
    # re-apply any reducer-backed channel to its own current value.
    fallback_response = "I could not determine the right department. Could you share a bit more detail about your request?"
    return _reply(None, "unmapped_intent", fallback_response)


In [None]:
# Wire the routing graph: the Operator node runs first and either ends the turn
# itself or hands off to exactly one department node, which then ends the turn.
builder = StateGraph(InquiryState)

builder.add_node("Operator", operator_router)
for _dept_name, _dept_node in (
    ("ER", er_node),
    ("Radiology", radiology_node),
    ("PrimaryCare", primary_care_node),
    ("Cardiology", cardiology_node),
    ("Pediatrics", pediatrics_node),
    ("BillingInsurance", billing_node),
):
    builder.add_node(_dept_name, _dept_node)

builder.set_entry_point("Operator")

# The router stores its decision in state["next_node"]; every allowed value
# maps to the node of the same name (END included).
_route_targets = {
    target: target
    for target in ("ER", "PrimaryCare", "Pediatrics", "Radiology", "Cardiology", "BillingInsurance", END)
}
builder.add_conditional_edges("Operator", lambda state: state["next_node"], _route_targets)

# Each department node answers once and finishes the turn.
for node in ("ER", "Radiology", "PrimaryCare", "Cardiology", "Pediatrics", "BillingInsurance"):
    builder.add_edge(node, END)

# The in-memory checkpointer keeps per-thread state between graph invocations.
memory_checkpointer = MemorySaver()

graph = builder.compile(checkpointer=memory_checkpointer)

In [None]:
# Render the compiled graph as a PNG. PNG rendering can fail (by default it
# relies on an external Mermaid rendering service), so fall back to printing
# the Mermaid source instead of aborting the notebook cell.
try:
    display(Image(graph.get_graph().draw_mermaid_png()))
except Exception as exc:  # notebook-level boundary: report and keep going
    print(f"Could not render the graph image ({exc}). Mermaid source:\n")
    print(graph.get_graph().draw_mermaid())


In [None]:
# Interactive console session against the compiled graph. Reusing a thread ID
# continues the conversation stored for that ID by the graph's checkpointer.
thread_id = input("Thread ID (use same ID to continue a conversation): ").strip() or "default-thread"
config = {"configurable": {"thread_id": thread_id}}

while True:
    # Strip so that inputs such as "quit " or " q" are still recognized.
    user_input = input("User: ").strip()
    if user_input.lower() in {"q", "quit"}:
        print("Goodbye!")
        break
    if not user_input:
        # Nothing to route; do not spend LLM calls on an empty inquiry.
        continue

    result = graph.invoke({"messages": [HumanMessage(content=user_input)]}, config=config)

    # Prefer the most recent AI message in the thread.
    assistant_message = next(
        (m for m in reversed(result.get("messages", [])) if isinstance(m, AIMessage)),
        None,
    )
    if assistant_message is not None:
        response = assistant_message.content
    else:
        # state['response'] may exist but be None, so a .get() default is not enough.
        response = result.get("response") or "No Response Returned"
    active_agent = result.get("active_agent")
    last_router_reason = result.get("last_router_reason")
    print(
        f"\nUser: {user_input}\n"
        f"Assistant: {response}\n"
        f"state['active_agent']: {active_agent}\n"
        f"state['last_router_reason']: {last_router_reason}\n"
    )

<br><br><br>

<hr style="border:30px solid coral "> </hr>
<hr style="border:2px solid coral "> </hr>


# Requirement 7

<hr style="border:2px solid coral "> </hr>


Provide 6 runs that demonstrate a fully functional application in which the Operator AI Agent successfully routes each message to the target
department AI Agent, which then responds to the inquiry, for every scenario listed below.

#### How can I tell if my child has RSV (Respiratory Syncytial Virus)?

In [None]:
# Interactive console session against the compiled graph. Reusing a thread ID
# continues the conversation stored for that ID by the graph's checkpointer.
thread_id = input("Thread ID (use same ID to continue a conversation): ").strip() or "default-thread"
config = {"configurable": {"thread_id": thread_id}}

while True:
    # Strip so that inputs such as "quit " or " q" are still recognized.
    user_input = input("User: ").strip()
    if user_input.lower() in {"q", "quit"}:
        print("Goodbye!")
        break
    if not user_input:
        # Nothing to route; do not spend LLM calls on an empty inquiry.
        continue

    result = graph.invoke({"messages": [HumanMessage(content=user_input)]}, config=config)

    # Prefer the most recent AI message in the thread.
    assistant_message = next(
        (m for m in reversed(result.get("messages", [])) if isinstance(m, AIMessage)),
        None,
    )
    if assistant_message is not None:
        response = assistant_message.content
    else:
        # state['response'] may exist but be None, so a .get() default is not enough.
        response = result.get("response") or "No Response Returned"
    active_agent = result.get("active_agent")
    last_router_reason = result.get("last_router_reason")
    print(
        f"\nUser: {user_input}\n"
        f"Assistant: {response}\n"
        f"state['active_agent']: {active_agent}\n"
        f"state['last_router_reason']: {last_router_reason}\n"
    )

#### Can I visit my friend in the ER, and are there any restrictions?

In [None]:
# Interactive console session against the compiled graph. Reusing a thread ID
# continues the conversation stored for that ID by the graph's checkpointer.
thread_id = input("Thread ID (use same ID to continue a conversation): ").strip() or "default-thread"
config = {"configurable": {"thread_id": thread_id}}

while True:
    # Strip so that inputs such as "quit " or " q" are still recognized.
    user_input = input("User: ").strip()
    if user_input.lower() in {"q", "quit"}:
        print("Goodbye!")
        break
    if not user_input:
        # Nothing to route; do not spend LLM calls on an empty inquiry.
        continue

    result = graph.invoke({"messages": [HumanMessage(content=user_input)]}, config=config)

    # Prefer the most recent AI message in the thread.
    assistant_message = next(
        (m for m in reversed(result.get("messages", [])) if isinstance(m, AIMessage)),
        None,
    )
    if assistant_message is not None:
        response = assistant_message.content
    else:
        # state['response'] may exist but be None, so a .get() default is not enough.
        response = result.get("response") or "No Response Returned"
    active_agent = result.get("active_agent")
    last_router_reason = result.get("last_router_reason")
    print(
        f"\nUser: {user_input}\n"
        f"Assistant: {response}\n"
        f"state['active_agent']: {active_agent}\n"
        f"state['last_router_reason']: {last_router_reason}\n"
    )

#### How should I prepare for a CT scan, and are there any dietary restrictions?

In [None]:
# Interactive console session against the compiled graph. Reusing a thread ID
# continues the conversation stored for that ID by the graph's checkpointer.
thread_id = input("Thread ID (use same ID to continue a conversation): ").strip() or "default-thread"
config = {"configurable": {"thread_id": thread_id}}

while True:
    # Strip so that inputs such as "quit " or " q" are still recognized.
    user_input = input("User: ").strip()
    if user_input.lower() in {"q", "quit"}:
        print("Goodbye!")
        break
    if not user_input:
        # Nothing to route; do not spend LLM calls on an empty inquiry.
        continue

    result = graph.invoke({"messages": [HumanMessage(content=user_input)]}, config=config)

    # Prefer the most recent AI message in the thread.
    assistant_message = next(
        (m for m in reversed(result.get("messages", [])) if isinstance(m, AIMessage)),
        None,
    )
    if assistant_message is not None:
        response = assistant_message.content
    else:
        # state['response'] may exist but be None, so a .get() default is not enough.
        response = result.get("response") or "No Response Returned"
    active_agent = result.get("active_agent")
    last_router_reason = result.get("last_router_reason")
    print(
        f"\nUser: {user_input}\n"
        f"Assistant: {response}\n"
        f"state['active_agent']: {active_agent}\n"
        f"state['last_router_reason']: {last_router_reason}\n"
    )

#### I want to check if my recent test result has been reviewed by my primary care physician yet?

In [None]:
# Interactive console session against the compiled graph. Reusing a thread ID
# continues the conversation stored for that ID by the graph's checkpointer.
thread_id = input("Thread ID (use same ID to continue a conversation): ").strip() or "default-thread"
config = {"configurable": {"thread_id": thread_id}}

while True:
    # Strip so that inputs such as "quit " or " q" are still recognized.
    user_input = input("User: ").strip()
    if user_input.lower() in {"q", "quit"}:
        print("Goodbye!")
        break
    if not user_input:
        # Nothing to route; do not spend LLM calls on an empty inquiry.
        continue

    result = graph.invoke({"messages": [HumanMessage(content=user_input)]}, config=config)

    # Prefer the most recent AI message in the thread.
    assistant_message = next(
        (m for m in reversed(result.get("messages", [])) if isinstance(m, AIMessage)),
        None,
    )
    if assistant_message is not None:
        response = assistant_message.content
    else:
        # state['response'] may exist but be None, so a .get() default is not enough.
        response = result.get("response") or "No Response Returned"
    active_agent = result.get("active_agent")
    last_router_reason = result.get("last_router_reason")
    print(
        f"\nUser: {user_input}\n"
        f"Assistant: {response}\n"
        f"state['active_agent']: {active_agent}\n"
        f"state['last_router_reason']: {last_router_reason}\n"
    )

#### Do you have any available appointments with a cardiologist next week?

In [None]:
# Interactive console session against the compiled graph. Reusing a thread ID
# continues the conversation stored for that ID by the graph's checkpointer.
thread_id = input("Thread ID (use same ID to continue a conversation): ").strip() or "default-thread"
config = {"configurable": {"thread_id": thread_id}}

while True:
    # Strip so that inputs such as "quit " or " q" are still recognized.
    user_input = input("User: ").strip()
    if user_input.lower() in {"q", "quit"}:
        print("Goodbye!")
        break
    if not user_input:
        # Nothing to route; do not spend LLM calls on an empty inquiry.
        continue

    result = graph.invoke({"messages": [HumanMessage(content=user_input)]}, config=config)

    # Prefer the most recent AI message in the thread.
    assistant_message = next(
        (m for m in reversed(result.get("messages", [])) if isinstance(m, AIMessage)),
        None,
    )
    if assistant_message is not None:
        response = assistant_message.content
    else:
        # state['response'] may exist but be None, so a .get() default is not enough.
        response = result.get("response") or "No Response Returned"
    active_agent = result.get("active_agent")
    last_router_reason = result.get("last_router_reason")
    print(
        f"\nUser: {user_input}\n"
        f"Assistant: {response}\n"
        f"state['active_agent']: {active_agent}\n"
        f"state['last_router_reason']: {last_router_reason}\n"
    )

#### Can you help me understand my recent medical bill and whether my insurance covered the costs?

In [None]:
# Interactive console session against the compiled graph. Reusing a thread ID
# continues the conversation stored for that ID by the graph's checkpointer.
thread_id = input("Thread ID (use same ID to continue a conversation): ").strip() or "default-thread"
config = {"configurable": {"thread_id": thread_id}}

while True:
    # Strip so that inputs such as "quit " or " q" are still recognized.
    user_input = input("User: ").strip()
    if user_input.lower() in {"q", "quit"}:
        print("Goodbye!")
        break
    if not user_input:
        # Nothing to route; do not spend LLM calls on an empty inquiry.
        continue

    result = graph.invoke({"messages": [HumanMessage(content=user_input)]}, config=config)

    # Prefer the most recent AI message in the thread.
    assistant_message = next(
        (m for m in reversed(result.get("messages", [])) if isinstance(m, AIMessage)),
        None,
    )
    if assistant_message is not None:
        response = assistant_message.content
    else:
        # state['response'] may exist but be None, so a .get() default is not enough.
        response = result.get("response") or "No Response Returned"
    active_agent = result.get("active_agent")
    last_router_reason = result.get("last_router_reason")
    print(
        f"\nUser: {user_input}\n"
        f"Assistant: {response}\n"
        f"state['active_agent']: {active_agent}\n"
        f"state['last_router_reason']: {last_router_reason}\n"
    )