In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from IPython.display import JSON, Markdown, display
import json

def show_json(raw, mode="text", indent=2):
    """
    mode = "tree"  -> interactive JSON widget
    mode = "text"  -> indented JSON
    mode = "both"  -> tree + indented JSON
    """
    if isinstance(raw, str):
        raw = json.loads(raw)

    if mode in ("tree", "both"):
        display(JSON(raw))

    if mode in ("text", "both"):
        pretty = json.dumps(raw, indent=indent, ensure_ascii=False)
        display(Markdown(f"```json\n{pretty}\n```"))

In [3]:
import os, sys

project_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
sys.path.append(project_root)

import asyncio
from openai import OpenAI
from openai import AsyncOpenAI
from langchain_openai import ChatOpenAI
from langchain_neo4j import Neo4jGraph

from util.config_loader import load_config_api
from util.api_client import ApiClient
from llm.tool import (
    build_ontology_mapper_tool,
    build_patient_ner_tool,
    build_patient_ned_tool,
    build_general_medical_tool,
    build_patient_info_tool,
    build_patient_coverage_tool
)

# Connectors

In [4]:
url_llm = load_config_api("llm", path="../config.ini")
chat_client = ChatOpenAI(
    api_key="EMPTY",
    base_url=url_llm,
    model_name="google/medgemma-4b-it",
    temperature=0,
    max_tokens=8192,
    top_p=0.9,
    frequency_penalty=0.2,
    presence_penalty=0.0,
)

In [5]:
graph_client = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password="password",
    database="cdl2025",
    enhanced_schema=True,
)

# Open LLM Tests and Virtualized Resources


## Single Request

In [6]:
client = OpenAI(
        api_key="EMPTY",                      
        base_url=url_llm
    )
resp = client.chat.completions.create(
    model="google/medgemma-4b-it",
    messages=[
        {"role": "user", "content": "Who are you?"}
    ],
    temperature=0,
)
response = resp.choices[0].message.content
response

'I am Gemma, an open-weights AI assistant. I am a large language model trained by Google DeepMind.\n'

## Batch Request


In [7]:
prompts = [
    "Explain the difference between type 1 and type 2 diabetes in simple terms. In one sentence.",
    "List the main symptoms of pneumonia and when a patient should see a doctor. In one sentence.",
    "What lifestyle changes can help reduce high blood pressure? In one sentence.",
    "Describe the importance of vaccination in preventing infectious diseases. In one sentence.",
    "What are the common treatments for seasonal allergies? In one sentence.",
    "How does regular exercise benefit mental health? In one sentence.",
    "What dietary changes can help manage cholesterol levels? In one sentence.",
]

async def run_batch(prompts, concurrency=8, max_tokens=128):
    client = AsyncOpenAI(api_key="EMPTY", base_url=url_llm)
    sem = asyncio.Semaphore(concurrency)

    async def one(p):
        async with sem:
            r = await client.chat.completions.create(
                model="google/medgemma-4b-it",
                messages=[{"role": "user", "content": p}],
                temperature=0.0,
                max_tokens=max_tokens,
                stream=False,
            )
            return r.choices[0].message.content

    tasks = [asyncio.create_task(one(p)) for p in prompts]
    out = await asyncio.gather(*tasks)
    await client.close()
    return out

answers = await run_batch(prompts, concurrency=8)
for q, a in zip(prompts, answers):
    print(q, "→", a)


Explain the difference between type 1 and type 2 diabetes in simple terms. In one sentence. → Type 1 diabetes is an autoimmune disease where the body attacks and destroys insulin-producing cells, while type 2 diabetes is a condition where the body doesn't use insulin properly, often due to resistance and/or insufficient production.

List the main symptoms of pneumonia and when a patient should see a doctor. In one sentence. → Pneumonia symptoms include cough (often with phlegm), fever, chills, shortness of breath, and chest pain. See a doctor if you have these symptoms, especially if you are elderly, have a chronic condition, or are immunocompromised.

What lifestyle changes can help reduce high blood pressure? In one sentence. → Lifestyle changes like diet modification (reducing sodium and increasing potassium), regular exercise, weight management, limiting alcohol consumption, and quitting smoking can significantly reduce high blood pressure.

Describe the importance of vaccination i

## Virtualized Resources

In [8]:
graph_client.query("MATCH (n:HpoPhenotype) RETURN n.label OFFSET 100 LIMIT 10 ")

[{'n.label': 'Thick lower lip vermilion'},
 {'n.label': 'Abnormal uvula morphology'},
 {'n.label': 'Microglossia'},
 {'n.label': 'Abnormal palate morphology'},
 {'n.label': 'Broad alveolar ridges'},
 {'n.label': 'Narrow palate'},
 {'n.label': 'Short upper lip'},
 {'n.label': 'Lobulated tongue'},
 {'n.label': 'Tongue muscle weakness'},
 {'n.label': 'Movement abnormality of the tongue'}]

In [9]:
graph_client.query("MATCH (n:Patient) RETURN n.label OFFSET 100 LIMIT 10 ")



[]

In [10]:
graph_client.query("""
    CALL apoc.dv.query('patient', {patientId:'P003'}) YIELD node AS v
    RETURN v LIMIT 1
""")

[{'v': {'Condition': 'Multiple mononeuropathy',
   'Encounter.period.start': '2017-06-02',
   'EncounterID': 'E001',
   'Encounter.reasonCode': 'Progressive leg tingling and burning',
   'Encounter.hospitalization.dischargeDisposition': 'Home',
   'Encounter.class': 'Ambulatory',
   'Observation[key]': 'Neurologic exam: patchy distal sensory loss and reduced vibration sense in both feet',
   'DiagnosticReport': 'NCS/EMG: multifocal demyelinating features in lower limb nerves',
   'NED_Entities': '[{\'source\': \'concat\', \'start\': 14, \'end\': 24, \'text\': \'leg tingling\', \'label\': \'Diseases of the nervous system\', \'assertion\': \'present\', \'temporality\': \'acute\', \'rationale\': \'Patient reports leg tingling.\', \'icd_id\': \'G57.1\', \'icd_label\': \'Meralgia paraesthetica\', \'confidence\': 0.95, \'linking_rationale\': "The mention of \'leg tingling\' aligns with the ICD-10 code G57.1 (Meralgia paraesthetica), which describes the tingling sensation caused by compressio

In [11]:
graph_client.query("""
    CALL apoc.dv.query('patient', {patientId: 'P003'}) YIELD node AS v
    WITH apoc.convert.fromJsonList(
           coalesce(
               apoc.any.property(v, 'ICD10_Codes'),
               apoc.any.property(v, '\uFEFFICD10_Codes')
           )
         ) AS codes
    RETURN codes
    LIMIT 1
""")

[{'codes': ['G57.1',
   'T28.1',
   'S84',
   'R52',
   'G58.7',
   'S94.7',
   'R26.2',
   'M54.8',
   'R26.0',
   'G58.7']}]

## Ontology Mapping

In [12]:
payload = {
    "source_concept": "H53.14 Photophobia",
    "source_context": """
    {
      "id": "H53.14",
      "name": "Photophobia",
      "parentName": "Visual disturbances",
      "group":   { "groupName": "Other visual disturbances" },
      "chapter": { "chapterName": "Diseases of the eye and adnexa" }
    }
    """,
    "candidate_list": """
    [
      {
        "id": "HP:0001001",
        "label": "Intolerance to bright light",
        "exactSynonym": [
          "Light sensitivity",
          "Sensitivity to light",
          "Visual intolerance to light"
        ],
        "description": "Discomfort or pain provoked by exposure to bright or moderate light.",
        "comment": "Patients may describe this as \\\"photophobia\\\" even when ocular exam is unremarkable."
      },
      {
        "id": "HP:0001002",
        "label": "Abnormal visual response to light",
        "exactSynonym": [
          "Abnormal sensitivity to light",
          "Increased visual sensitivity"
        ],
        "description": "An exaggerated or unpleasant visual reaction to normal levels of light.",
        "comment": "Often overlaps with the symptom commonly called photophobia."
      },
      {
        "id": "HP:0001003",
        "label": "Light-triggered headache",
        "exactSynonym": [
          "Headache induced by light exposure"
        ],
        "description": "Head pain that is precipitated or worsened by exposure to light.",
        "comment": "Frequently co-occurs with patient-reported photophobia in migraine and meningitis."
      }
    ]
    """,
}

tool_output = build_ontology_mapper_tool(chat_client).invoke(payload)
show_json(tool_output)

```json
{
  "best_id": "HP:0001001",
  "best_label": "Intolerance to bright light",
  "confidence": 0.9,
  "rationale": "Photophobia is a common term for intolerance to bright light. This is the most specific and semantically equivalent concept.",
  "support": {
    "evidence": "Photophobia is often described as intolerance to bright light.",
    "reason": "The description of photophobia aligns with the definition of intolerance to bright light."
  }
}
```

# Patient Annotation

### Named Entity Recognition

DO NOT TRUST THE CHARACTER INDEX!

In [13]:
payload = {
    "patient_id": "patient_002",
    "encounter_id": "enc_145",
    "icd_chapters": [
        "Certain infectious and parasitic diseases",
        "Neoplasms",
        "Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism",
        "Endocrine, nutritional and metabolic diseases",
        "Mental and behavioural disorders",
        "Diseases of the nervous system",
        "Diseases of the eye and adnexa",
        "Diseases of the ear and mastoid process",
        "Diseases of the circulatory system",
        "Diseases of the respiratory system",
        "Diseases of the digestive system",
        "Diseases of the skin and subcutaneous tissue",
        "Diseases of the musculoskeletal system and connective tissue",
        "Diseases of the genitourinary system",
        "Pregnancy, childbirth and the puerperium",
        "Certain conditions originating in the perinatal period",
        "Congenital malformations, deformations and chromosomal abnormalities",
        "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified",
        "Injury, poisoning and certain other consequences of external causes",
        "External causes of morbidity and mortality",
        "Factors influencing health status and contact with health services",
        "Codes for special purposes"
    ],
    "concat_text": (
        "Abdominal pain | "
        "Shortness of breath and chest tightness | "
        "Episodic dizziness and palpitations | "
        "Hypertension, type 2 diabetes mellitus, and iron-deficiency anemia | "
        "No Chest pain"
    ),
    "narrative_text": (
        "No narrative text provided."
    )
}


tool_output = build_patient_ner_tool(chat_client).invoke(payload)
show_json(tool_output)

```json
{
  "patient_id": "patient_002",
  "encounter_id": "enc_145",
  "entities": [
    {
      "source": "concat",
      "start": 13,
      "end": 26,
      "text": "Abdominal pain",
      "label": "Diseases of the digestive system",
      "assertion": "present",
      "temporality": "unspecified",
      "rationale": "Present in the concatenated text."
    },
    {
      "source": "concat",
      "start": 31,
      "end": 46,
      "text": "Shortness of breath and chest tightness",
      "label": "Diseases of the respiratory system",
      "assertion": "present",
      "temporality": "unspecified",
      "rationale": "Present in the concatenated text."
    },
    {
      "source": "concat",
      "start": 51,
      "end": 68,
      "text": "Episodic dizziness and palpitations",
      "label": "Diseases of the nervous system",
      "assertion": "present",
      "temporality": "unspecified",
      "rationale": "Present in the concatenated text."
    },
    {
      "source": "concat",
      "start": 73,
      "end": 98,
      "text": "Hypertension, type 2 diabetes mellitus, and iron-deficiency anemia",
      "label": "Endocrine, nutritional and metabolic diseases",
      "assertion": "present",
      "temporality": "unspecified",
      "rationale": "Present in the concatenated text."
    },
    {
      "source": "concat",
      "start": 103,
      "end": 112,
      "text": "No Chest pain",
      "label": "Diseases of the circulatory system",
      "assertion": "negated",
      "temporality": "unspecified",
      "rationale": "The patient denies chest pain."
    },
    {
      "source": "narrative",
      "start": 0,
      "end": 15,
      "text": "No narrative text provided.",
      "label": "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified",
      "assertion": "unspecified",
      "temporality": "unspecified",
      "rationale": "No narrative text provided. This is a placeholder for a potential symptom or sign that may be present but not explicitly mentioned in the narrative."
    }
  ]
}
```

### Named Entity Disambiguation

In [14]:
ned_payload = {
    "mention": {
        "source": "concat",
        "start": 245,
        "end": 260,
        "text": "terminal ileitis",
        "label": "Diseases of the digestive system",
        "assertion": "present",
        "temporality": "chronic",
        "rationale": (
            "Imaging and colonoscopy describe inflammation of the terminal ileum. "
            "Clinical context (chronic diarrhea, weight loss, prior Crohn diagnosis) "
            "suggests inflammatory bowel disease rather than acute infection."
        ),
    },
    "candidates": [
        # 1) Highest surface similarity to "terminal ileitis" (generic, non-IBD specific)
        {
            "score": 0.912,
            "id": "K52.9",
            "label": "Noninfective gastroenteritis and colitis, unspecified"
        },
        # 2) Another high-similarity but acute/infective interpretation
        {
            "score": 0.887,
            "id": "A09.0",
            "label": "Infectious gastroenteritis and colitis, unspecified"
        },
        # 3) Generic ileitis, not clearly Crohn-related
        {
            "score": 0.871,
            "id": "K52.0",
            "label": "Gastroenteritis and colitis due to radiation"
        },
        # 4) **Target node** – Crohn’s disease of small intestine (fits global context)
        {
            "score": 0.842,
            "id": "K50.00",
            "label": "Crohn's disease of small intestine without complications"
        },
        # 5) Crohn’s disease of both small and large intestine (possible but less supported)
        {
            "score": 0.824,
            "id": "K50.80",
            "label": "Crohn's disease of both small and large intestine without complications"
        },
        # 6) Ulcerative colitis – another IBD differential but mismatched pattern
        {
            "score": 0.801,
            "id": "K51.90",
            "label": "Ulcerative colitis, unspecified, without complications"
        },
    ],
    "other_mentions": [
        {
            "text": "long-standing Crohn disease diagnosed at age 19",
            "label": "Diseases of the digestive system",
        },
        {
            "text": "skip lesions in terminal ileum and ascending colon on colonoscopy",
            "label": "Diseases of the digestive system",
        },
        {
            "text": "chronic watery diarrhea and 7 kg unintentional weight loss in 6 months",
            "label": "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified",
        },
        {
            "text": "non-caseating granulomas on ileal biopsy",
            "label": "Diseases of the digestive system",
        },
    ],
}

# Invoke the Patient NED tool
ned_tool_output = build_patient_ned_tool(chat_client).invoke(ned_payload)
show_json(ned_tool_output)


```json
{
  "source": "concat",
  "start": 245,
  "end": 260,
  "text": "terminal ileitis",
  "label": "Diseases of the digestive system",
  "assertion": "present",
  "temporality": "chronic",
  "rationale": "The mention describes inflammation of the terminal ileum, which is consistent with Crohn's disease or ulcerative colitis. The chronic nature of the condition is also supported by the other mentions.",
  "icd_id": "K50.00",
  "icd_label": "Crohn's disease of small intestine without complications",
  "confidence": 0.95,
  "linking_rationale": "The mention describes inflammation of the terminal ileum, which is consistent with Crohn's disease or ulcerative colitis. The chronic nature of the condition is also supported by the other mentions."
}
```

## Question and Answers

### General Medical Questions (Ontology-Driven)

In [31]:
questions = [
    #"For cystic fibrosis, what symptoms are documented and what is the evidence supporting them?",
    #"Which phenotypes of Marfan syndrome have published clinical study evidence, and what are their frequencies?",
    #"For Duchenne muscular dystrophy, list all associated symptoms and include the supporting publications.",
    #"Which diseases are associated with short stature? Show me the reported frequencies.",
    #"List all phenotypes of Rett syndrome with their frequencies and PubMed URLs.",
    "Which diseases are linked to the phenotype 'muscle weakness' supported by published clinical studies?",
    #"Find diseases associated with 'recurrent infections' and show the evidence descriptions.",
    #"Which diseases have phenotypes with adult onset and what sources support them?", # Useful to show different use cases
    #"Which diseases are treated with insulin?",
    "Find phenotypes associated with the influenza disease and include the mapping support text.",
    # "For the ICD disease 'Typhoid fever', what HPO phenotypes are mapped and with what confidence?",
    "What is the average confidence of all HPO mappings to ICD for Cholera?",
    "List diseases associated with recurrent fever supported by published clinical studies only.",
    #"Which phenotypes of Marfan syndrome have evidence type PCS?",
]

for q in questions:
    print("\n=== Question ===")
    print(q)
    general_medical_tool_output = build_general_medical_tool(chat_client).invoke({
        "question": q,
        "top_k": 10
    })
    print("=== Output ===")
    print(general_medical_tool_output["explanation"])



=== Question ===
Which diseases are linked to the phenotype 'muscle weakness' supported by published clinical studies?
=== Output ===
Answer:
According to the ontology, the following diseases are linked to the phenotype 'muscle weakness' supported by published clinical studies.

*   **Adrenoleukodystrophy (OMIM:300100)**: This is a genetic disorder affecting the adrenal glands and white matter of the brain. The phenotype is "Lower limb muscle weakness" (HP:0007340). The frequency is 1/2, and the source is PMID:24316281.
*   **Amyotrophic lateral sclerosis 12 with or without frontotemporal dementia (OMIM:613435)**: This is a progressive neurodegenerative disease affecting motor neurons. The phenotype is "Muscle weakness" (HP:0001324). The frequency is 8/8, and the source is PMID:20428114.
*   **Amyotrophic lateral sclerosis 16, juvenile (OMIM:614373)**: This is a form of amyotrophic lateral sclerosis that affects children. The phenotype is "Lower limb muscle weakness" (HP:0007340). The

### Patient Tool (Virtualized Resources)

In [16]:
patient_payload = {
    "patient_id": "P003",
    #"question": "Provide a concise clinical summary of patient P003 using all documented findings.",
    "question": "Provide me details on the follow up plan of patient P003 in the latest encounter.",
}

patient_tool_output = build_patient_info_tool(chat_client).invoke(patient_payload)
patient_tool_output

{'patient_id': 'P003',
 'question': 'Provide me details on the follow up plan of patient P003 in the latest encounter.',
 'encounter_date': 'latest',
 'has_data': True,
 'answer': 'Answer:\nThe follow-up plan for patient P003 in the latest encounter on 2026-03-03 is to monitor for new skin changes and reassess if the lesions spread. The patient is instructed to use regular emollients.\n',
 'raw_patient_view': [{'patient_id': 'P003',
   'identity': None,
   'labels': [],
   'elementId': None,
   'condition': 'Striae distensae',
   'chief_complaint': 'Thin streak-like marks on the trunk',
   'course_trend': 'Stable',
   'comorbidities': 'Multiple mononeuropathy, recurrent inflammatory episodes',
   'plan_followup': 'Monitor for new skin changes and reassess if lesions spread',
   'medication_statement': 'Emollient cream topically BID',
   'notes': 'Skin findings unchanged since onset',
   'encounter': {'id': 'E012',
    'period_start': '2026-03-03',
    'reason_code': 'New skin streaks o

### Coverage Tool (using the ICD-HPO mappings)

In [17]:
graph_client.query("""
    CALL apoc.dv.query('patient', {patientId: 'P003'}) YIELD node AS v
    WITH apoc.convert.fromJsonList(
           coalesce(
               apoc.any.property(v, 'ICD10_Codes'),
               apoc.any.property(v, '\uFEFFICD10_Codes')
           )
         ) AS codes
    RETURN codes
    LIMIT 1
""")

[{'codes': ['G57.1',
   'T28.1',
   'S84',
   'R52',
   'G58.7',
   'S94.7',
   'R26.2',
   'M54.8',
   'R26.0',
   'G58.7']}]

In [18]:
graph_client.query("""
    CALL apoc.dv.query('patient', {patientId: 'P003'}) 
    YIELD node AS v

    WITH
    apoc.convert.fromJsonList(
        coalesce(
        apoc.any.property(v, 'ICD10_Codes'),
        apoc.any.property(v, '\uFEFFICD10_Codes')
        )
    ) AS codes,
    v AS _Patient
    WITH codes, _Patient
    LIMIT 1

    UNWIND codes AS code

    MATCH (d:IcdDisease {id: code})
        <-[:UMLS_TO_ICD]-
        (u:UMLS)
        -[:UMLS_TO_HPO_PHENOTYPE]->
        (h:HpoPhenotype)

    RETURN DISTINCT
    apoc.any.property(_Patient, 'PatientId') AS PatientId,
    h.label AS HPO_Label,
    h.id AS hpo_id,
    // Fancy ASCII path description
    'Patient[' + apoc.any.property(_Patient, 'PatientId') + ']'
    + ' --ICD10[' + code + ']--> '
    + 'HPO[' + h.id + ' | ' + h.label + ']' AS PathDescription;

""")

[{'PatientId': 'P003',
  'HPO_Label': 'Pain',
  'hpo_id': 'HP:0012531',
  'PathDescription': 'Patient[P003] --ICD10[R52]--> HPO[HP:0012531 | Pain]'},
 {'PatientId': 'P003',
  'HPO_Label': 'Multiple mononeuropathy',
  'hpo_id': 'HP:0032018',
  'PathDescription': 'Patient[P003] --ICD10[G58.7]--> HPO[HP:0032018 | Multiple mononeuropathy]'},
 {'PatientId': 'P003',
  'HPO_Label': 'Gait ataxia',
  'hpo_id': 'HP:0002066',
  'PathDescription': 'Patient[P003] --ICD10[R26.0]--> HPO[HP:0002066 | Gait ataxia]'}]

In [None]:
coverage_payload = {
    "patient_id": "P003",
    "limit": 20
}

coverage_tool_output = build_patient_coverage_tool(chat_client).invoke(coverage_payload)
coverage_tool_output




{'cypher': 'MULTI-STEP PIPELINE: get_patient_icd_codes -> map_icd_to_hpo -> rollup_hpo_to_ancestors -> compute_coverage',
 'rows': [{'diseaseId': 'ORPHA:117',
   'diseaseName': 'Behçet disease',
   'covered': 16,
   'total': 85,
   'coveragePct': 18.8,
   'missingHpoIds': ['HP:0002204',
    'HP:0002202',
    'HP:0007813',
    'HP:0200039',
    'HP:0001637',
    'HP:0100820',
    'HP:0002376',
    'HP:0001482',
    'HP:0000031',
    'HP:0000155',
    'HP:0002014',
    'HP:0001289',
    'HP:0000518',
    'HP:0100584',
    'HP:0100614',
    'HP:0012378',
    'HP:0100653',
    'HP:0200034',
    'HP:0100654',
    'HP:0000083',
    'HP:0002383',
    'HP:0001250',
    'HP:0012219',
    'HP:0002024',
    'HP:0001097',
    'HP:0012819',
    'HP:0002027',
    'HP:0001653',
    'HP:0000488',
    'HP:0002105',
    'HP:0002102',
    'HP:0001658',
    'HP:0001733',
    'HP:0001659',
    'HP:0100326',
    'HP:0010885',
    'HP:4000041',
    'HP:0001061',
    'HP:0000099',
    'HP:0002076',
    'HP:00

# GraphRAG Agent

In [33]:
from llm.agent import run_agent


In [34]:
out = run_agent("Show possible diseases by HPO coverage for patientId:'P003'. Report the covered, total, and percentage coverage.")
print(out["final_answer"])



DEBUG: Retrieved ICD codes: ['A68', 'A68.9', 'B48.8', 'F40.0', 'F43.8', 'F51.4', 'F59', 'F90.0', 'G02.1', 'G03.0', 'G44', 'G44.2', 'G57.1', 'G58.7', 'H30.0', 'H30.2', 'H35.0', 'H43.0', 'H49.1', 'H49.2', 'H49.3', 'H50.2', 'H53.2', 'H53.9', 'H57.1', 'I44.0', 'I47', 'I48.4', 'I49.4', 'K03.0', 'K07.2', 'K07.6', 'L90.6', 'M13.1', 'M23.4', 'M24.5', 'M25.4', 'M25.5', 'M25.6', 'M54.2', 'M54.8', 'R00.2', 'R11', 'R22.4', 'R23.8', 'R25.0', 'R26.0', 'R26.2', 'R41.1', 'R41.8', 'R42', 'R50', 'R50.8', 'R51', 'R52', 'R52.9', 'R53', 'R55', 'R70.0', 'R83.4', 'S03.4', 'S13.4', 'S84', 'S94.7', 'T03.0', 'T28.1', 'W10', 'Z50']
Based on the provided data, the following diseases are covered for patientId 'P003': Behçet disease (16/85, 18.8%), Lyme disease (15/24, 62.5%), Brucellosis (15/77, 19.5%), Developmental delay, impaired speech, and behavioral abnormalities (15/187, 8.0%), and Microscopic polyangiitis (11/55, 20.0%).



In [None]:
out = run_agent("Provide me details on the follow up plan of patient P003 in the latest encounter.")
print(out["final_answer"])

Answer:
The patient's clinical picture includes multiple conditions, including multiple mononeuropathy, posterior uveitis, recurrent fever, tension-type headache, and now, recurrent low-grade fever.

Presenting problem and course: The patient presented with recurrent low-grade fevers and chills. He denied cough, urinary symptoms, or recent travel. Laboratory evaluation showed mildly elevated C-reactive protein and erythrocyte sedimentation rate. He was referred to infectious disease specialists for further serologic and imaging workup.

Examination and investigations: Physical examination was largely unremarkable. Laboratory evaluation showed mildly elevated C-reactive protein and erythrocyte sedimentation rate, while complete blood count remained normal. He was instructed to keep a detailed symptom and temperature diary and was referred to infectious disease specialists for further serologic and imaging workup to clarify the cause of the recurrent low-grade fevers and systemic malaise

In [35]:
out = run_agent("Show possible diseases by HPO coverage for patientId:'P003'. Report the covered, total, and percentage coverage.")
print(out["final_answer"])



DEBUG: Retrieved ICD codes: ['A68', 'A68.9', 'B48.8', 'F40.0', 'F43.8', 'F51.4', 'F59', 'F90.0', 'G02.1', 'G03.0', 'G44', 'G44.2', 'G57.1', 'G58.7', 'H30.0', 'H30.2', 'H35.0', 'H43.0', 'H49.1', 'H49.2', 'H49.3', 'H50.2', 'H53.2', 'H53.9', 'H57.1', 'I44.0', 'I47', 'I48.4', 'I49.4', 'K03.0', 'K07.2', 'K07.6', 'L90.6', 'M13.1', 'M23.4', 'M24.5', 'M25.4', 'M25.5', 'M25.6', 'M54.2', 'M54.8', 'R00.2', 'R11', 'R22.4', 'R23.8', 'R25.0', 'R26.0', 'R26.2', 'R41.1', 'R41.8', 'R42', 'R50', 'R50.8', 'R51', 'R52', 'R52.9', 'R53', 'R55', 'R70.0', 'R83.4', 'S03.4', 'S13.4', 'S84', 'S94.7', 'T03.0', 'T28.1', 'W10', 'Z50']
Based on the provided data, the following diseases are covered for patientId 'P003': Behçet disease (16/85, 18.8%), Lyme disease (15/24, 62.5%), Brucellosis (15/77, 19.5%), Developmental delay, impaired speech, and behavioral abnormalities (15/187, 8.0%), and Microscopic polyangiitis (11/55, 20.0%).



# Embedding API

In [20]:
cfg = load_config_api("embedding", path="../config.ini")
api = ApiClient(cfg)

Base URL set to: https://civilly-uncognizable-nilda.ngrok-free.dev


In [21]:
body, status, headers = api.post('/embed', {'input': ['Who are you?', 'What is your name?']})
print("Status:", status)
print("Headers:", headers)
print("Body:", body)

Status: 200
Headers: {'Content-Length': '153945', 'Content-Type': 'application/json', 'Date': 'Mon, 17 Nov 2025 13:00:04 GMT', 'Server': 'uvicorn', 'Connection': 'close'}
Body: {'model': 'Alibaba-NLP/gte-Qwen2-7B-instruct', 'num_inputs': 2, 'embedding_dims': 3584, 'data': [[0.006530743092298508, 0.0008580876747146249, -0.00030612858245149255, 0.017885329201817513, 0.002949966350570321, -0.009721901267766953, 0.013358337804675102, 0.0017903883708640933, -0.022115470841526985, 0.02434185892343521, 0.007495511788874865, -0.015287875197827816, 0.00515780271962285, -0.006493636406958103, 0.02345130406320095, 0.01001875288784504, 0.021224914118647575, -0.019592229276895523, -0.006976020988076925, -0.00500937644392252, 0.02003750577569008, 0.0021150701213628054, -0.007495511788874865, -0.0007838746532797813, 0.006011251825839281, -0.01617843098938465, -0.015584727749228477, -0.03250528872013092, 0.0043043531477451324, 0.023006025701761246, 0.017365839332342148, 0.004155927337706089, 0.0017068

# Semantic Search

In [22]:
import configparser
from database.neo4j_db import Neo4jGraphDB

config = configparser.ConfigParser()
config.read('../config.ini')
neo4j_graph = Neo4jGraphDB(database=config["neo4j"]["database"])
driver = neo4j_graph._driver
with driver.session(database=neo4j_graph._database) as session:
    result = session.run("MATCH (n) RETURN count(n) AS node_count")
    node_count = result.single()["node_count"]
print(f"Total number of nodes in the Neo4j database: {node_count}")


Total number of nodes in the Neo4j database: 93325


In [23]:
user_query = "muscle weakness"
q_embed = api.post('/embed', {'input': [user_query]})[0]['data'][0]
k = 10

with driver.session(database=neo4j_graph._database) as sess:
    result = sess.run("""
        CALL db.index.vector.queryNodes('hpo_phenotype_embedding', $k, $qe)
        YIELD node, score
        RETURN node.id as id, node.label as label, score
        ORDER BY score DESC
        LIMIT $k
    """, k=k, qe=q_embed)
    for rec in result:
        print(f"{rec['score']:.3f}  {rec['label']}  (id={rec['id']})")

0.929  Muscle weakness  (id=HP:0001324)
0.848  Limb muscle weakness  (id=HP:0003690)
0.844  Generalized muscle weakness  (id=HP:0003324)
0.843  Muscle flaccidity  (id=HP:0010547)
0.823  Weakness due to upper motor neuron dysfunction  (id=HP:0010549)
0.822  Generalized weakness of limb muscles  (id=HP:0009028)
0.821  Weakness of muscles of respiration  (id=HP:0004347)
0.820  Fatigable weakness of skeletal muscles  (id=HP:0030197)
0.816  Progressive muscle weakness  (id=HP:0003323)
0.815  Hand muscle weakness  (id=HP:0030237)


# Customized API

In [24]:
cfg = load_config_api("chat", path="../config.ini")
api = ApiClient(cfg)

Base URL set to: [DEFAULT_CHAT_API_URI]


In [25]:
body, status, headers = api.post('/ask', {'question': 'Who are you?'})
print("Status:", status)
print("Headers:", headers)
print("Body:", body)

ValueError: unknown url type: '[DEFAULT_CHAT_API_URI]/ask'