In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys

# Absolute path of the parent of the current working directory; presumably the
# repository root, added so the project-local imports in this cell resolve.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
# Guard the append so re-running this cell does not stack duplicate entries
# on sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

from openai import OpenAI
from langchain_openai import ChatOpenAI

from util.config_loader import load_config_api
from util.api_client import ApiClient
from llm.tool import (
    build_ontology_mapper_tool,
    build_patient_ner_tool,
    build_patient_ned_tool,
    build_general_medical_tool,
    build_patient_info_tool,
    build_patient_coverage_tool
)

# OpenAI API


In [3]:
# Look up the "llm" entry in the project config file. The result is used in
# later cells as the base URL of the OpenAI-compatible clients.
url_llm = load_config_api("llm", path="../config.ini")
# NOTE(review): this prints the endpoint into the saved notebook output;
# clear the output before sharing if the URL should stay private.
print("LLM URL:", url_llm)

LLM URL: https://chatcompletion-uncognizable-nilda.ngrok-free.dev/v1


## Single Request

In [4]:
# Synchronous client pointed at the configured endpoint. The API key is the
# placeholder string "EMPTY" (presumably the server does not validate it; confirm).
client = OpenAI(
        api_key="EMPTY",                      
        base_url=url_llm
    )
# One chat completion carrying a single user message.
resp = client.chat.completions.create(
    model="google/medgemma-4b-it",
    messages=[
        {"role": "user", "content": "Who are you?"}
    ],
    temperature=0,
)
# Keep only the text of the first returned choice and show it as the cell result.
response = resp.choices[0].message.content
response

'I am Gemma, an open-weights AI assistant. I am a large language model trained by Google DeepMind.\n'

## Batch Request


In [5]:
import asyncio
from openai import AsyncOpenAI

In [6]:
# Three independent questions used to demonstrate concurrent requests; each one
# asks for a one-sentence answer.
prompts = [
    "Explain the difference between type 1 and type 2 diabetes in simple terms. In one sentence.",
    "List the main symptoms of pneumonia and when a patient should see a doctor. In one sentence.",
    "What lifestyle changes can help reduce high blood pressure? In one sentence."
]

async def run_batch(prompts, concurrency=8, max_tokens=128):
    """Send every prompt as its own chat completion and return the replies in order.

    The endpoint is taken from the notebook-level ``url_llm`` and the model name
    is fixed to ``google/medgemma-4b-it``.

    Args:
        prompts: Iterable of user-message strings; one request is made per item.
        concurrency: Maximum number of requests in flight at once (must be >= 1).
        max_tokens: Per-request completion token limit passed to the API.

    Returns:
        A list holding the text of the first choice of each response, in the
        same order as ``prompts``.

    Raises:
        ValueError: If ``concurrency`` is smaller than 1.
        Exception: The first error raised by any request is propagated after
            the remaining requests have been cancelled and the client closed.
    """
    # A semaphore with zero permits would block every request forever, so
    # reject it up front instead of hanging the cell.
    if concurrency < 1:
        raise ValueError("concurrency must be >= 1")

    client = AsyncOpenAI(api_key="EMPTY", base_url=url_llm)
    sem = asyncio.Semaphore(concurrency)

    async def one(p):
        # The semaphore caps how many requests run at the same time.
        async with sem:
            r = await client.chat.completions.create(
                model="google/medgemma-4b-it",
                messages=[{"role": "user", "content": p}],
                temperature=0.0,
                max_tokens=max_tokens,
                stream=False,
            )
            return r.choices[0].message.content

    tasks = [asyncio.create_task(one(p)) for p in prompts]
    try:
        # gather() returns results in the order the tasks were passed in.
        return await asyncio.gather(*tasks)
    finally:
        # On failure or interruption, stop the requests that are still pending
        # (cancel() is a no-op for finished tasks) and wait for them to settle
        # so no exception is left unretrieved, then always release the client.
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
        await client.close()

# Top-level await: this relies on the notebook kernel's running event loop.
answers = await run_batch(prompts, concurrency=8)
# Print each prompt next to its answer.
for q, a in zip(prompts, answers):
    print(q, "→", a)


Explain the difference between type 1 and type 2 diabetes in simple terms. In one sentence. → Type 1 diabetes is an autoimmune disease where the body attacks and destroys insulin-producing cells, while type 2 diabetes is a condition where the body doesn't use insulin properly, often due to resistance and/or insufficient production.

List the main symptoms of pneumonia and when a patient should see a doctor. In one sentence. → Pneumonia symptoms include cough (often with phlegm), fever, chills, shortness of breath, and chest pain. See a doctor if you have these symptoms, especially if you are elderly, have a chronic condition, or are immunocompromised.

What lifestyle changes can help reduce high blood pressure? In one sentence. → Lifestyle changes like diet modifications (reducing sodium and increasing potassium), regular exercise, maintaining a healthy weight, limiting alcohol consumption, and quitting smoking can significantly reduce high blood pressure.



# LangChain Tools

In [7]:
# LangChain chat model bound to the same endpoint and model as the raw OpenAI
# client above; this instance is passed to every build_*_tool(...) call below.
chat_client = ChatOpenAI(
    api_key="EMPTY",
    base_url=url_llm,
    model_name="google/medgemma-4b-it",
    temperature=0,
    max_tokens=8192,
    top_p=0.9,
    # Generation is cut at any of these strings.
    # NOTE(review): presumably these are the model's end-of-turn markers — confirm.
    stop=["<end_of_turn>", "</s>", "\nUser:", "\n\nUser:"],
    frequency_penalty=0.2,
    presence_penalty=0.0,
)

## Ontology Mapping

In [8]:
# Input for the ontology mapper: a source concept, free-text context, and the
# candidate targets to choose from.
payload = {
    "source_concept": "Acute myocardial infarction",
    "source_context": "Patient with chest pain, ST-elevation, troponin markedly elevated.",
    # The candidates are passed as a JSON-encoded string, not as a Python list.
    "candidate_list": '[{"id":"SCTID:22298006","label":"Myocardial infarction"}, {"id":"SCTID:57054005","label":"Acute coronary syndrome"}]',
}
# Build the tool around the shared chat model and run it once on the payload.
tool_output = build_ontology_mapper_tool(chat_client).invoke(payload)
print(tool_output)

{'best_id': 'SCTID:22298006', 'best_label': 'Myocardial infarction', 'confidence': 0.9, 'rationale': 'Acute myocardial infarction is a specific type of acute coronary syndrome. The context describes ST-elevation and elevated troponin, which are indicative of myocardial infarction.', 'support': {'evidence': 'ST-elevation, elevated troponin', 'reason': 'These are key indicators of myocardial infarction.'}}


# Patient Annotation

### Named Entity Recognition

In [9]:
# Input for the patient NER tool: patient and encounter identifiers, a list of
# ICD chapter titles, and two text fields (a " | "-separated summary and a
# free-text narrative).
# NOTE(review): presumably "icd_chapters" is the label set the tool may assign
# to entities — confirm against the tool definition.
# NOTE: this rebinds `payload` from the ontology-mapping cell.
payload={"patient_id":"patient_002",
         "encounter_id":"enc_145",
         "icd_chapters":["Certain infectious and parasitic diseases","Neoplasms","Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism","Endocrine, nutritional and metabolic diseases","Mental and behavioural disorders","Diseases of the nervous system","Diseases of the eye and adnexa","Diseases of the ear and mastoid process","Diseases of the circulatory system","Diseases of the respiratory system","Diseases of the digestive system","Diseases of the skin and subcutaneous tissue","Diseases of the musculoskeletal system and connective tissue","Diseases of the genitourinary system","Pregnancy, childbirth and the puerperium","Certain conditions originating in the perinatal period","Congenital malformations, deformations and chromosomal abnormalities","Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified","Injury, poisoning and certain other consequences of external causes","External causes of morbidity and mortality","Factors influencing health status and contact with health services","Codes for special purposes"],
         "concat_text":"Shortness of breath and chest tightness | Episodic dizziness and palpitations | Hypertension, type 2 diabetes mellitus, and iron-deficiency anemia",
         "narrative_text":"The patient reports three months of exertional shortness of breath and intermittent chest tightness. He has a history of hypertension and type 2 diabetes mellitus; home blood pressures remain poorly controlled. No chest pain today, but occasional palpitations and lightheadedness occur weekly. Recent labs showed low ferritin consistent with iron-deficiency anemia."}

# Build the NER tool around the shared chat model and run it on the payload.
tool_output = build_patient_ner_tool(chat_client).invoke(payload)
print(tool_output)

{'patient_id': 'patient_002', 'encounter_id': 'enc_145', 'entities': [{'source': 'concat', 'start': 16, 'end': 28, 'text': 'Shortness of breath', 'label': 'Diseases of the respiratory system', 'assertion': 'present', 'temporality': 'history', 'rationale': 'Patient reports exertional shortness of breath.'}, {'source': 'concat', 'start': 34, 'end': 50, 'text': 'chest tightness', 'label': 'Diseases of the respiratory system', 'assertion': 'present', 'temporality': 'history', 'rationale': 'Patient reports intermittent chest tightness.'}, {'source': 'concat', 'start': 56, 'end': 74, 'text': 'dizziness', 'label': 'Mental and behavioural disorders', 'assertion': 'present', 'temporality': 'history', 'rationale': 'Patient reports episodic dizziness.'}, {'source': 'concat', 'start': 78, 'end': 96, 'text': 'palpitations', 'label': 'Diseases of the circulatory system', 'assertion': 'present', 'temporality': 'history', 'rationale': 'Patient reports occasional palpitations.'}, {'source': 'concat', '

### Named Entity Disambiguation

In [10]:
# Input for the patient NED (entity disambiguation) tool:
#   - "mention": the entity to link, in the same field layout the NER cell prints
#   - "candidates": scored ICD codes to choose from
#   - "other_mentions": further entities from the same text, given as context
ned_payload = {
    "mention": {
        "source": "concat",
        "start": 0,
        "end": 19,  # [0, 19) covers "Shortness of breath"
        "text": "Shortness of breath",
        "label": "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified",
        "assertion": "present",
        "temporality": "chronic",  # "three months of exertional shortness of breath"
        "rationale": "Symptom present for 3 months; no confirmed respiratory/cardiac diagnosis stated."
    },
    "candidates": [
        {"score": 0.861, "id": "R06.0", "label": "Dyspnea"},                          # symptom code (best fit)
        {"score": 0.784, "id": "J45",   "label": "Asthma"},                           # possible underlying cause
        {"score": 0.762, "id": "I26.9", "label": "Pulmonary embolism, unspecified"},  # workup pending in note
        {"score": 0.741, "id": "I20.9", "label": "Angina pectoris, unspecified"},
        {"score": 0.709, "id": "R07.1", "label": "Chest pain"},
        {"score": 0.688, "id": "R09.02","label": "Hypoxemia"}
    ],
    "other_mentions": [
        {"text": "chest tightness", "label": "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified"},
        {"text": "palpitations",    "label": "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified"},
        {"text": "iron-deficiency anemia", "label": "Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism"}
    ]
}

# Invoke the Patient NED tool
ned_tool_output = build_patient_ned_tool(chat_client).invoke(ned_payload)
print(ned_tool_output)

{'source': 'concat', 'start': 0, 'end': 19, 'text': 'Shortness of breath', 'label': 'Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified', 'assertion': 'present', 'temporality': 'chronic', 'rationale': 'Symptom present for 3 months; no confirmed respiratory/cardiac diagnosis stated.', 'icd_id': 'R06.0', 'icd_label': 'Dyspnea', 'confidence': 0.95, 'linking_rationale': 'The mention is for shortness of breath, which is a symptom of dyspnea. The other mentions are symptoms as well.'}


## Questions and Answers

### General Medical Questions

In [11]:
# Natural-language questions sent one by one to the general medical tool in the
# loop below. They cover disease -> phenotype lookups, phenotype -> disease
# lookups, evidence and frequency requests, and ICD -> HPO mapping queries.
questions = [
    "For cystic fibrosis, what symptoms are documented and what is the evidence supporting them?",
    "Which phenotypes of Marfan syndrome have published clinical study evidence, and what are their frequencies?",
    "For Duchenne muscular dystrophy, list all associated symptoms and include the supporting publications.",
    "Which diseases are associated with short stature? Show me the reported frequencies.",
    "List all phenotypes of Rett syndrome with their frequencies and PubMed URLs.",
    "Which diseases are linked to the phenotype 'muscle weakness' supported by published clinical studies?",
    "Find diseases associated with 'recurrent infections' and show the evidence descriptions.",
    "Which diseases have phenotypes with adult onset and what sources support them?", # Useful to show different use cases
    "Which diseases are treated with insulin?",
    "Find HPO phenotypes associated with the ICD disease 'Influenza' and include the mapping support text.",
    "For the ICD disease 'Typhoid fever', what HPO phenotypes are mapped and with what confidence?",
    "What is the average confidence of all HPO mappings to ICD for Cholera?",
    "List diseases associated with recurrent fever supported by published clinical studies only.",
    "Which phenotypes of Marfan syndrome have evidence type PCS?",
]

# Build the tool once: its construction does not depend on the question, so
# rebuilding it on every iteration is loop-invariant work.
general_medical_tool = build_general_medical_tool(chat_client)

# Ask each question in turn and print only the "explanation" field of the result.
for q in questions:
    print("\n=== Question ===")
    print(q)
    general_medical_tool_output = general_medical_tool.invoke({
        "question": q,
        "top_k": 10
    })
    print("=== Output ===")
    print(general_medical_tool_output["explanation"])



=== Question ===
For cystic fibrosis, what symptoms are documented and what is the evidence supporting them?
=== Output ===
Answer:
According to the ontology, cystic fibrosis is associated with several phenotypes.

*   **Abnormality of the liver** (HP:0001392) is associated with cystic fibrosis. This phenotype is defined as an abnormality of the liver.
*   **Abnormality of the male genital system** (HP:0000001) is associated with cystic fibrosis. This phenotype is defined as an abnormality of the male genital system.
*   **Absent vas deferens** (HP:0002099) is associated with cystic fibrosis. This phenotype is defined as the absence of vas deferens.
*   **Asthma** (HP:0002613) is associated with cystic fibrosis. This phenotype is defined as asthma.
*   **Bronchiectasis** (HP:0002110) is associated with cystic fibrosis. This phenotype is defined as bronchiectasis.
*   **Biliary cirrhosis** (HP:0002110) is associated with cystic fibrosis. This phenotype is defined as biliary cirrhosis.


KeyboardInterrupt: 

In [12]:
# Ask the bare model the first question from the previous cell, without going
# through the general medical tool (presumably as a baseline for comparison).
# The prompt text matches that question exactly so the two answers are
# comparable like-for-like.
client = OpenAI(
    api_key="EMPTY",
    base_url=url_llm,
)
resp = client.chat.completions.create(
    model="google/medgemma-4b-it",
    messages=[
        {"role": "user", "content": "For cystic fibrosis, what symptoms are documented and what is the evidence supporting them?"}
    ],
    temperature=0,
)
# Print the text of the first returned choice.
response = resp.choices[0].message.content
print(response)

Cystic fibrosis (CF) is a genetic disorder that primarily affects the lungs, pancreas, liver, intestines, sinuses, and reproductive organs. The symptoms of CF vary in severity and presentation depending on the specific mutations a person has and how well they are managed.

Here's a breakdown of documented symptoms and the evidence supporting them:

**1. Respiratory Symptoms:**

*   **Chronic Cough:**
    *   **Symptoms:** Persistent cough, often productive (producing mucus).
    *   **Evidence:**  This is a hallmark symptom.  The thick, sticky mucus in the airways obstructs airflow, leading to inflammation and irritation, triggering the cough reflex.  Studies show that individuals with CF have significantly higher rates of chronic cough compared to the general population.  The cough is often worse at night or in the morning.
*   **Frequent Lung Infections:**
    *   **Symptoms:** Recurrent pneumonia, bronchitis, and other respiratory infections.
    *   **Evidence:**  The thick mucus p

### Patient Tool (Virtualized Resources)

In [19]:
# Input for the patient info tool: a patient identifier plus a free-text question.
patient_payload = {
    "patient_id": "P003",
    # Alternative question kept for reference; swap it in to request a full summary.
    #"question": "Provide a concise clinical summary of patient P003 using all documented findings.",
    "question": "What is the follow up of patient P003 in date 2024-01-22",
}

# Build the tool around the shared chat model, run it, and display the result dict.
patient_tool_output = build_patient_info_tool(chat_client).invoke(patient_payload)
patient_tool_output

{'patient_id': 'P003',
 'question': 'What is the follow up of patient P003 in date 2024-01-22',
 'encounter_date': '2024-01-22',
 'has_data': True,
 'answer': "Answer:\nThe follow-up for patient P003 on 2024-01-22 is that the patient was referred to infectious disease specialists for further serologic and imaging workup to clarify the cause of the recurrent low-grade fevers and systemic malaise. The patient was instructed to keep a detailed symptom and temperature diary. The patient's condition is recurrent low-grade fever, with a chief complaint of intermittent low-grade fevers with chills. The course trend is worsened. The patient has comorbidities of multiple mononeuropathy, posterior uveitis, and recurrent monoarthritis. The plan is to perform infectious disease consultation and further serologic workup. The observation vitals are 125/77 mmHg, HR 78 bpm, RR 16, Temp 37.9°C, SpO₂ 98%. The narrative indicates that the patient described several weeks of intermittent evening temperatur

### Coverage Tool (using the ICD-HPO mappings)

In [14]:
# Input for the coverage tool: a patient identifier and a "limit" value.
# NOTE(review): presumably "limit" caps the number of diseases returned — confirm
# against the tool definition.
coverage_payload = {
    "patient_id": "P003",
    "limit": 20
}

# Build the tool around the shared chat model, run it, and display the result.
coverage_tool_output = build_patient_coverage_tool(chat_client).invoke(coverage_payload)
coverage_tool_output



{'cypher': 'MULTI-STEP PIPELINE: get_patient_icd_codes -> map_icd_to_hpo -> rollup_hpo_to_ancestors -> compute_coverage',
 'rows': [{'diseaseId': 'ORPHA:117',
   'diseaseName': 'Behçet disease',
   'covered': 16,
   'total': 85,
   'coveragePct': 18.8,
   'missingHpoIds': ['HP:0002204',
    'HP:0002202',
    'HP:0007813',
    'HP:0200039',
    'HP:0001637',
    'HP:0100820',
    'HP:0002376',
    'HP:0001482',
    'HP:0000031',
    'HP:0000155',
    'HP:0002014',
    'HP:0001289',
    'HP:0000518',
    'HP:0100584',
    'HP:0100614',
    'HP:0012378',
    'HP:0100653',
    'HP:0200034',
    'HP:0100654',
    'HP:0000083',
    'HP:0002383',
    'HP:0001250',
    'HP:0012219',
    'HP:0002024',
    'HP:0001097',
    'HP:0012819',
    'HP:0002027',
    'HP:0001653',
    'HP:0000488',
    'HP:0002105',
    'HP:0002102',
    'HP:0001658',
    'HP:0001733',
    'HP:0001659',
    'HP:0100326',
    'HP:0010885',
    'HP:4000041',
    'HP:0001061',
    'HP:0000099',
    'HP:0002076',
    'HP:00

# Embedding API

In [None]:
# Load the "embedding" entry from the project config and wrap it in the
# project's ApiClient. `api` is reused by the semantic-search cell further down.
cfg = load_config_api("embedding", path="../config.ini")
api = ApiClient(cfg)

In [None]:
# POST two sample sentences to the /embed route; ApiClient.post returns a
# (body, status, headers) triple, each part of which is printed below.
body, status, headers = api.post('/embed', {'input': ['Who are you?', 'What is your name?']})
print("Status:", status)
print("Headers:", headers)
print("Body:", body)

# Semantic Search

In [None]:
import configparser
from database.neo4j_db import Neo4jGraphDB

# Read the Neo4j database name from the project config and open the graph wrapper.
config = configparser.ConfigParser()
config.read('../config.ini')
neo4j_graph = Neo4jGraphDB(database=config["neo4j"]["database"])
# NOTE(review): this reaches into the wrapper's private `_driver` / `_database`
# attributes; prefer a public accessor if Neo4jGraphDB offers one.
driver = neo4j_graph._driver
# Connectivity check: count every node in the database.
with driver.session(database=neo4j_graph._database) as session:
    result = session.run("MATCH (n) RETURN count(n) AS node_count")
    node_count = result.single()["node_count"]
print(f"Total number of nodes in the Neo4j database: {node_count}")


In [None]:
# Embed the query text, then look up its nearest neighbours in the Neo4j vector index.
user_query = "muscle weakness"
# api.post returns (body, status, headers); [0] selects the body, from which the
# first item under 'data' is taken.
# NOTE(review): assumes `api` is still the embedding client and that data[0] is
# the raw vector the index expects — confirm the response schema. The status
# code is not checked here.
q_embed = api.post('/embed', {'input': [user_query]})[0]['data'][0]
k = 10  # number of neighbours to retrieve

with driver.session(database=neo4j_graph._database) as sess:
    # Query the 'hpo_phenotype_embedding' vector index for the k nodes closest
    # to the query embedding and return them with the highest score first.
    result = sess.run("""
        CALL db.index.vector.queryNodes('hpo_phenotype_embedding', $k, $qe)
        YIELD node, score
        RETURN node.id as id, node.label as label, score
        ORDER BY score DESC
        LIMIT $k
    """, k=k, qe=q_embed)
    # One line per hit: score to three decimals, label, then id.
    for rec in result:
        print(f"{rec['score']:.3f}  {rec['label']}  (id={rec['id']})")

# Customized API

In [None]:
# Load the "chat" entry from the project config and wrap it in ApiClient.
# NOTE(review): this rebinds `cfg` and `api`, which earlier cells set up for the
# embedding service. Re-running the semantic-search cell after this one would
# send its /embed request to the chat API. A distinct name would avoid that.
cfg = load_config_api("chat", path="../config.ini")
api = ApiClient(cfg)

In [None]:
# POST a sample question to the /ask route and print each part of the
# (body, status, headers) triple returned by ApiClient.post.
body, status, headers = api.post('/ask', {'question': 'Who are you?'})
print("Status:", status)
print("Headers:", headers)
print("Body:", body)