In [2]:
import os, sys

project_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
sys.path.append(project_root)

from openai import OpenAI

from util.config_loader import load_config_api
from util.api_client import ApiClient
from llm.tool import build_ontology_mapper_tool

# Chat API

In [2]:
cfg = load_config_api("chat", path="../config.ini")
api = ApiClient(cfg)

Base URL set to: [DEFAULT_CHAT_API_URI]


In [None]:
body, status, headers = api.post('/ask', {'question': 'Who are you?'})
print("Status:", status)
print("Headers:", headers)
print("Body:", body)

# OpenAI API


In [3]:
url_llm = load_config_api("llm", path="../config.ini")
print("LLM URL:", url_llm)

LLM URL: https://chatcompletion-uncognizable-nilda.ngrok-free.dev/v1


## Single Request

In [4]:
client = OpenAI(
        api_key="EMPTY",                      
        base_url=url_llm
    )
resp = client.chat.completions.create(
    model="google/medgemma-4b-it",
    messages=[
        {"role": "user", "content": "Who are you?"}
    ],
    temperature=0,
)
response = resp.choices[0].message.content
response

'I am Gemma, an open-weights AI assistant. I am a large language model trained by Google DeepMind.\n'

## Batch Request


In [12]:
import asyncio
from openai import AsyncOpenAI

In [None]:
prompts = [
    "Explain the difference between type 1 and type 2 diabetes in simple terms.",
    "List the main symptoms of pneumonia and when a patient should see a doctor.",
    "What lifestyle changes can help reduce high blood pressure?"
]

async def run_batch(prompts, concurrency=8, max_tokens=128):
    client = AsyncOpenAI(api_key="EMPTY", base_url=url_llm)
    sem = asyncio.Semaphore(concurrency)

    async def one(p):
        async with sem:
            r = await client.chat.completions.create(
                model="google/medgemma-4b-it",
                messages=[{"role": "user", "content": p}],
                temperature=0.0,
                max_tokens=max_tokens,
                stream=False,
            )
            return r.choices[0].message.content

    tasks = [asyncio.create_task(one(p)) for p in prompts]
    out = await asyncio.gather(*tasks)
    await client.close()
    return out

answers = await run_batch(prompts, concurrency=8)
for q, a in zip(prompts, answers):
    print(q, "→", a)


RuntimeError: asyncio.run() cannot be called from a running event loop

# LangChain Usage

In [None]:
payload = {
    "source_concept": "Acute myocardial infarction",
    "source_context": "Patient with chest pain, ST-elevation, troponin markedly elevated.",
    "candidate_list": '[{"id":"SCTID:22298006","label":"Myocardial infarction"}, {"id":"SCTID:57054005","label":"Acute coronary syndrome"}]',
}
tool_output = ontology_mapper_tool.invoke(payload)
print(tool_output)

# Embedding API

In [6]:
cfg = load_config_api("embedding", path="../config.ini")
api = ApiClient(cfg)

Base URL set to: https://civilly-uncognizable-nilda.ngrok-free.dev


In [7]:
body, status, headers = api.post('/embed', {'input': ['Who are you?', 'What is your name?']})
print("Status:", status)
print("Headers:", headers)
print("Body:", body)

Status: 200
Headers: {'Content-Length': '153945', 'Content-Type': 'application/json', 'Date': 'Sat, 08 Nov 2025 08:33:38 GMT', 'Server': 'uvicorn', 'Connection': 'close'}
Body: {'model': 'Alibaba-NLP/gte-Qwen2-7B-instruct', 'num_inputs': 2, 'embedding_dims': 3584, 'data': [[0.006530743092298508, 0.0008580876747146249, -0.00030612858245149255, 0.017885329201817513, 0.002949966350570321, -0.009721901267766953, 0.013358337804675102, 0.0017903883708640933, -0.022115470841526985, 0.02434185892343521, 0.007495511788874865, -0.015287875197827816, 0.00515780271962285, -0.006493636406958103, 0.02345130406320095, 0.01001875288784504, 0.021224914118647575, -0.019592229276895523, -0.006976020988076925, -0.00500937644392252, 0.02003750577569008, 0.0021150701213628054, -0.007495511788874865, -0.0007838746532797813, 0.006011251825839281, -0.01617843098938465, -0.015584727749228477, -0.03250528872013092, 0.0043043531477451324, 0.023006025701761246, 0.017365839332342148, 0.004155927337706089, 0.0017068

# Semantic Search

In [8]:
import configparser
from database.neo4j_db import Neo4jGraphDB

config = configparser.ConfigParser()
config.read('../config.ini')
neo4j_graph = Neo4jGraphDB(database=config["neo4j"]["database"])
driver = neo4j_graph._driver
with driver.session(database=neo4j_graph._database) as session:
    result = session.run("MATCH (n) RETURN count(n) AS node_count")
    node_count = result.single()["node_count"]
print(f"Total number of nodes in the Neo4j database: {node_count}")


Total number of nodes in the Neo4j database: 93325


In [10]:
user_query = "zika disease"
q_embed = api.post('/embed', {'input': [user_query]})[0]['data'][0]
k = 10

with driver.session(database=neo4j_graph._database) as sess:
    result = sess.run("""
        CALL db.index.vector.queryNodes('hpo_phenotype_embedding', $k, $qe)
        YIELD node, score
        RETURN node.id as id, node.label as label, score
        ORDER BY score DESC
        LIMIT $k
    """, k=k, qe=q_embed)
    for rec in result:
        print(f"{rec['score']:.3f}  {rec['label']}  (id={rec['id']})")

0.807  Anti-Zikavirus-specific antibody positivity  (id=HP:0430066)
0.770  Anti-arbovirus antibody positivity  (id=HP:0430083)
0.757  Viremia  (id=HP:0020071)
0.749  Bloodstream Trypanosoma cruzi  (id=HP:0034581)
0.746  Anti-chikungunya virus antibody positivity  (id=HP:0430056)
0.744  Post-vaccination yellow fever  (id=HP:0034310)
0.743  Anti-Japanese encephalitis virus-specific antibody positivity  (id=HP:0430062)
0.742  Anti-West Nile virus antibody positivity  (id=HP:0430053)
0.741  Bloodstream trypomastigotes  (id=HP:0034524)
0.739  Bloodstream Malaria parasite  (id=HP:6000553)
