In [44]:
%pip install pydantic_settings langchain langchain-core langchain-google-genai langchain-qdrant fastembed langchain-community qdrant-client langgraph

I0000 00:00:1753944379.290108  812510 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers


Note: you may need to restart the kernel to use updated packages.


In [45]:
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    """Notebook configuration loaded through pydantic-settings.

    Declares the single secret this notebook needs and tells pydantic-settings
    to also read a local ``.env`` file.
    """

    # Required (no default): the key handed to the Google embedding and chat clients.
    GOOGLE_API_KEY: str
    # Read values from a ".env" file located relative to the working directory.
    model_config = SettingsConfigDict(env_file=".env")

# Shared settings instance; later cells read `env.GOOGLE_API_KEY`.
env = Settings()

In [46]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Gemini embedding client; used to embed the doctor records and, via the retriever, the search queries.
# NOTE(review): the Qdrant collection is created with `dimension = 3072` — confirm that matches this model's output size.
embeddings_2 = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001", google_api_key=env.GOOGLE_API_KEY)

In [47]:
from qdrant_client.http.models import Distance

# Name of the Qdrant collection that stores the doctor vectors.
collection_name = "list_doctors"
# Vector size used when creating the collection; presumably the embedding model's output size — TODO confirm.
dimension = 3072
# Similarity metric used when creating the collection.
distance = Distance.COSINE

In [48]:
import json

# Load the doctor directory that is embedded further down.
# The encoding is pinned to UTF-8 so the read does not depend on the platform's
# locale default (the records contain Indonesian names and titles).
with open("doctors_final.json", "r", encoding="utf-8") as f:
    # NOTE: the misspelled name `list_doctkers` is kept on purpose; the ingestion cell refers to it.
    list_doctkers = json.load(f)

# Show the first record as a quick check of the schema.
print(list_doctkers[0])

{'id': '5eae0017-40dd-4961-869f-79d9e45d87f2', 'name': 'Adventia Emilia Krysna Sipi Seda, M.M., M.Psi., Psikolog', 'specialization_name': 'Psikologi', 'specialization_name_en': 'Psychology', 'sub_specialization_name': 'Psikolog', 'sub_specialization_name_en': 'Psychologist', 'hospital_name': 'Siloam Hospitals Yogyakarta'}


In [49]:
from qdrant_client import QdrantClient

# Qdrant client pointed at ":memory:" — presumably the local in-memory mode, so the
# collection would have to be rebuilt after every kernel restart (confirm against qdrant-client docs).
client = QdrantClient(":memory:")

In [50]:
from qdrant_client.http.models import VectorParams

# Create the collection only when it is missing, so re-running this cell is harmless.
if not client.collection_exists(collection_name=collection_name):
    client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=dimension, distance=distance),
    )

In [None]:
import time
import uuid
from qdrant_client.models import PointStruct
from google.api_core.exceptions import ResourceExhausted

# Maximum number of embedding attempts per record.
MAX_RETRY = 5
# Base back-off in seconds; embed_with_retry multiplies it by the attempt number.
RETRY_DELAY_SECONDS = 2
# Re-declared with the same value as in the earlier configuration cell.
collection_name = "list_doctors"

def _is_rate_limit_error(exc):
    """Return True if `exc`, or any exception it was raised from, looks like a quota/429 error."""
    markers = ("429", "resource has been exhausted", "resource_exhausted")
    seen = set()
    # Walk the cause/context chain; `seen` guards against cyclic chains.
    while exc is not None and id(exc) not in seen:
        seen.add(id(exc))
        if isinstance(exc, ResourceExhausted):
            return True
        message = str(exc).lower()
        if any(marker in message for marker in markers):
            return True
        exc = exc.__cause__ or exc.__context__
    return False


def embed_with_retry(text, max_retry=MAX_RETRY):
    """Embed `text` with `embeddings_2`, retrying with linear back-off on rate limits.

    Args:
        text: The string to embed.
        max_retry: Maximum number of attempts.

    Returns:
        Whatever `embeddings_2.embed_query(text)` returns.

    Raises:
        RuntimeError: Every attempt was rejected with a rate-limit error.
        Exception: Any other failure is re-raised immediately, without retrying.
    """
    last_error = None
    for attempt in range(1, max_retry + 1):
        try:
            return embeddings_2.embed_query(text)
        except Exception as e:
            # Quota failures surface from the LangChain client as GoogleGenerativeAIError
            # ("Error embedding content: 429 ..."), not as a bare ResourceExhausted, so the
            # whole exception chain is inspected instead of relying on the exception type.
            if not _is_rate_limit_error(e):
                print(f"[Attempt {attempt}] Failed to embed: {e}")
                raise
            last_error = e
            # No point sleeping after the final attempt; fall through to the RuntimeError.
            if attempt < max_retry:
                delay = RETRY_DELAY_SECONDS * attempt
                print(f"[Attempt {attempt}] Rate limit hit. Retrying in {delay} seconds...")
                time.sleep(delay)
    raise RuntimeError("Max retries reached. Embedding failed.") from last_error

# Embed every doctor record and upsert it into Qdrant, one point per record.
for i, row in enumerate(list_doctkers, start=1):
    try:
        text = f"Doctor: {row['name']}, specialization_name: {row['specialization_name_en']}, location: {row['hospital_name']}"
        emb = embed_with_retry(text)

        point = PointStruct(
            # Deterministic id derived from the record's own id: re-running this cell
            # overwrites the same point instead of adding a duplicate under a new random id.
            id=str(uuid.uuid5(uuid.NAMESPACE_URL, str(row['id']))),
            vector=emb,
            # Presumably the payload layout the LangChain Qdrant store reads by default
            # (the search tool returns `page_content`) — confirm against langchain-qdrant docs.
            payload={
                "page_content": text,
                "metadata": {
                    "id": row['id'],
                    "name": row['name'],
                    "specialization_name": row['specialization_name_en'],
                    "sub_specialization_name": row.get("sub_specialization_name_en", ""),
                    "hospital_name": row["hospital_name"]
                },
            },
        )
        client.upsert(
            collection_name=collection_name,
            points=[point]
        )
        print(f"[{i}] Successfully upserted: {text}")

    except Exception as e:
        # Best-effort ingestion: report the failing record and carry on with the next one.
        print(f"[{i}] Failed processing doctor '{row.get('name', '-')}' due to error: {e}")


[1] Successfully upserted: Doctor: Adventia Emilia Krysna Sipi Seda, M.M., M.Psi., Psikolog, specialization_name: Psychology, location: Siloam Hospitals Yogyakarta
[2] Successfully upserted: Doctor: Dokter Umum Siloam Yogyakarta, specialization_name: General Practitioner, location: Siloam Hospitals Yogyakarta
[3] Successfully upserted: Doctor: dr. Andrean Jefrian Manihuruk, specialization_name: General Practitioner, location: Siloam Hospitals Yogyakarta
[4] Successfully upserted: Doctor: dr. Anggoro Budi Hartopo, M.Sc., Ph.D., SpPD., SpJP (K), specialization_name: Cardiology (Heart), location: Siloam Hospitals Yogyakarta
[5] Successfully upserted: Doctor: dr. Antonius Arif Kusuma, SpB, specialization_name: General Surgery, location: Siloam Hospitals Yogyakarta
[6] Successfully upserted: Doctor: dr. Antonius Sri Widayanto, SpOG, specialization_name: Obstetrics and Gynecology, location: Siloam Hospitals Yogyakarta
[7] Successfully upserted: Doctor: dr. Arvin Manuel Wulur, SpKFR, AIFO-K, 

In [None]:
from langchain_qdrant import QdrantVectorStore
def get_retriever():
    """Build a retriever over the doctor collection.

    Each call wraps the shared `client` in a new QdrantVectorStore bound to
    `collection_name` and the `embeddings_2` model, then returns that store's
    default retriever.
    """
    return QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=embeddings_2,
    ).as_retriever()

In [None]:
from langchain_core.tools import tool
from typing import Annotated, List

@tool
def search_match_doctor(query: Annotated[str, "Search query must contain keywords related to doctor name, specialization, or hospital location"]) -> List[str]:
    """Search for doctors by name, specialization, sub-specialization, or hospital affiliation."""
    # Run the query through a freshly built retriever and keep only the text of each hit.
    matched_documents = get_retriever().invoke(query)
    contents = []
    for document in matched_documents:
        contents.append(document.page_content)
    return contents


In [None]:
# `@tool` turns the function into a tool object; run it through `.invoke()` rather than
# calling it directly (direct calls go through the deprecated `BaseTool.__call__`).
search_match_doctor.invoke({"query": "saya sakit kepala dan saya berasal dari jogja tolong berikan saya rekomendasi dokternya"})

GoogleGenerativeAIError: Error embedding content: 429 Resource has been exhausted (e.g. check quota).

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# Chat model shared by the stand-alone chain below and by the graph nodes.
llm = ChatGoogleGenerativeAI(api_key=env.GOOGLE_API_KEY, model="gemini-2.0-flash")

# System instructions for the keyword-extraction step.
_keyword_system_message = """
You are a domain expert assistant for Siloam Hospitals.

Your job is to extract **one concise keyword** from the user's question to help match them with the most suitable **doctor** based on either:
- medical specialization (e.g., cardiology, dermatology, pediatrician), or
- user location (e.g., Jakarta, Surabaya, BSD, etc.).

Instructions:
- Only return **one keyword in English** — either a specialization or location.
- If the user's question includes a suitable keyword, return it as is.
- Do **not** add any explanations, greetings, or extra text — just the keyword.
- If no relevant keyword is found, return an empty string ("").

This keyword will be used to search in the Siloam doctor directory.
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _keyword_system_message),
        ("human", "{question}"),
    ]
)

# Prompt piped into the model: question in, keyword message out.
chain = prompt | llm


NameError: name 'env' is not defined

In [None]:
# Smoke test of the keyword-extraction chain with a sample Indonesian question.
chain.invoke({"question": "Saya mau cek gula darah di siloam bali, dokter yang cocok siapa ya ?"})

In [None]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
class State(TypedDict):
    """State dictionary passed between the graph nodes."""
    # The user's original question; supplied when the graph is invoked.
    question: str
    # Retrieved document texts; written by `retrieve`, read by `generate`.
    context: List[str]
    # Search keyword; written by `get_context`, read by `retrieve`.
    search: str
    # Final reply text; written by `generate`.
    answer: str

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

def get_context(state: State):
    """Ask the LLM for a single search keyword describing the user's question.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        ``{"search": keyword}``. The keyword is the model's reply with surrounding
        whitespace and quotes removed. If nothing usable remains, the original
        question is used so the retriever never receives an empty query.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", """
                You are a domain expert in healthcare services at Siloam Hospitals.
                Your task is to extract a **single relevant keyword** from the user's question to help match them with the most appropriate **doctor** based on specialization and/or location.

                Guidelines:
                - Only return **one keyword in English**, based on the user’s question.
                - If the question already contains a suitable keyword (e.g., a specialization or city), return it as is.
                - Prioritize medical specialties (e.g., cardiology, pediatrician) or geographic indicators (e.g., Jakarta, Surabaya).
                - Do **not** include any explanatory text or additional information.
                - Ignore general health questions that don’t indicate a clear intent to find a doctor.

                This keyword will be used to query the vector database for doctor matching.
                """),
                ("human", "{question}")
        ]
    )
    chain = prompt | llm
    result = chain.invoke({"question": state["question"]})

    # Models often pad the answer with newlines or wrap it in quotes (e.g. a literal "").
    raw = result.content
    keyword = raw.strip().strip("\"'").strip() if isinstance(raw, str) else ""
    # Fall back to the full question rather than searching with an empty string.
    return {"search": keyword or state["question"]}

In [None]:
def retrieve(state: State):
    """Look up doctors matching ``state["search"]``.

    Args:
        state: Graph state; only ``state["search"]`` is read.

    Returns:
        ``{"context": [...]}`` holding the text of every retrieved document.
    """
    # `search_match_doctor` is a tool object, so it is run through `.invoke()`;
    # calling it directly relies on the deprecated `BaseTool.__call__`.
    retrieved_docs = search_match_doctor.invoke({"query": state["search"]})
    return {"context": retrieved_docs}

In [None]:
def generate(state: State):
    """Write the final answer from the retrieved doctor records.

    Args:
        state: Graph state; reads ``state["question"]`` and ``state["context"]``.

    Returns:
        ``{"answer": text}`` with the model's reply.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            # The retrieved context holds doctor records, so the instructions talk about
            # doctors (the earlier wording described MCU packages, which this pipeline never retrieves).
            ("system", """
                You are an assistant that helps users find a suitable doctor at Siloam hospitals.
                You will generate a response based on the context provided.
                The response should be concise and relevant to the question asked.
                doctor list knowledge:
                {context}
                If the context is empty, say that no matching doctor was found and ask the user for a specialization or location.
                Please always include the related doctors (name, specialization, hospital) in your response.
            """),
            ("human", "{question}"),
        ]
    )
    chain = prompt | llm
    # Render the records as a bullet list instead of interpolating a Python list repr.
    context_text = "\n".join(f"- {doc}" for doc in (state.get("context") or []))
    result = chain.invoke({"question": state["question"], "context": context_text})
    return {"answer": result.content}

In [None]:
# Linear pipeline: get_context -> retrieve -> generate, each node named after its function.
graph_builder = StateGraph(State)
graph_builder.add_sequence([get_context, retrieve, generate])
# Execution starts at the keyword-extraction node.
graph_builder.add_edge(START, "get_context")
graph = graph_builder.compile()

In [None]:
# Run the whole graph once; every state key is given an initial value up front.
# NOTE(review): this sample question asks about packages, while the retriever indexes doctors — confirm the intended example.
response = graph.invoke({
	"question": "Saya mau cek gula darah di siloam, ada paket apa aja ya ?",
	"context": [],
	"search": "",
	"answer": ""
})
print(response["answer"])