In [None]:
# Install the third-party packages that the cells below import.
# NOTE(review): no versions are pinned here, so a fresh environment picks up whatever
# releases are current at install time — consider pinning for reproducibility.
%pip install pydantic_settings langchain langchain-core langchain-google-genai langchain-qdrant fastembed langchain-community qdrant-client langgraph

In [None]:
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Notebook configuration read from environment variables or a local ``.env`` file.

    Attributes:
        GOOGLE_API_KEY: API key handed to the Google Generative AI clients
            (embeddings and chat model) created later in the notebook.
    """

    GOOGLE_API_KEY: str
    # extra="ignore": a shared .env file usually holds keys for other tools as well.
    # Without this, pydantic-settings rejects every variable that is not declared as a
    # field above and Settings() fails before the notebook can do anything.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")


# Instantiating validates the configuration; a missing GOOGLE_API_KEY raises here.
env = Settings()

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Google-hosted embedding model, authenticated with the key loaded into `env`.
# Note: the FastEmbed cell that follows rebinds `embeddings_2` when the notebook is run in order.
embeddings_2 = GoogleGenerativeAIEmbeddings(
    google_api_key=env.GOOGLE_API_KEY,
    model="models/gemini-embedding-001",
)

In [None]:
# Optional alternative: use FastEmbed for embeddings instead.
# Running this cell rebinds `embeddings_2`, replacing whatever was assigned before it.
# Supported models:
# https://qdrant.github.io/fastembed/examples/Supported_Models/#supported-text-embedding-models
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

embeddings_2 = FastEmbedEmbeddings(
    model_name="jinaai/jina-embeddings-v2-base-en",
    cache_dir="./embedding_cache",
)

In [None]:
from qdrant_client.http.models import Distance

# Name of the Qdrant collection that holds the doctor vectors.
collection_name = "doctor_packages"

# Vector size of the collection; it has to match the embedding model bound to `embeddings_2`:
#   fastembed        -> 768
#   gemini embedding -> 3072 (set `dimension = 3072` when using it)
dimension = 768

# Similarity metric used by the collection.
distance = Distance.COSINE

# Create Vector Data

In [None]:
# Load the doctor records from doctors_final.json; these are what gets embedded and indexed.
import json

# Decode explicitly as UTF-8: open() otherwise falls back to the platform's locale
# encoding, which is not UTF-8 everywhere and can garble or reject non-ASCII names.
with open("doctors_final.json", "r", encoding="utf-8") as f:
    mcu_data = json.load(f)

# Peek at the first record to check the field names used by the indexing cell.
print(mcu_data[0])

In [None]:
from qdrant_client import QdrantClient

# ":memory:" asks for a local, in-process Qdrant instance; presumably nothing is
# persisted, so the collection has to be rebuilt after a kernel restart — confirm if
# persistence matters.
client = QdrantClient(location=":memory:")

In [None]:
from qdrant_client.http.models import VectorParams

# Create the collection only when it is missing, so re-running this cell is harmless.
if not client.collection_exists(collection_name=collection_name):
    vector_params = VectorParams(size=dimension, distance=distance)
    client.create_collection(
        collection_name=collection_name,
        vectors_config=vector_params,
    )

In [None]:
from qdrant_client.models import PointStruct
import uuid

# Embed every doctor record and store it in the collection, one point per record.
for i, row in enumerate(mcu_data, start=1):
    # Sentence that gets embedded and later returned to the LLM as context.
    text = f"Doctor {row['name']} is a {row['sub_specialization_name_en']} in the field of {row['specialization_name_en']} at {row['hospital_name']}."
    emb = embeddings_2.embed_query(text)
    print(i)

    # Deterministic ID derived from the record: re-running this cell overwrites the
    # existing points instead of inserting a second copy of every doctor (random
    # uuid4 IDs made each re-run duplicate the whole collection).
    point_id = str(uuid.uuid5(uuid.NAMESPACE_URL, f"doctor:{row['id']}:{text}"))

    client.upsert(
        collection_name=collection_name,
        points=[
            PointStruct(
                id=point_id,
                vector=emb,
                # NOTE(review): "page_content" / "metadata" look like the payload keys the
                # LangChain Qdrant vector store reads back — confirm before renaming them.
                payload={
                    "page_content": text,
                    "metadata": {
                            "id": row['id'],
                            "name": row['name'],
                            "specialization_name": row['specialization_name'],
                            "sub_specialization_name": row['sub_specialization_name'],
                            "hospital_name": row['hospital_name']
                    },
                },
            )
        ],
    )
    print(text)

# Create Tool

In [None]:
from langchain_qdrant import QdrantVectorStore


def get_retriever():
    """Build a retriever over the doctor collection.

    Wraps the notebook-level Qdrant `client`, `collection_name` and `embeddings_2`
    in a `QdrantVectorStore` and returns its retriever view. A new store object is
    created on every call.
    """
    return QdrantVectorStore(
        embedding=embeddings_2,
        collection_name=collection_name,
        client=client,
    ).as_retriever()

In [None]:
from langchain_core.tools import tool
from typing import Annotated, List

@tool
def search_doctors_recommendation(query: Annotated[str, "search query must contain keywords related to doctor packages"]) -> List[str]:
    """Search for doctors by name, specialization, sub specialization, symptom, or hospital name."""
    # The docstring above is also the tool's description, so it is kept to one line.
    documents = get_retriever().invoke(query, k=10)

    # Only the indexed sentence is returned; the metadata payload is dropped.
    contents = []
    for document in documents:
        contents.append(document.page_content)
    return contents

In [None]:
# Smoke-test the tool on its own. A @tool-decorated function is a LangChain tool object,
# so it is executed through .invoke() — calling it like a plain function is deprecated
# in langchain-core.
search_doctors_recommendation.invoke({"query": "psychologist for anxiety in yogyakarta"})

# Create Agent

In [None]:
# access the Google Gemini API
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# Chat model used by this baseline chain and by the graph nodes defined further down.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=env.GOOGLE_API_KEY)

# Baseline prompt: a system instruction plus the raw user question, no retrieved data.
baseline_system_message = "You are a helpful assistant that provides information about doctor information from various hospitals and city but mainly from Siloam Hospital Group."
prompt = ChatPromptTemplate.from_messages(
    [("system", baseline_system_message), ("human", "{question}")]
)

# Prompt piped into the model; invoke with {"question": ...}.
chain = prompt | llm

In [None]:
# Baseline run: ask the model directly, without any retrieved doctor data.
baseline_question = "I'm currently feeling severe anxiety, is there any doctor for my condition?"
chain.invoke({"question": baseline_question})

# Workflow for agent to use tool

In [None]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
# Define state for application
class State(TypedDict):
    """State dictionary passed through the graph; each node returns the key it updates."""
    question: str  # the user's question; read by get_context and generate
    context: List[str]  # doctor descriptions written by retrieve, read by generate
    search: str  # search keywords written by get_context, read by retrieve
    answer: str  # final response text written by generate

In [None]:
def get_context(state: State):
    """Turn the user's question into a short keyword query for the doctor search.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"search": <keywords>}``. Falls back to the original
        question when the model returns an empty string.
    """
    # The output of this node is used verbatim as the vector-search query, so the prompt
    # asks for keywords only. Asking the model to recommend doctors at this step would
    # contradict that and invite invented names, because no doctor data is available yet.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", """
                You are a customer service concierge who prepares search keywords for the doctor directory of the Siloam hospitals group.
                Based on the user's question, extract the most relevant medical specialization or symptom, plus any doctor name, hospital name, or city the user mentions.
                The keywords should be in English.
                If the question already contains keywords, you can return them as is.
                For example, if the user asks 'my stomach hurts', a good keyword is 'Gastroenterology'. If they ask 'I need a doctor for my child', a good keyword is 'Pediatrics'.
                Do not recommend doctors, answer the question, or add any other information.
                Only return the keyword(s).
            """),
            ("human", "{question}"),
        ]
    )
    chain = prompt | llm
    result = chain.invoke({"question": state["question"]})

    keywords = result.content
    if isinstance(keywords, str):
        # Model output commonly carries a trailing newline; keep the query clean.
        keywords = keywords.strip()
    # An empty query would search for nothing meaningful, so reuse the question instead.
    return {"search": keywords or state["question"]}

In [None]:
def retrieve(state: State):
    """Look up doctors matching the extracted keywords.

    Args:
        state: Graph state; only ``state["search"]`` is read.

    Returns:
        A partial state update ``{"context": <list of doctor descriptions>}``.
    """
    # Tools are executed through .invoke(); calling the tool object like a plain
    # function is deprecated in langchain-core.
    retrieved_docs = search_doctors_recommendation.invoke({"query": state["search"]})
    return {"context": retrieved_docs}

In [None]:
def generate(state: State):
    """Write the final recommendation from the retrieved doctor descriptions.

    Args:
        state: Graph state; ``state["question"]`` and ``state["context"]`` are read.

    Returns:
        A partial state update ``{"answer": <model response text>}``.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", """
                You provide doctor recommendations based on the context provided. 
                Answer the user's question by suggesting one or more doctors from the list. 
                Clearly state their name, specialization, and hospital. 
                Format the response nicely. 
                doctor list knowledge: 
                {context}
                If no relevant doctors are found in the context, politely say that you couldn't find a specific match but can help with other queries

                
            """),
            ("human", "{question}"),
        ]
    )
    # Render the retrieved descriptions as one bullet per doctor. Passing the list
    # itself would drop its Python repr (brackets, quotes, escapes) into the prompt.
    context_text = "\n".join(f"- {doc}" for doc in state["context"])

    chain = prompt | llm
    result = chain.invoke({"question": state["question"], "context": context_text})
    return {"answer": result.content}

In [None]:
# Linear pipeline: get_context -> retrieve -> generate, entered at get_context.
graph_builder = StateGraph(State)
graph_builder.add_sequence([get_context, retrieve, generate])
graph_builder.add_edge(START, "get_context")

graph = graph_builder.compile()

In [None]:
# Run the whole pipeline once; the empty fields are placeholders that the nodes fill in.
initial_state = {
    "question": "I'm feeling very stressed and anxious lately, any doctor for my condition?",
    "context": [],
    "search": "",
    "answer": "",
}
response = graph.invoke(initial_state)
print(response["answer"])