In [2]:
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_qdrant import Qdrant
import qdrant_client
from qdrant_client.models import PointStruct, VectorParams, Distance
import pandas as pd
import requests

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# res = requests.get('https://uat-mysiloam-api.siloamhospitals.com/api/v2/doctors/withavailability')
# doctors = res.json()['data']
# print(doctors[0])

In [4]:
import json

with open("doctors.json", "r", encoding="utf-8") as file:
    doctors = json.load(file)

# Now `doctors` is a list of dictionaries again
doctors[0]


{'doctor_id': '49d31a8d-bce4-4600-921e-8cf516819482',
 'name': 'Anasthasya Fielia Litelnoni, M.PSi., Psikolog',
 'doctor_hope_id': 1000001538,
 'hospital_names': 'Siloam Hospitals Manado',
 'specialization_id': '27727de9-0fa2-4b93-bd4d-5c0bd6030c0c',
 'speciality_id': '51637b01-fa8c-4f5c-868f-38b484570431',
 'specialization_name': 'Kesehatan Mental',
 'specialization_name_en': 'Mental Health',
 'sub_specialization_name': 'Psikolog',
 'sub_specialization_name_en': 'Psychologist',
 'image_url': None,
 'is_emergency_enable': False,
 'consultation_price': None,
 'teleconsult_price': 0,
 'hospital_id': '63c6af56-bb9a-4962-a698-454d3345630d',
 'is_secured_booking': False,
 'is_have_schedule': True,
 'consultation_type': '1, 6',
 'doctor_seo_key': 'anasthasya-fielia-litelnoni-m-psi-psikolog',
 'next_avail': '2025-03-14'}

In [5]:
embeddings = FastEmbedEmbeddings(cache_dir="./embedding_cache", model_name="mixedbread-ai/mxbai-embed-large-v1")
url = "http://localhost:6333"
collection_name = "test0908_001_doctor"
distance = Distance.DOT
dimension = 1024

In [4]:
def moveEmbbeding():
    # create function to load data from json 
    client = qdrant_client.QdrantClient(
        url=url,
    )
    if(client.collection_exists(collection_name=collection_name) == False):
        client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=dimension, distance=distance),
        )
    i = 0

    for row in doctors:
        i += 1
        text = f"Doctor with name {row['name']}, is a distinguished {row['sub_specialization_name_en']} at {row['hospital_names']}, specializing in {row['specialization_name_en']}"
        emb = embeddings.embed_query(text)
        print(i)
        id = row['doctor_id']
        client.upsert(
            collection_name=collection_name,
            points=[
                PointStruct(
                    id=str(id), 
                    vector=emb, 
                    payload={
                        "page_content": text,
                        "metadata": {
                                "doctor_id": row['doctor_id'],
                                "name": row['name'],
                        },
                    },
                )
            ],
        )
        print(text)

In [6]:
moveEmbbeding()

1
Doctor with name Anasthasya Fielia Litelnoni, M.PSi., Psikolog, is a distinguished Psychologist at Siloam Hospitals Manado, specializing in Mental Health
2
Doctor with name Citra Hati Leometa, Psi, is a distinguished Psychologist at Siloam Hospitals Lippo Cikarang, specializing in Mental Health
3
Doctor with name Dokter Umum Siloam Yogyakarta, is a distinguished General Practitioner at Siloam Hospitals Yogyakarta, specializing in General Practitioner
4
Doctor with name dr. A. A. Ayu Nancy Karang, SpTHT-KL, is a distinguished Otorhinolaryngologist at Siloam Hospitals Denpasar, specializing in Otorhinolaryngology (ENT)
5
Doctor with name dr. A.A Bagus Indra Permadi, SpOG, is a distinguished Obstetrician and Gynecologist at Siloam Hospitals Denpasar, specializing in Obstetrics and Gynecology
6
Doctor with name dr. Abdul Halim Raynaldo, SpJP (K), is a distinguished Preventive Cardiology and Cardiac Rehabilitation Subspecialist at Siloam Hospitals Medan, specializing in Cardiology (Heart)

In [7]:
client = qdrant_client.QdrantClient(
    url=url,
)
qdrant = Qdrant(
    client,
    embeddings=embeddings,
    collection_name=collection_name,
    distance_strategy=distance,
)

  qdrant = Qdrant(


In [8]:
doc = qdrant.similarity_search_with_score('eka', score_threshold=0.5, k=20)
for d in doc:
    print(d[0].metadata['name'])

dr. Prass Ekasetia Poetra
dr. Eka Rahmadini HS, SpM
Dr. dr. I Gusti Ayu Putu Eka Pratiwi, M.Kes, SpA (K)
Prof. Dr. Dr. dr. Eka J. Wahjoepramono, SpBS, K 2
dr. Eka Sri Handayani, M.Ked.Klin, SpA
dr. Tiara Eka Mayasari
dr. Eka Seprianti, SpAn-KIC
dr. Ekta Novalina Manik
Dr. dr. FC. Christofani Ekapatria, SpOG, Subsp F.E.R
dr. Maria Michaela Stephani Rea
dr. Putu Eka Ratna Setia
dr. Elya
dr. Andrew Eka Pramudita Sunardi, Sp.JP
dr. Noni Eka Setya Suaebo
drg. Devi Ekawati
dr. Krisma Kurnia, Sp.PD, FINASIM, AIFO-K
dr. Inda Astri Aryani, SpKK (K)
dr. Kadek Yuris Wira Artha, M.Biomed, SpOT
dr. Eka Dian Safitri, SpTHT-KL
dr. Ekawaty Yasinta Larope, SpA (K)


In [9]:
test_list = [{'query': x['name'], 'expected': x['name']} for x in doctors]
print(test_list[0])

{'query': 'Anasthasya Fielia Litelnoni, M.PSi., Psikolog', 'expected': 'Anasthasya Fielia Litelnoni, M.PSi., Psikolog'}


In [10]:
test_list = test_list + [
    {
        'query': 'ucok spesiali paru',
        'expected': 'dr. Ucok P Siregar, SpB, SpOT (K), FICS'
    },
    {
        'query': 'lydia',
        'expected': 'Prof. Dr. Lydia Freyani Hawadi, M.Si., MM., Psikolog'
    },
    {
        'query': 'eka',
        'expected': 'Prof. Dr. Dr. dr. Eka J. Wahjoepramono, SpBS, K'
    },
]

In [11]:
def testing(qdrant, q, expected):
    doc = qdrant.similarity_search_with_score(q, score_threshold=0.5, k=20)
    i = 0
    for d in doc:
        if(d[0].metadata["name"] == expected):
            return [ True, d]
        i += 1
    if len(doc) == 0:
        return [False, "No result"]
    return [False, doc[0]]

In [12]:
results = []
for t in test_list:
    [status, data] = testing(qdrant, t["query"], t["expected"])
    if(status == False):
        _data = {
            "query": t["query"],
            "result": data[0].metadata['name'] if data != "No result" else "No result",
            "score": data[1] if data != "No result" else "No result"
        }
        results.append(_data)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    df = pd.DataFrame(data=results)
    display(df)

Unnamed: 0,query,result,score
0,eka,dr. Prass Ekasetia Poetra,0.575061
