In [10]:
import datasets
from tqdm.notebook import tqdm
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient, models
from langchain.docstore.document import Document as LangchainDocument

In [11]:
# Open a Qdrant client on the local path "medicalqna.db" (no server URL is given).
client = QdrantClient(path="medicalqna.db")

In [12]:
# Sentence-embedding model; the same instance is reused below to size the
# collection, to embed the documents, and to embed the search query.
encoder = SentenceTransformer("all-MiniLM-L6-v2")

In [None]:
# Start from an empty "medicalqna" collection so that re-running the notebook
# never mixes points from a previous run with new ones.
# `recreate_collection` is deprecated in qdrant-client; the explicit
# exists -> delete -> create sequence below is its documented replacement.
if client.collection_exists(collection_name="medicalqna"):
    client.delete_collection(collection_name="medicalqna")

client.create_collection(
    collection_name="medicalqna",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
        distance=models.Distance.COSINE,
    ),
)

In [None]:
# Load the "train" split of the ruslanmv/ai-medical-chatbot dataset; the cells
# below read each row's "Description", "Patient" and "Doctor" fields.
ds = datasets.load_dataset("ruslanmv/ai-medical-chatbot", split="train")

In [None]:
# Display the loaded dataset object as the cell output.
ds

In [None]:
# Wrap every dataset row in a LangchainDocument: the "Description" field becomes
# the page content, while the patient question and doctor answer are kept as
# metadata for later use.
RAW_KNOWLEDGE_BASE = []
for row in tqdm(ds):
    row_metadata = {"question": row["Patient"], "answer": row["Doctor"]}
    RAW_KNOWLEDGE_BASE.append(
        LangchainDocument(page_content=row["Description"], metadata=row_metadata)
    )

In [None]:
# NOTE: this cell no longer uses multiprocessing; the import is kept only so
# that any other cell relying on `mp` keeps working.
import multiprocessing as mp


def _embedding_text(doc):
    """Return the text that is embedded for `doc`: description, question and answer joined by ': '."""
    return f'''{doc.page_content}: {doc.metadata["question"]}: {doc.metadata["answer"]}'''


def process_document(idx, doc, vector=None):
    """Build the Qdrant point for one knowledge-base document.

    Args:
        idx: Integer used as the point id.
        doc: Document exposing `page_content` and a `metadata` dict with
            "question" and "answer" keys.
        vector: Optional pre-computed embedding (list of floats). When omitted,
            the document text is encoded here with the global `encoder`.

    Returns:
        A `models.PointStruct` whose payload carries the question, the answer
        and the page content.
    """
    if vector is None:
        vector = encoder.encode(_embedding_text(doc)).tolist()
    payload = {"question": doc.metadata["question"], "answer": doc.metadata["answer"], "page_content": doc.page_content}
    return models.PointStruct(id=idx, vector=vector, payload=payload)


# Encode all documents in one batched call instead of one `encode` call per
# document inside a process pool:
#   * `pool.starmap` only returns once every item is done, so wrapping its
#     result in tqdm never showed real progress;
#   * a worker function defined in a notebook that closes over the global model
#     cannot be pickled under the "spawn" start method, and otherwise every
#     worker process carries its own copy of the model;
#   * the encoder batches internally and has its own progress bar.
texts = [_embedding_text(doc) for doc in RAW_KNOWLEDGE_BASE]
vectors = encoder.encode(texts, batch_size=64, show_progress_bar=True)

results = [
    process_document(idx, doc, vector=vector.tolist())
    for idx, (doc, vector) in enumerate(zip(RAW_KNOWLEDGE_BASE, vectors))
]

points = results

In [None]:
# Inspect the first point built in the previous cell.
points[0]

In [None]:
# Push every prepared point into the "medicalqna" collection.
# `upload_records` is deprecated in qdrant-client; `upload_points` is its
# replacement and takes the same PointStruct objects via `points=`.
client.upload_points(
    collection_name="medicalqna",
    points=points,
    parallel=8
)

In [13]:
# Embed the free-text question with the same encoder used for the documents,
# then ask the "medicalqna" collection for at most five matching points.
question_vector = encoder.encode("how to perform cpr?").tolist()
hits = client.search(
    collection_name="medicalqna", query_vector=question_vector, limit=5
)

In [14]:
# Inspect the first returned hit (id, score and stored payload).
hits[0]

ScoredPoint(id=83117, version=0, score=0.5190875365031189, payload={'question': 'A. stroke west nile virus cardiac arrest asthma', 'answer': "Cardiac arrest requires immediate CPR if necessary ACLS or ALS(Advanced Life Support).  However in case of Bronchial Asthma if a patients develop Respiratory fatigue and his Oxygen concentration in blood starts falling put him on a ventilator, but wean him off as early as possible OR if he develops Cyanosis CPR + ALS.  There are other protocols for all the above disease but you stick to cardiac arrest and asthma.  Don't forget Cardiac Asthma, severe cases may requires ACLS.  So it is the indication and symtomology of the patient that gives rise to CPR, ACLS etc. no hard and fast rule.", 'page_content': 'What condition requires cardiopulmonary resuscitation ?'}, vector=None, shard_key=None, order_value=None)

In [None]:
# Concatenate the stored answers of all hits, in result order, with no
# separator between them.
context = "".join(hit.payload['answer'] for hit in hits)

In [None]:
# Show the assembled context string as the cell output.
context

In [None]:
# Print each retrieved answer on its own line, in result order.
for answer_text in (match.payload['answer'] for match in hits):
    print(answer_text)