In [1]:
import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from datasets import load_dataset
from qdrant_client import QdrantClient
from qdrant_client.http import models

# Load environment variables from a local .env file (if one exists) so the
# QDRANT_* settings read via os.getenv further down are available.
load_dotenv()

True

In [2]:
# Fetch the training split of the medical Q&A dataset and hand it over as a
# pandas DataFrame in one step, so `data` is only ever bound to the DataFrame.
data = load_dataset(
    "keivalya/MedQuad-MedicalQnADataset",
    split='train',
).to_pandas()
data.head()

Unnamed: 0,qtype,Question,Answer
0,susceptibility,Who is at risk for Lymphocytic Choriomeningiti...,LCMV infections can occur after exposure to fr...
1,symptoms,What are the symptoms of Lymphocytic Choriomen...,LCMV is most commonly recognized as causing ne...
2,susceptibility,Who is at risk for Lymphocytic Choriomeningiti...,Individuals of all ages who come into contact ...
3,exams and tests,How to diagnose Lymphocytic Choriomeningitis (...,"During the first phase of the disease, the mos..."
4,treatment,What are the treatments for Lymphocytic Chorio...,"Aseptic meningitis, encephalitis, or meningoen..."


In [3]:
# Upper bound on the number of rows kept, and the name of the text column
# that is used as the document body.
MAX_ROWS = 15000
DOCUMENT = "Answer"

# First MAX_ROWS rows of the full dataset (positional slice, same as head()).
subset_data = data.iloc[:MAX_ROWS]

In [4]:
# Client for the external Qdrant instance; host and API key come from the
# environment rather than being written into the notebook.
client = QdrantClient(
    os.environ.get("QDRANT_HOST"),
    api_key=os.environ.get("QDRANT_API_KEY"),
)

# Disabled ingestion step: uploads the DOCUMENT column of subset_data into the
# external collection. Presumably left commented out because the collection
# has already been populated -- confirm before re-enabling.
#
# chunks = subset_data[DOCUMENT].to_list()
#
# client.add(
#     collection_name=os.getenv('QDRANT_COLLECTION_NAME'),
#     documents=chunks
# )

## Semantic Cache System

In [5]:
import uuid
import time
from typing import List
from fastembed import TextEmbedding
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct, SearchParams

class SemanticCache:
    """Semantic cache placed in front of an external Qdrant collection.

    A question is embedded and looked up in an in-memory Qdrant collection
    that uses Euclidean distance. If the nearest cached question lies within
    ``threshold``, its stored answer is returned. Otherwise the external
    collection is queried, and the top document is returned and added to the
    cache under the question's embedding.

    Args:
        threshold: Largest Euclidean distance (inclusive) that still counts
            as a cache hit. Smaller values make the cache stricter.
        model_name: Name of the FastEmbed text-embedding model to load.
        vector_size: Dimensionality of the cache collection's vectors. It
            must match the output size of ``model_name`` (384 is the value
            used with the default model).
    """

    def __init__(self, threshold=0.35, model_name="BAAI/bge-small-en-v1.5", vector_size=384):
        self.encoder = TextEmbedding(model_name=model_name)

        # The cache lives in an in-process Qdrant instance; nothing is persisted.
        self.cache_client = QdrantClient(":memory:")
        self.cache_collection_name = "cache"
        self.cache_client.create_collection(
            collection_name=self.cache_collection_name,
            vectors_config=models.VectorParams(
                size=vector_size,
                # Enum member instead of the bare 'Euclid' string literal.
                distance=models.Distance.EUCLID,
            ),
        )

        # Client for the external database; settings are read from the environment.
        self.db_client = QdrantClient(
            os.getenv("QDRANT_HOST"),
            api_key=os.getenv("QDRANT_API_KEY"),
        )
        self.db_collection_name = os.getenv('QDRANT_COLLECTION_NAME')

        self.euclidean_threshold = threshold

    @staticmethod
    def _report_elapsed(start_time):
        """Print the time elapsed since ``start_time`` (a ``perf_counter`` reading)."""
        elapsed_time = time.perf_counter() - start_time
        print(f"Time taken: {elapsed_time:.3f} seconds")

    def get_embedding(self, question):
        """Return the embedding of ``question`` produced by ``self.encoder``."""
        # embed() yields one vector per input document; a single question is
        # passed, so the first (only) vector is taken.
        embedding = list(self.encoder.embed(question))[0]
        return embedding

    def search_cache(self, embedding):
        """Return the single nearest cached point to ``embedding`` (a list of 0 or 1 hits)."""
        search_result = self.cache_client.search(
            collection_name=self.cache_collection_name,
            query_vector=embedding,
            limit=1
        )
        return search_result

    def add_to_cache(self, question, response_text, vector=None):
        """Store ``response_text`` in the cache, keyed by the question's embedding.

        Args:
            question: The question text; embedded only when ``vector`` is None.
            response_text: Answer stored in the point payload under
                ``"response_text"``.
            vector: Optional precomputed embedding of ``question``. Passing it
                avoids running the encoder a second time.
        """
        if vector is None:
            vector = self.get_embedding(question)
        # A random UUID gives every cached entry a unique point id.
        point = PointStruct(
            id=str(uuid.uuid4()),
            vector=vector,
            payload={"response_text": response_text},
        )
        self.cache_client.upload_points(
            collection_name=self.cache_collection_name,
            points=[point]
        )

    def query_database(self, query_text):
        """Query the external collection with ``query_text`` and return up to 3 results."""
        results = self.db_client.query(
            query_text=query_text,
            limit=3,
            collection_name=self.db_collection_name
        )
        return results

    def ask(self, question):
        """Answer ``question`` from the cache if possible, otherwise from the database.

        Prints where the answer came from and how long the lookup took.

        Returns:
            The cached or retrieved answer text, or ``"No answer available."``
            when neither the cache nor the database yields a result.
        """
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        vector = self.get_embedding(question)

        # With Euclidean distance a lower score means a closer match.
        for hit in self.search_cache(vector) or ():
            if hit.score <= self.euclidean_threshold:
                print('Answer recovered from Cache.')
                print(f'Found cache with score {hit.score:.3f}')
                self._report_elapsed(start_time)
                return hit.payload['response_text']

        db_results = self.query_database(question)
        if db_results:
            response_text = db_results[0].document
            # Reuse the embedding computed above instead of encoding again.
            self.add_to_cache(question, response_text, vector=vector)
            print('Answer added to Cache.')
            self._report_elapsed(start_time)
            return response_text

        # Fallback if no response is found
        print('No answer found in Cache or Database.')
        self._report_elapsed(start_time)
        return "No answer available."

In [6]:
# Fresh cache with the default threshold, plus two differently worded versions
# of the same question to exercise the miss-then-hit path.
cache = SemanticCache()
question_1, question_2 = (
    "Explain briefly what is a Sydenham chorea",
    "Briefly explain me what is a Sydenham chorea.",
)

Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]

In [7]:
# First query against an empty cache: the answer is fetched from the external
# database and then stored in the cache (see "Answer added to Cache." below).
cache.ask(question_1)

Answer added to Cache.
Time taken: 0.620 seconds


'Sydenham chorea (SD) is a neurological disorder of childhood resulting from infection via Group A beta-hemolytic streptococcus (GABHS), the bacterium that causes rheumatic fever. SD is characterized by rapid, irregular, and aimless involuntary movements of the arms and legs, trunk, and facial muscles. It affects girls more often than boys and typically occurs between 5 and 15 years of age. Some children will have a sore throat several weeks before the symptoms begin, but the disorder can also strike up to 6 months after the fever or infection has cleared. Symptoms can appear gradually or all at once, and also may include uncoordinated movements, muscular weakness, stumbling and falling, slurred speech, difficulty concentrating and writing, and emotional instability. The symptoms of SD can vary from a halting gait and slight grimacing to involuntary movements that are frequent and severe enough to be incapacitating. The random, writhing movements of chorea are caused by an auto-immune 

In [8]:
# Reworded version of question_1: its embedding falls within the distance
# threshold of the cached entry, so the answer is served from the cache.
cache.ask(question_2)

Answer recovered from Cache.
Found cache with score 0.151
Time taken: 0.013 seconds


'Sydenham chorea (SD) is a neurological disorder of childhood resulting from infection via Group A beta-hemolytic streptococcus (GABHS), the bacterium that causes rheumatic fever. SD is characterized by rapid, irregular, and aimless involuntary movements of the arms and legs, trunk, and facial muscles. It affects girls more often than boys and typically occurs between 5 and 15 years of age. Some children will have a sore throat several weeks before the symptoms begin, but the disorder can also strike up to 6 months after the fever or infection has cleared. Symptoms can appear gradually or all at once, and also may include uncoordinated movements, muscular weakness, stumbling and falling, slurred speech, difficulty concentrating and writing, and emotional instability. The symptoms of SD can vary from a halting gait and slight grimacing to involuntary movements that are frequent and severe enough to be incapacitating. The random, writhing movements of chorea are caused by an auto-immune 