In [22]:
import warnings
warnings.filterwarnings("ignore")

In [23]:
import pandas as pd
import numpy as np

from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

### Sentence Transformer

Sentence Transformers (a.k.a. SBERT) is the go-to Python module for accessing, using, and training state-of-the-art text and image embedding models. Characteristics of Sentence Transformer (a.k.a bi-encoder) models:

1) Calculates a fixed-size vector representation (embedding) given texts or images.
2) Embedding calculation is often efficient, embedding similarity calculation is very fast.
3) Applicable for a wide range of tasks, such as semantic textual similarity, semantic search, clustering, classification, paraphrase mining, and more.
4) Often used as a first step in a two-step retrieval process, where a Cross-Encoder (a.k.a. reranker) model is used to re-rank the top-k results from the bi-encoder.

https://sbert.net/index.html



In [24]:
# creating embedding model from sentence transformer for getting the embeddings of the text

txt_embedder = SentenceTransformer("all-MiniLM-L6-v2")

In [25]:
print(f"Embedding model size: {txt_embedder.get_sentence_embedding_dimension()}")

Embedding model size: 384


### Qdrant setup

Qdrant (read: quadrant) is a vector similarity search engine and vector database. It provides a production-ready service with a convenient API to store, search, and manage points—vectors with an additional payload Qdrant is tailored to extended filtering support. It makes it useful for all sorts of neural-network or semantic-based matching, faceted search, and other applications.

In [26]:
# creating the vector database client using qdrant

qdrant = QdrantClient(":memory:")

In [27]:
# creating Qdrant Collection to store the data

qdrant.recreate_collection(
    collection_name="clinical_notes",
    vectors_config=models.VectorParams(
        size=txt_embedder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE
    )
)

True

### Data Loading

In [28]:
notes_data = pd.read_csv("../data/train.csv")

In [29]:
notes_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10822 entries, 0 to 10821
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ID      10822 non-null  int64 
 1   Note    10822 non-null  object
 2   json    10822 non-null  object
dtypes: int64(1), object(2)
memory usage: 253.8+ KB


In [30]:
notes_data.isnull().sum()

ID      0
Note    0
json    0
dtype: int64

In [31]:
notes_data.duplicated().sum()

np.int64(0)

In [32]:
rag_notes = notes_data.sample(1000).to_dict("records")

In [33]:
print(f"Length of the data : {len(rag_notes)}")

Length of the data : 1000


### Vectorize

In [34]:
qdrant.upload_points(
    collection_name="clinical_notes",
    points=[
        models.PointStruct(
            id=d["ID"],
            vector=txt_embedder.encode(d["Note"]).tolist(),
            payload=d
        ) for d in rag_notes
    ]
)

In [35]:
search_prompt = "I am suffering from fever, suggest what I can do as a remedy in the next two days."

In [36]:
# searching for some clinical suggestions

hits = qdrant.search(
    collection_name="clinical_notes",
    query_vector=txt_embedder.encode(search_prompt).tolist(),
    query_filter=models.Filter(
        must=[
            models.FieldCondition(
                key='ID',
                range=models.Range(lte=1000))
                ]),
    limit=3
)

In [37]:
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'ID': 944, 'Note': 'Clinical Note:\n\nPatient: 15-year-old female\nChief Complaint: Influenza-like symptoms\n\nHistory of Present Illness:\nThe patient, a 15-year-old female, presents to the clinic with complaints of flu-like symptoms. She reports experiencing fever, fatigue, headache, and joint pain. Additionally, the patient mentions frequent urination and dry skin, which are not typical flu symptoms.\n\nReview of Systems:\n- General: Positive for fever and fatigue\n- Neurological: Positive for headache\n- Musculoskeletal: Positive for joint pain\n- Urinary: Frequent urination\n- Skin: Dry skin\n\nPhysical Examination:\n- Vital Signs:\n  Temperature: 39.4°C (elevated)\n  Heart Rate: 122 bpm (tachycardic)\n  Respiratory Rate: 14 breaths/min (within normal range)\n- General: Patient appears fatigued and uncomfortable\n- Skin: Dry, warm to touch\n- HEENT: No notable abnormalities\n- Cardiovascular: Tachycardic, regular rhythm, no murmurs\n- Respiratory: Clear lung sounds bilaterally\n-

### Integrating RAG with LLAMA

In [38]:
# defining the search results
search_results = [hit.payload for hit in hits]

In [46]:
assistant_content = (
    "Based on the search results, here is some information:\n" +
    "\n".join([str(item) for item in search_results])
)

In [47]:
import ollama

In [48]:
chat_completion = ollama.chat(
    model="llama3.2:latest",
    messages=[
        {
            "role": "system",
            "content": (
                "You are a clinical notes specialist. Your job is to provide actionable suggestions to users' medical queries "
                "based on the provided notes. Be concise and specific, and avoid returning any code or unrelated content."
            )
        },
        {
            "role": "user",
            "content": (
                "I am suffering from fever, fatigue, headache, and joint pain. Based on the provided clinical notes, "
                "what should I do as a remedy in the next two days?"
            )
        },
        {
            "role": "assistant",
            "content": assistant_content
        }
    ]
)

In [49]:
response_content = chat_completion.get("message", {}).get("content", "")
if response_content.strip():
    print("LLM Response Content:")
    print(response_content)
else:
    print("The response was empty or irrelevant. Retrying with a more specific context...")

LLM Response Content:
}
{'ID': 123, 'Note': "**Clinical Notes:**\n\n**Patient Information:**\n- **Age:** 25\n- **Gender:** Female\n\n**Visit Motivation:**\n- Patient presents with complaints suggestive of bronchitis.\n\n**Symptoms:**\n- The patient reports experiencing fever, cough, shortness of breath, chest pain, fatigue, headache, nausea, runny nose, sore throat, joint pain, dizziness, and itchy eyes.\n\n**Vital Signs:**\n- **Temperature:** 37.1°C (within normal range)\n- **Heart Rate:** 74 bpm (normal range is 60-100 bpm)\n- **Respiratory Rate:** 22 breaths/min (normal range is 12-20 breaths/min)\n- **Glucose Level:** 92 mg/dL (slightly elevated; normal range is 70-110 mg/dL)\n\n**Assessment:**\nThe patient’s symptoms, including fever, cough, and shortness of breath, are indicative of acute bronchitis. The presence of joint pain and itchy eyes may suggest a systemic involvement or possible viral etiology. The slightly elevated glucose level warrants further investigation into possi