In [None]:
!pip install sentence-transformers faiss-cpu transformers peft bitsandbytes

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (fr

In [None]:
import os
import pickle
import numpy as np
import torch
import faiss

from google.colab import drive
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
from IPython.display import Markdown

# Make CUDA failures easier to trace back to the call that triggered them
os.environ.update(CUDA_LAUNCH_BLOCKING="1")

# Attach Google Drive; the index and chunk files are read from /content/drive later on
drive.mount('/content/drive')

Mounted at /content/drive


- **Load FAISS Index and Chunks**

In [None]:
# Paths
index_path = '/content/drive/MyDrive/DEPI_final_project/medical_chatbot_faiss.index'
chunks_path = '/content/drive/MyDrive/DEPI_final_project/chunks.pkl'

# Fail early and say exactly which artifact is missing
missing_paths = [path for path in (index_path, chunks_path) if not os.path.exists(path)]
if missing_paths:
    raise FileNotFoundError(
        f"FAISS index or chunks file not found: {', '.join(missing_paths)}"
    )

index = faiss.read_index(index_path)

# NOTE: pickle.load can execute arbitrary code while deserializing.
# Only load a chunks file that you produced yourself.
with open(chunks_path, 'rb') as f:
    chunks = pickle.load(f)

# Retrieval maps the ids returned by the index straight onto positions in `chunks`,
# so a size mismatch means some hits would point at the wrong (or no) text.
if index.ntotal != len(chunks):
    print(
        f"Warning: index holds {index.ntotal} vectors but {len(chunks)} chunks were loaded; "
        "retrieved text may not match the indexed vectors."
    )

# Move FAISS to GPU if available (best effort: any failure falls back to the CPU index)
try:
    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    print("FAISS index moved to GPU")
except Exception as e:
    print(f"Failed to move index to GPU: {e}. Using CPU.")
    gpu_index = index

Failed to move index to GPU: module 'faiss' has no attribute 'StandardGpuResources'. Using CPU.


- **Load Embedding Model**

In [None]:
# Put the sentence encoder on the GPU when torch can see one, otherwise on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Encoder used to embed queries for retrieval
embed_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

- **Load the Fine-Tuned Medical Model (Qwen2.5-3B-Instruct + LoRA Adapter)**

In [None]:
# Hub ids: the instruct base model and the LoRA adapter that is applied on top of it
base_model_name = "Qwen/Qwen2.5-3B-Instruct"
adapter_model_id = "AbdullahAlnemr1/qwen2.5-medical-lora"

# Tokenizer of the base model; EOS is reused as the padding token, padding goes on the right
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization; computation runs in float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Quantized base model, placed on the available device(s) automatically
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# Attach the LoRA adapter, fold it into the base weights, then switch to eval mode
model = PeftModel.from_pretrained(base_model, adapter_model_id).merge_and_unload()
model.eval()

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.6k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/856 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/59.9M [00:00<?, ?B/s]



Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 2048)
    (layers): ModuleList(
      (0-35): 36 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=True)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=True)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=True)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((2048,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((2048,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((2048,), 

In [None]:
# RAG: Retrieve top_k chunks for query
def retrieve_chunks(query, top_k=5):
    """Return the stored chunks that the index ranks closest to ``query``.

    The query is embedded with the module-level ``embed_model`` and searched
    against ``gpu_index``; the ids in the first result row are used as
    positions in the module-level ``chunks`` sequence.

    Args:
        query: Text to search for.
        top_k: Maximum number of chunks to return.

    Returns:
        A list with at most ``top_k`` entries of ``chunks``, in the order the
        index search returned them. Ids that do not address a valid position
        in ``chunks`` are skipped.
    """
    query_embedding = embed_model.encode([query], show_progress_bar=False).astype('float32')
    _, indices = gpu_index.search(query_embedding, top_k)
    # FAISS fills the id row with -1 when it finds fewer than top_k neighbours.
    # Checking only `idx < len(chunks)` would accept -1 and silently return
    # chunks[-1] as if it were a hit, so the lower bound is checked as well.
    return [chunks[idx] for idx in indices[0] if 0 <= idx < len(chunks)]

# RAG + Generate
def rag_generate(query, top_k=5, max_context_tokens=800, max_prompt_tokens=1000):
    """Answer ``query`` with retrieved chunks supplied to the model as context.

    The chunks returned by ``retrieve_chunks`` are joined with blank lines and
    placed in the user turn ahead of the question. If the prompt would exceed
    ``max_prompt_tokens``, the *context* is shortened; the question and the
    trailing ``<|im_start|>assistant`` header are what the model needs to
    answer at all, so they must not be the part that is cut off.

    Args:
        query: The user's question.
        top_k: Number of chunks to request from ``retrieve_chunks``.
        max_context_tokens: Upper bound on the number of context tokens.
        max_prompt_tokens: Upper bound on the number of tokens in the full prompt.

    Returns:
        A ``(answer, retrieved_chunks)`` tuple: the decoded text of the newly
        generated tokens, and the full list of retrieved chunks (untrimmed).
    """
    retrieved_chunks = retrieve_chunks(query, top_k)
    full_context = "\n\n".join(retrieved_chunks)
    context_tokens = tokenizer.encode(full_context, add_special_tokens=False)

    def build_prompt(context):
        # Chat-style prompt template
        return f"""<|im_start|>system
You are a helpful and specialized medical assistant. You ONLY answer questions related to the medical field, such as diseases, symptoms, medications, anatomy, diagnosis, treatments, healthcare, and related topics.

If the user asks a question that is not related to the medical field, reply with: "I'm sorry, I can only answer questions related to the medical field."
<|im_end|>
<|im_start|>user
Context:
{context}

Question: {query}
<|im_end|>
<|im_start|>assistant
"""

    # Shrink the context until the whole prompt fits. The prompt is re-measured on
    # every pass because decoding a token prefix and re-encoding it inside the
    # prompt is not guaranteed to give exactly the same number of tokens.
    budget = max(0, min(max_context_tokens, len(context_tokens)))
    while True:
        if budget < len(context_tokens):
            context = tokenizer.decode(context_tokens[:budget], skip_special_tokens=True)
        else:
            context = full_context
        prompt = build_prompt(context)
        overflow = len(tokenizer.encode(prompt)) - max_prompt_tokens
        if overflow <= 0 or budget == 0:
            break
        budget = max(0, budget - overflow)

    # truncation stays on only as a last resort for the case where the question
    # alone is longer than the prompt budget.
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=max_prompt_tokens
    ).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    # Drop the prompt tokens so only the newly generated answer is decoded
    decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return decoded, retrieved_chunks

# Baseline: Generate without RAG
def generate_response(instruction, max_prompt_tokens=512):
    """Answer ``instruction`` with the model alone (baseline, no retrieved context).

    If the prompt would exceed ``max_prompt_tokens``, the *instruction* is
    shortened so that the closing ``<|im_start|>assistant`` header is never
    cut off; without that header the model is not cued to produce an answer.

    Args:
        instruction: The user's question.
        max_prompt_tokens: Upper bound on the number of tokens in the full prompt.

    Returns:
        The decoded text of the newly generated tokens.
    """
    def build_prompt(user_text):
        # Chat-style prompt template
        return f"""<|im_start|>system
You are a highly knowledgeable and specialized medical assistant. You ONLY answer questions related to the medical field, such as diseases, symptoms, medications, anatomy, diagnosis, treatments, healthcare, and related topics.

If the user asks a question that is not related to the medical field, reply with: "I'm sorry, I can only answer questions related to the medical field."
<|im_end|>
<|im_start|>user
{user_text}
<|im_end|>
<|im_start|>assistant
"""

    # Shrink the instruction until the whole prompt fits. The prompt is re-measured
    # on every pass because decoding a token prefix and re-encoding it inside the
    # prompt is not guaranteed to give exactly the same number of tokens.
    instruction_tokens = tokenizer.encode(instruction, add_special_tokens=False)
    budget = len(instruction_tokens)
    while True:
        if budget < len(instruction_tokens):
            user_text = tokenizer.decode(instruction_tokens[:budget], skip_special_tokens=True)
        else:
            user_text = instruction
        prompt = build_prompt(user_text)
        overflow = len(tokenizer.encode(prompt)) - max_prompt_tokens
        if overflow <= 0 or budget == 0:
            break
        budget = max(0, budget - overflow)

    # truncation stays on only as a last resort for the case where the fixed
    # part of the prompt alone is longer than the prompt budget.
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=max_prompt_tokens
    ).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    # Drop the prompt tokens so only the newly generated answer is decoded
    decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return decoded

In [None]:
from IPython.display import Markdown, display

# In-domain medical questions used to compare the baseline with the RAG pipeline
queries = [
    "What causes high blood pressure?",
    "Why is high blood pressure so common with people with diabetes?",
]

separator = "=" * 80

for query in queries:
    print(separator)
    display(Markdown(f"### 🔎 Query: *{query}*"))

    # Answer from the model alone
    display(Markdown("#### 🧠 Baseline (No RAG):"))
    no_rag_answer = generate_response(query)
    display(Markdown(no_rag_answer))

    # Answer with retrieved chunks supplied as context
    display(Markdown("#### 📚 With RAG:"))
    rag_answer, retrieved_chunks = rag_generate(query)
    display(Markdown(rag_answer))

    # First 150 characters of every chunk that was retrieved for this query
    display(Markdown("#### 🔍 Retrieved Chunks (Summary):"))
    previews = [
        f"**[{position}]** {text[:150]}..."
        for position, text in enumerate(retrieved_chunks, 1)
    ]
    for preview in previews:
        display(Markdown(preview))



### 🔎 Query: *What causes high blood pressure?*

#### 🧠 Baseline (No RAG):

High blood pressure, also known as hypertension, can be caused by a variety of factors. It is often described as a "silent killer" because many people with high blood pressure don't experience any symptoms. Here are the main causes:

1. **Age**: As you get older, your risk of developing high blood pressure increases.

2. **Family History**: High blood pressure tends to run in families, suggesting a genetic component.

3. **Inactivity**: Lack of physical activity can contribute to high blood pressure.

4. **Overweight or Obesity**: Extra weight, especially around the waist, puts extra strain on your heart and blood vessels.

5. **Poor Diet**: A diet high in salt, saturated fats, and cholesterol can lead to high blood pressure.

6. **Too Much Alcohol**: Excessive alcohol consumption can raise blood pressure over time.

7. **Smoking**: Nicotine in tobacco increases your blood pressure for up to an hour after smoking, and long-term smoking can damage the lining of your arteries.

8. **Stress**: While stress isn’t a direct cause, it can exacerbate other factors that lead to high blood pressure.

9. **Certain Medical Conditions**: Conditions like chronic kidney disease, sleep apnea, and thyroid disorders can contribute to high blood pressure.

10. **Age-Related Changes**: As you age, your body may have a harder time regulating blood pressure.

While some of these risk factors can be controlled (like diet, exercise, and avoiding smoking), others are more difficult to change. It's important to note that sometimes, the cause of high blood pressure is not clear, and it is called "essential hypertension." If you're concerned about your blood pressure, consult your healthcare provider who can advise on ways to manage it.

#### 📚 With RAG:

High blood pressure, also known as hypertension, can be caused by various factors. Here are some common causes:

1. **Age**: As people age, their blood pressure tends to increase.

2. **Genetics**: High blood pressure often runs in families.

3. **Lifestyle Factors**:
   - **Unhealthy Diet**: Consuming a lot of processed foods high in sodium can contribute to high blood pressure.
   - **Lack of Exercise**: Regular physical activity can help manage blood pressure.
   - **Excessive Alcohol Consumption**: Drinking too much alcohol can raise blood pressure.
   - **Smoking**: Smoking increases blood pressure immediately and over time can damage the lining of blood vessels, making them narrower and harder for blood to flow through.

4. **Obesity**: Excess weight, especially around the waist, can lead to higher blood pressure.

5. **Stress**: While stress itself doesn’t usually cause high blood pressure, it can trigger episodes in some people.

6. **Thyroid Problems**: An underactive thyroid gland (hypothyroidism) or an overactive thyroid gland (hyperthyroidism) can both lead to high blood pressure.

7. **Kidney Disease**: Damage to the kidneys can affect how they control blood pressure.

8. **Sleep Apnea**: A sleep disorder characterized by pauses in breathing during sleep can sometimes cause high blood pressure.

9. **Certain Medications**: Some medications, like birth control pills, cold remedies, certain asthma medicines, and erectile dysfunction drugs, can raise blood pressure.

10. **Caffeine Intake**: Large amounts of caffeine can cause a short-term spike in blood pressure.

11. **Medications**: Certain medications can increase blood pressure, including nonsteroidal anti-inflammatory drugs (NSAIDs), certain antidepressants, and decongestants.

Given your specific case, it's important to discuss your symptoms and medical history with a healthcare provider. They can perform tests and provide guidance tailored to your situation. Regular check-ups and maintaining a healthy lifestyle are key in managing blood pressure.

#### 🔍 Retrieved Chunks (Summary):

**[1]** Question: Hi, may I answer your health queries right now ?  Please type your query here... Why does my blood pressure rise for no good reason?  I can ...

**[2]** Question: Hello doctor, I am a 19 year old skinny male. My BP was 158/100, but that could be because I was anxious at the doctor's. I have no prior he...

**[3]** Question: My age is 31, ht 5'8", wt 176. Medical history consist of endometriosis, amenia, IBS, had three operations r/t my endometriosis, including o...

**[4]** Question: im 28 with recently consistant high blood pressure with several readings like 147/84 or 144/94 and so forth i have done nothing out of the o...

**[5]** Question: I'm a 26 year old male with hypertension. My blood pressure averages 160/110. I don't have high cholesterol, I'm not overweight, hypertensio...



### 🔎 Query: *Why is high blood pressure so common with people with diabetes?*

#### 🧠 Baseline (No RAG):

High blood pressure, also known as hypertension, is very common in individuals with diabetes. This association is due to several interconnected factors:

1. **Chronically Elevated Blood Glucose Levels**: Diabetes can lead to the buildup of glucose (sugar) in the bloodstream over time. When this happens, it can damage blood vessels and nerves throughout the body, including the arteries that carry blood to the heart. This damage can cause the arteries to narrow and stiffen, leading to higher blood pressure.

2. **Insulin Resistance and Obesity**: Many people with diabetes have insulin resistance, which means their bodies don't use insulin effectively. This often leads to weight gain, particularly around the waist, which can contribute to high blood pressure. Being overweight or obese is a well-known risk factor for high blood pressure.

3. **Renal Artery Stenosis**: The kidneys play a crucial role in regulating blood pressure. In diabetes, the kidneys can become damaged, and this can sometimes lead to narrowing of the renal arteries (renal artery stenosis). This narrowing can further elevate blood pressure.

4. **Autonomic Neuropathy**: Diabetes can cause nerve damage (neuropathy), particularly in the autonomic nervous system, which controls involuntary functions like blood pressure regulation. Damage to these nerves can impair the body's ability to maintain normal blood pressure.

5. **Complications of Diabetes**: Other complications associated with diabetes, such as retinopathy, neuropathy, and cardiovascular disease, can increase the likelihood of developing high blood pressure.

Given these factors, individuals with diabetes are at a higher risk for developing high blood pressure. It's important for people with diabetes to be regularly monitored for blood pressure levels and to manage their diabetes effectively to reduce this risk. Lifestyle changes, medication, and regular check-ups can help keep blood pressure within a healthy range.

#### 📚 With RAG:

High blood pressure, also known as hypertension, is very common among people with diabetes for several reasons:

1. **Underlying Conditions**: Diabetes can damage the blood vessels over time, leading to narrowing of the arteries (atherosclerosis). This damage increases the risk of high blood pressure.

2. **Age Factor**: People with diabetes tend to live longer, and older adults are more prone to developing high blood pressure. As people with diabetes age, their risk for high blood pressure increases.

3. **Obesity**: Many individuals with diabetes are overweight or obese, which is a major risk factor for high blood pressure. Excess weight increases the workload on the heart and arteries, leading to higher blood pressure readings.

4. **Poor Diet**: Diabetic individuals often have diets high in sodium (salt), which can contribute to high blood pressure. Managing diet is crucial in managing blood pressure for people with diabetes.

5. **Lack of Exercise**: Sedentary behavior, a common issue among those with diabetes, can lead to higher blood pressure. Regular physical activity helps lower blood pressure.

6. **Insulin Resistance**: Many people with type 2 diabetes have insulin resistance, which can impair the body's ability to regulate blood pressure effectively.

7. **Medications**: Certain medications commonly used to treat diabetes (such as diuretics) can sometimes increase blood pressure. It's important for people with diabetes to closely monitor their blood pressure and adjust their treatment plans accordingly.

8. **Complications of Diabetes**: Diabetes can affect various parts of the body including the circulatory system. Over time, these changes can lead to increased blood pressure.

Managing both diabetes and high blood pressure requires a comprehensive approach that includes regular monitoring, lifestyle modifications, and appropriate medication adjustments. It's important for individuals with diabetes to work closely with healthcare providers to manage these conditions effectively.

#### 🔍 Retrieved Chunks (Summary):

**[1]** Question: Good afternoon. My friend has been struggling for a week or two now with her blood pressure not coming down - it goes higher with each visit...

**[2]** Question: I am a type 2 diabetic currently dealing with elevated blood pressure. Have been on b/p meds for probably 20 years, but suddenly (?) I m run...

**[3]** Question: Hi, may I answer your health queries right now ?  Please type your query here...my girlfriend has type 1 diabetes she just took her blood pr...

**[4]** Question: my father has both diabetes and high B.P. and his b.p. is 230/130he is taking regular medicine for b.p. and taking precaution for diabetes s...

**[5]** Question: Hello doctor, I am 20 years old, type 2 diabetes patient. Please give me information about diabetes. Every morning, I do running around 2 to...

- **Out-of-Domain Question (the assistant should decline to answer)**

In [None]:
from IPython.display import Markdown, display

# A non-medical question: both generation paths are expected to refuse it
queries = ["How do I reset my iPhone?"]

for query in queries:
    print("=" * 80)
    query_heading = Markdown(f"### 🔎 Query: *{query}*")
    display(query_heading)

    # Model on its own, without retrieved context
    baseline_heading = Markdown("#### 🧠 Baseline (No RAG):")
    display(baseline_heading)
    no_rag_answer = generate_response(query)
    display(Markdown(no_rag_answer))

    # Model with retrieved chunks in the prompt
    rag_heading = Markdown("#### 📚 With RAG:")
    display(rag_heading)
    rag_answer, retrieved_chunks = rag_generate(query)
    display(Markdown(rag_answer))

    # Show the start of each chunk the retriever returned for this query
    chunks_heading = Markdown("#### 🔍 Retrieved Chunks (Summary):")
    display(chunks_heading)
    for i, chunk in enumerate(retrieved_chunks, start=1):
        chunk_preview = chunk[:150]
        display(Markdown(f"**[{i}]** {chunk_preview}..."))



### 🔎 Query: *How do I reset my iPhone?*

#### 🧠 Baseline (No RAG):

I'm sorry, but resetting your iPhone is not related to the medical field. For assistance with that, you should refer to Apple's support resources or guides.

#### 📚 With RAG:

I'm sorry, I can only answer questions related to the medical field.

#### 🔍 Retrieved Chunks (Summary):

**[1]** Question: I had a laceration on my penis caused by a dog bite in May 2010.  I went to the doctor and got medicine and there was no infection.  It is c...

**[2]** Question: Hi I dropped my iPhone on my 6.5 month old baby. There is small red bump on her forehead/head where the phone hit her. She cried instantaneo...

**[3]** Question: last night i retracted foreskin of my penis for the first time , it retraced easily but got stuck ,then i had to pull it back manually with ...

**[4]** Question: Hi doctor, I cleaned my ear with a Q-tip and now I cannot hear. Please help.
Answer: Hello For further information consult an audiologist on...

**[5]** Question: hellomy big finger, the one that is on the right side of my right handi can't turn it in the part that is connected to the rest of the hand,...