# Setting up Env

In [1]:
import json
import os
import numpy as np
from typing import List, Dict
from sentence_transformers import SentenceTransformer, util

2025-11-30 20:41:53.460761: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764535313.665070      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764535313.725985      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

In [14]:
!pip install transformers accelerate huggingface-hub sentence-transformers faiss-cpu streamlit python-dotenv --quiet


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [15]:
# using this lightweight sentence transformer
import faiss
MODEL_NAME = 'all-MiniLM-L6-v2'
TOP_K=3

# RAG Pipeline

In [25]:
def load_data(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

kb_items = load_data("/kaggle/input/medicaldataset-nlp-a04/combined_rag_data.json")

model = SentenceTransformer(MODEL_NAME,device='cuda')

def generate_embeddings(model, items):
    texts = []
    for item in items:
        if 'medicalKB' in item:
            texts.append(item['medicalKB'])
        elif 'patient_case' in item:
            texts.append(" ".join(item['patient_case']['inputs'].values()))
    embeddings = model.encode(texts, convert_to_tensor=True, device="cuda")
    return embeddings, texts

kb_embeddings, kb_texts = generate_embeddings(model, kb_items)

def retrieve_top_k(query, model, index, items, k=TOP_K):
    q_emb = model.encode([query], convert_to_tensor=True, device="cuda")
    q_emb_np = q_emb.cpu().detach().numpy().astype("float32")
    faiss.normalize_L2(q_emb_np)
    scores, idx = index.search(q_emb_np, k)
    return [(items[i], float(scores[0][j])) for j, i in enumerate(idx[0])]

def build_faiss_index(embeddings):
    embeddings_np = embeddings.cpu().detach().numpy().astype("float32")
    faiss.normalize_L2(embeddings_np) 
    d = embeddings_np.shape[1]
    index = faiss.IndexFlatIP(d)
    index.add(embeddings_np)
    return index


faiss_index = build_faiss_index(kb_embeddings)
faiss.write_index(faiss_index, "medical_kb_index.faiss")

def construct_prompt(patient_case, retrieved):
    context_str = "Relevant Information:\n"
    for item, score in retrieved:
        if 'medicalKB' in item:
            context_str += f"- {item['medicalKB'][:500]}... (Relevance: {score:.2f})\n"
        elif 'patient_case' in item:
            patient_text = " ".join(item['patient_case']['inputs'].values())
            context_str += f"- {patient_text[:500]}... (Relevance: {score:.2f})\n"

    prompt = f"""
You are an expert medical diagnostician.
Use the provided medical knowledge and patient cases to explain the query.

{context_str}

Patient Query:
{patient_case['input_text']}

Give reasoning and final answer.
"""
    return prompt



# Preparing HuggingFace LLM

In [26]:
from transformers import AutoTokenizer,AutoModelForCausalLM
import torch
LLM_Model="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
tokenizer=AutoTokenizer.from_pretrained(LLM_Model,use_fast=False)
llm=AutoModelForCausalLM.from_pretrained(LLM_Model,torch_dtype=torch.float16,device_map="auto")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [27]:
def llm_generate(prompt, max_new_tokens=256):
    inputs = tokenizer(prompt, return_tensors="pt").to(llm.device)
    output = llm.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)
    return tokenizer.decode(output[0], skip_special_tokens=True)


In [28]:
def run_rag(patient_case, kb_items, index):
    retrieved = retrieve_top_k(patient_case["input_text"], model, index, kb_items)
    prompt = construct_prompt(patient_case, retrieved)
    answer = llm_generate(prompt)
    return answer, retrieved, prompt



# Model Inference

In [29]:
patient_case = {
    "input_text": "A 65-year-old male with aggressive behavior and recent falls..."
}

faiss_index = faiss.read_index("medical_kb_index.faiss")
answer, retrieved, prompt = run_rag(patient_case, kb_items, faiss_index)
print(answer)



The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



You are an expert medical diagnostician.
Use the provided medical knowledge and patient cases to explain the query.

Relevant Information:
- Dementia with aggresive behavior Man with CAD, prostate cancer, meningioma, and gastritis, and two recent ER visits for aggressive behavior, sent from his nursing facility for safety evaluation after striking another resident in the face at the facility. 

This patient has been living in the Memory Unit about the past year. Seen in our ED on for similar presentation. On his last visit, after one day in ED observation was willing to take the patient back if under good behavioral control, which he... (Relevance: 0.44)
- Cardiac ArrestChest Pain He is a 58 year old gentleman with a history of significant alcohol use and heavy smoking with a recent neurologic decline who presents to the CCU after witnessed cardiac arrest. 

He has had a steep neurologic decline over the past two months. He has been a lifelong heavy drinker and smoker, but he's been d

# Using API Models

In [31]:
!pip install google-generativeai python-dotenv --quiet

import os
from dotenv import load_dotenv
import google.generativeai as genai

# Get API key
api_key = "AIzaSyCVdJA9SNoTgas-fytZtPatxfn8-5IgmZ4"


genai.configure(api_key=api_key)

# Example prompt
prompt = """
First explain what it diesase could be then give all the reasons
Explain the symptoms of a 60-year-old male with sudden chest pain radiating to the back.
Use headings or bullet points where needed.
Give response in plain text without * Behave like medical assistant
"""

# Generate response
model = genai.GenerativeModel("gemini-2.0-flash")
response = model.generate_content(prompt)

print(response.text)



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Okay, here's a breakdown of what a 60-year-old male experiencing sudden chest pain radiating to the back could indicate, along with possible reasons and symptoms:

**Possible Disease:**

The primary concern with sudden chest pain radiating to the back in a 60-year-old male is **acute aortic dissection**. This is a life-threatening condition. However, other serious possibilities include a **heart attack (acute myocardial infarction)**, and less likely, **pulmonary embolism**.

**Reasons for Sudden Chest Pain Radiating to the Back:**

*   **Aortic Dissection:**
    *   A tear in the inner layer of the aorta (the body's largest artery) allows blood to surge between the layers of the aortic wall, causing them to separate (dissect).
    *   Risk factors include: high blood pressure, genetic conditions affecting the aorta (like Marfan syndrome), pre-existing aortic aneurysm, atherosclerosis (hardening of the arteries), injury to the chest, and occasionally, during weightlifting.
*   **Acute 