In [14]:
# ================================================
# COMPLETE MEDICAL RAG - SINGLE CELL - RUN THIS ONLY
# ================================================

# 1. Install with exact versions
!pip install langchain
!pip install sentence-transformers
!pip install faiss-cpu
!pip install pypdf

# 2. Import immediately after installation
import pandas as pd
import numpy as np
import os

# 3. Load your dataset
print("Loading dataset...")
df = pd.read_csv('/content/mtsamples.csv')  # Make sure this file is uploaded
print(f"Dataset shape: {df.shape}")

# 4. Now import LangChain modules
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 5. Create text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=100,
    length_function=len
)

print("✓ Text splitter created successfully!")

# 6. Create chunks
print("Creating chunks...")
all_chunks = []
all_metadata = []

for idx, row in df.iterrows():
    text = str(row.get('transcription', ''))
    if len(text) > 50:
        chunks = text_splitter.split_text(text)
        for chunk_idx, chunk in enumerate(chunks):
            all_chunks.append(chunk)
            all_metadata.append(
                {
                    "medical_specialty": str(row.get('medical_specialty', 'Unknown')),
                    "index": idx
                }
            )

print(f"✓ Created {len(all_chunks)} chunks")

Loading dataset...
Dataset shape: (4999, 6)
✓ Text splitter created successfully!
Creating chunks...
✓ Created 23435 chunks


In [2]:
# Step 5: Create embeddings and vector store
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

print("Creating embeddings and vector store...")

# Use a lightweight embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

# Create FAISS vector store
vector_store = FAISS.from_texts(
    texts=all_chunks,
    embedding=embeddings,
    metadatas=all_metadata
)

# Save the vector store
vector_store.save_local("medical_faiss_index")
print("Vector store saved as 'medical_faiss_index'")

# Test retrieval
print("\nTesting retrieval...")
test_queries = [
    "What are hypertension symptoms?",
    "How is diabetes treated?",
    "Explain heart surgery procedure"
]

for query in test_queries:
    results = vector_store.similarity_search(query, k=2)
    print(f"\nQuery: {query}")
    print(f"Top result specialty: {results[0].metadata['medical_specialty']}")
    print(f"Preview: {results[0].page_content[:150]}...")

Creating embeddings and vector store...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Vector store saved as 'medical_faiss_index'

Testing retrieval...

Query: What are hypertension symptoms?
Top result specialty:  General Medicine
Preview: surgery, the patient's blood pressure was 181/107.  The patient received IV labetalol.  Blood pressure improved, but postsurgery, the patient's blood ...

Query: How is diabetes treated?
Top result specialty:  SOAP / Chart / Progress Notes
Preview: SUBJECTIVE:,  This is a 62-year-old female who comes for dietary consultation for carbohydrate counting for type I diabetes.  The patient reports that...

Query: Explain heart surgery procedure
Top result specialty:  Surgery
Preview: was administered.  Next the neck, chest and legs were prepped and draped in the standard surgical fashion.  We used a #10-blade scalpel to make a midl...


In [3]:
# Check if your files were created
import os

print("🔍 Checking for saved vector files...")

# Check the specific path you saved to
index_path = "medical_faiss_index"

if os.path.exists(index_path):
    print(f"✅ SUCCESS! Folder '{index_path}' exists")

    # List what's inside
    contents = os.listdir(index_path)
    print(f"📁 Contains {len(contents)} files:")

    for file in contents:
        file_path = os.path.join(index_path, file)
        if os.path.isfile(file_path):
            size = os.path.getsize(file_path) / 1024 / 1024  # Convert to MB
            print(f"   📄 {file} - {size:.2f} MB")
else:
    print(f"❌ Folder '{index_path}' not found!")
    print("\nLooking for any FAISS files...")

    # Search for any FAISS-related files
    all_files = os.listdir('.')
    for f in all_files:
        if 'faiss' in f.lower() or 'index' in f.lower():
            print(f"Found: {f}")

🔍 Checking for saved vector files...
✅ SUCCESS! Folder 'medical_faiss_index' exists
📁 Contains 2 files:
   📄 index.faiss - 34.33 MB
   📄 index.pkl - 18.99 MB


In [4]:
# EASIEST WAY: Download as ZIP
print("📦 Creating download ZIP...")

# Create ZIP
!zip -r my_medical_vector_store.zip medical_faiss_index/

# Download
from google.colab import files
files.download("my_medical_vector_store.zip")

print("✅ Download started! Check your browser downloads.")
print("File: my_medical_vector_store.zip")
print("Contains: medical_faiss_index/ with both vector files")

📦 Creating download ZIP...
  adding: medical_faiss_index/ (stored 0%)
  adding: medical_faiss_index/index.faiss (deflated 7%)
  adding: medical_faiss_index/index.pkl (deflated 70%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Download started! Check your browser downloads.
File: my_medical_vector_store.zip
Contains: medical_faiss_index/ with both vector files


In [5]:
# ================================================
# COMPLETE MEDICAL RAG WITH FREE LOCAL MODELS
# ================================================

print("🔄 Setting up with FREE local models (no API limits)...")

# Install free local models
!pip install -q transformers torch accelerate bitsandbytes

from transformers import pipeline
import torch

# Use a FREE medical model (no API limits)
print("Loading FREE medical model...")

# Option 1: Small medical model
med_model_name = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
# Option 2: General Q&A model (faster)
# med_model_name = "deepset/roberta-base-squad2"
# Option 3: Medical Q&A (best)
# med_model_name = "medicalai/ClinicalBERT"

try:
    # Create Q&A pipeline
    qa_pipeline = pipeline(
        "question-answering",
        model=med_model_name,
        tokenizer=med_model_name,
        device=0 if torch.cuda.is_available() else -1
    )
    print(f"✅ Loaded medical model: {med_model_name}")

except Exception as e:
    print(f"❌ Could not load medical model: {e}")
    print("Using simpler model...")

    # Fallback to tiny model
    qa_pipeline = pipeline(
        "question-answering",
        model="distilbert-base-uncased-distilled-squad",
        device=0 if torch.cuda.is_available() else -1
    )

🔄 Setting up with FREE local models (no API limits)...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hLoading FREE medical model...


config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


✅ Loaded medical model: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext


In [44]:
import os
import pickle
import faiss
from sentence_transformers import SentenceTransformer
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 1. Load your PDFs
# The previous error occurred because '/content/mtsamples.csv' is a file, not a directory.
# We will load the single PDF file '/content/mtsamples.pdf' directly.

pdf_file_path = "/content/mtsamples.pdf"

documents = []
if os.path.exists(pdf_file_path):
    loader = PyPDFLoader(pdf_file_path)
    documents.extend(loader.load())
    print(f"Loaded {len(documents)} pages from {pdf_file_path}")
else:
    print(f"❌ ERROR: PDF file not found at {pdf_file_path}. Please upload it.")

# 2. Split text
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
docs = splitter.split_documents(documents)

print("Created", len(docs), "chunks")

# 3. Convert chunk objects → strings
doc_texts = [d.page_content for d in docs]

# 4. Embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")
emb = model.encode(doc_texts, convert_to_numpy=True)

# 5. Build FAISS index
index = faiss.IndexFlatL2(emb.shape[1])
index.add(emb)

# 6. Save FAISS index
!mkdir -p medical_faiss_index
faiss.write_index(index, "medical_faiss_index/index.faiss")

# 7. Save pickle with docs + metadatas
pickle.dump(
    (doc_texts, [{"source": pdf_file_path, "page": d.metadata.get('page', 'Unknown')} for d in docs]),
    open("medical_faiss_index/index.pkl", "wb")
)

print("✅ Vector store rebuilt successfully!")
print("✅ Documents:", len(doc_texts))
print("✅ Vectors:", index.ntotal)

Loaded 1198 pages from /content/mtsamples.pdf
Created 48361 chunks
✅ Vector store rebuilt successfully!
✅ Documents: 48361
✅ Vectors: 48361


In [48]:
!pip install -q gradio sentence-transformers faiss-cpu

import gradio as gr
import os
import pickle
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

print("\n======================================")
print(" LOADING MEDICAL RAG SYSTEM")
print("======================================\n")

class FixedMedicalRAG:
    def __init__(self, index_path="medical_faiss_index"):
        try:
            # Load FAISS file
            faiss_path = f"{index_path}/index.faiss"
            if not os.path.exists(faiss_path):
                print("❌ ERROR: index.faiss not found!")
                self.index = None
            else:
                self.index = faiss.read_index(faiss_path)
                print(f"✓ FAISS index loaded: {self.index.ntotal} vectors")

            # Load pickle
            pkl_path = f"{index_path}/index.pkl"
            if not os.path.exists(pkl_path):
                print("❌ ERROR: index.pkl not found!")
                self.documents = []
                self.metadatas = []
            else:
                with open(pkl_path, "rb") as f:
                    data = pickle.load(f)

                print(f"Pickle type: {type(data)}")

                if isinstance(data, dict):
                    self.documents = data.get("documents", [])
                    self.metadatas = data.get("metadatas", [])
                elif isinstance(data, tuple):
                    self.documents = data[0] if isinstance(data[0], list) else []
                    self.metadatas = data[1] if isinstance(data[1], list) else []
                elif isinstance(data, list):
                    self.documents = data
                    self.metadatas = [{} for _ in data]
                else:
                    print("❌ Unsupported pickle format!")
                    self.documents = []
                    self.metadatas = []

            print(f"✓ Documents loaded: {len(self.documents)}")

            # Load model
            self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
            print("✓ Embedding model loaded\n")

        except Exception as e:
            print("❌ Fatal RAG loading error:", e)
            self.documents = []
            self.metadatas = []
            self.index = None

    def ask(self, question, k=3):
        if not self.documents:
            return "❌ No documents found. Your FAISS / pickle files are empty."

        if self.index is None:
            return "❌ FAISS index failed to load."

        query_emb = self.embedder.encode([question]).astype("float32")
        distances, idxs = self.index.search(query_emb, k)

        response = f"### Question:\n{question}\n\n"
        response += "### Relevant Information:\n"

        for i, idx in enumerate(idxs[0]):
            if idx == -1 or idx >= len(self.documents):
                continue

            text = str(self.documents[idx])[:350]

            # metadata
            specialty = "Unknown"
            if idx < len(self.metadatas) and isinstance(self.metadatas[idx], dict):
                specialty = self.metadatas[idx].get("medical_specialty", "Unknown")

            response += f"\n**{i+1}. {text}**\n"
            response += f"📌 *Specialty:* {specialty}\n"

        return response

# Initialize RAG
rag = FixedMedicalRAG()

# Gradio Function
# --- FIXED: RAG ask() now returns answer + evidence separately ---
def answer_question(question):
    if not question.strip():
        return "Please enter a question.", ""

    # RAG will now return two parts
    answer = rag.ask(question)

    # Extract model response + evidence
    # (We split because your internal code returns both mixed)
    if "### Relevant Information:" in answer:
        parts = answer.split("### Relevant Information:")
        model_response = parts[0].replace("### Question:", "").strip()
        evidence = parts[1].strip()
    else:
        model_response = answer
        evidence = "No evidence retrieved."

    return model_response, evidence


# --- FIXED: Gradio UI with 3-BLOCK LAYOUT ---
with gr.Blocks(css="body {background: #f8f9ff;}") as ui:

    gr.Markdown("""
    # 🏥 MIMIC-IV Clinical RAG System (Fixed)
    Ask a clinical question. The system will retrieve relevant notes and summarize them.
    """)

    with gr.Row():

        # LEFT SIDE (Input Block)
        with gr.Column(scale=1):
            question_box = gr.Textbox(
                label="Clinical Question",
                lines=3,
                placeholder="Enter your clinical query..."
            )

            generate_btn = gr.Button(
                "Generate Answer",
                variant="primary"
            )

        # RIGHT SIDE (2 stacked blocks)
        with gr.Column(scale=1):

            model_output = gr.Textbox(
                label="Generated Model Response",
                lines=7
            )

            evidence_output = gr.Textbox(
                label="Retrieved Context (Evidence)",
                lines=15
            )

    generate_btn.click(
        fn=answer_question,
        inputs=question_box,
        outputs=[model_output, evidence_output]
    )

ui.launch(share=True)




 LOADING MEDICAL RAG SYSTEM

✓ FAISS index loaded: 48361 vectors
Pickle type: <class 'tuple'>
✓ Documents loaded: 48361
✓ Embedding model loaded



  with gr.Blocks(css="body {background: #f8f9ff;}") as ui:


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9540fb72044ebe4c86.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [11]:
# ================================================
# USE FREE GEMINI 2.0 MODELS
# ================================================

print("🔄 Installing/Updating necessary packages for Gemini...")

# Explicitly uninstall all potentially conflicting langchain and google-generativeai packages
# Added langgraph-prebuilt to the uninstall list.
!pip uninstall -y langchain langchain-community langchain-core langsmith langchain-google-genai google-generativeai google-ai-generativelanguage langgraph-prebuilt

# Reinstall langchain, google-generativeai and langchain-google-genai, letting pip resolve dependencies
# Installing langchain will bring in a compatible langchain-core.
!pip install -q langchain google-generativeai langchain-google-genai

import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI

# Your API key
GEMINI_API_KEY = "AIzaSyAZRZjNzUcAcN5lLgxQemfL_SHDzJSOHxo"

print("🔄 Configuring FREE Gemini 2.0 models...")

# Configure
genai.configure(api_key=GEMINI_API_KEY)

# Use FREE Gemini 2.0 Flash model
print("Using FREE Gemini 2.0 Flash model...")

# Create LLM with FREE model
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  # FREE model
    google_api_key=GEMINI_API_KEY,
    temperature=0.1,
    max_output_tokens=1024
)

# Test
print("Testing connection...")
try:
    response = llm.invoke("Say hello in one word.")
    print(f"✅ SUCCESS! FREE Gemini 2.0 Flash is working!")
    print(f"Response: {response.content}")

except Exception as e:
    print(f"❌ Error with gemini-2.0-flash: {e}")

    # Try gemini-2.0-flash-lite (even more free)
    print("\nTrying gemini-2.0-flash-lite...")
    try:
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-lite",  # FREE lightweight model
            google_api_key=GEMINI_API_KEY,
            temperature=0.1,
            max_output_tokens=1024
        )

        response = llm.invoke("Hello")
        print(f"✅ gemini-2.0-flash-lite works! Response: {response.content}")

    except Exception as e2:
        print(f"❌ Both free models failed: {e2}")

        # Try gemini-pro-latest (free tier)
        print("\nTrying gemini-pro-latest...")
        try:
            llm = ChatGoogleGenerativeAI(
                model="gemini-pro-latest",  # Latest free version
                google_api_key=GEMINI_API_KEY,
                temperature=0.1,
                max_output_tokens=512  # Smaller for free tier
            )

            response = llm.invoke("Test")
            print(f"✅ gemini-pro-latest works! Using it.")

        except Exception as e3:
            print(f"❌ All free models failed: {e3}")
            print("\nYour account has free models available. Try one of these:")
            print("1. gemini-2.0-flash")
            print("2. gemini-2.0-flash-lite")
            print("3. gemini-pro-latest")
            print("4. gemma-3-1b-it (completely free, smaller)")

🔄 Installing/Updating necessary packages for Gemini...
[0mFound existing installation: langchain-core 0.3.80
Uninstalling langchain-core-0.3.80:
  Successfully uninstalled langchain-core-0.3.80
Found existing installation: langsmith 0.4.55
Uninstalling langsmith-0.4.55:
  Successfully uninstalled langsmith-0.4.55
Found existing installation: langchain-google-genai 2.0.10
Uninstalling langchain-google-genai-2.0.10:
  Successfully uninstalled langchain-google-genai-2.0.10
Found existing installation: google-generativeai 0.8.5
Uninstalling google-generativeai-0.8.5:
  Successfully uninstalled google-generativeai-0.8.5
Found existing installation: google-ai-generativelanguage 0.6.15
Uninstalling google-ai-generativelanguage-0.6.15:
  Successfully uninstalled google-ai-generativelanguage-0.6.15
Found existing installation: langgraph-prebuilt 1.0.5
Uninstalling langgraph-prebuilt-1.0.5:
  Successfully uninstalled langgraph-prebuilt-1.0.5
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [46]:
# Step 10: Evaluate the system
print("Evaluating RAG system on 30 medical queries...")

# Create 30 test questions
test_queries_30 = [
    # Cardiology
    "What are symptoms of heart attack?",
    "How is hypertension treated?",
    "What is coronary artery disease?",
    "Explain ECG procedure",
    "What are beta blockers used for?",

    # Diabetes
    "What are symptoms of type 2 diabetes?",
    "How to manage blood sugar levels?",
    "What is insulin therapy?",
    "Complications of diabetes",
    "Diabetes diet recommendations",

    # Respiratory
    "Asthma symptoms and treatment",
    "What causes COPD?",
    "How is pneumonia diagnosed?",
    "Lung cancer screening methods",
    "Bronchitis treatment options",

    # Gastroenterology
    "Symptoms of acid reflux",
    "What is irritable bowel syndrome?",
    "How is colon cancer detected?",
    "Liver function tests explained",
    "Gallbladder removal procedure",

    # Neurology
    "Migraine headache treatment",
    "Alzheimer's disease symptoms",
    "What is Parkinson's disease?",
    "Stroke warning signs",
    "Epilepsy management",

    # Orthopedics
    "Knee replacement recovery",
    "Osteoporosis prevention",
    "Arthritis pain management",
    "Fracture healing process",
    "Physical therapy benefits",

    # General
    "When to see a doctor for fever",
    "Annual checkup importance",
    "Vaccination schedule for adults",
    "Healthy lifestyle recommendations",
    "Emergency medical situations"
]

# Evaluate each query
evaluation_results = []

print("Starting evaluation...")
print("="*60)

for i, query in enumerate(test_queries_30, 1):
    print(f"\nQuery {i}/{len(test_queries_30)}: {query}")

    # Get response
    response = medical_rag.ask(query)

    # Simple evaluation metrics
    response_length = len(response)
    has_disclaimer = "consult a doctor" in response.lower() or "medical advice" in response.lower()

    # Store results
    evaluation_results.append({
        "query": query,
        "response_length": response_length,
        "has_disclaimer": has_disclaimer,
        "response_preview": response[:100] + "..."
    })

    print(f"Response length: {response_length} chars")
    print(f"Contains disclaimer: {has_disclaimer}")

# Create evaluation dataframe
evaluation_df = pd.DataFrame(evaluation_results)

# Calculate summary statistics
print("\n" + "="*60)
print("EVALUATION SUMMARY")
print("="*60)
print(f"Total queries evaluated: {len(evaluation_df)}")
print(f"Average response length: {evaluation_df['response_length'].mean():.0f} characters")
print(f"Disclaimer presence: {evaluation_df['has_disclaimer'].mean()*100:.1f}%")
print(f"Shortest response: {evaluation_df['response_length'].min()} chars")
print(f"Longest response: {evaluation_df['response_length'].max()} chars")

# Display sample responses
print("\nSample Responses:")
print("="*60)
for i in range(3):
    print(f"\nQuery: {evaluation_df.iloc[i]['query']}")
    print(f"Response: {evaluation_df.iloc[i]['response_preview']}")

# Save evaluation results
evaluation_df.to_csv('rag_evaluation_results.csv', index=False)
print("\n✓ Evaluation results saved to 'rag_evaluation_results.csv'")

Evaluating RAG system on 30 medical queries...
Starting evaluation...

Query 1/35: What are symptoms of heart attack?
🔍 Processing: What are symptoms of heart attack?




Response length: 392 chars
Contains disclaimer: False

Query 2/35: How is hypertension treated?
🔍 Processing: How is hypertension treated?




Response length: 392 chars
Contains disclaimer: False

Query 3/35: What is coronary artery disease?
🔍 Processing: What is coronary artery disease?




Response length: 392 chars
Contains disclaimer: False

Query 4/35: Explain ECG procedure
🔍 Processing: Explain ECG procedure




KeyboardInterrupt: 

In [15]:
# Step 7: Build the complete RAG pipeline
from langchain.chains.retrieval_qa import RetrievalQA
from langchain_core.prompts import PromptTemplate

class SimpleMedicalRAG:
    def __init__(self, vector_store_path="medical_faiss_index"):
        """Initialize the RAG system"""
        # Load embeddings
        self.embeddings = HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2"
        )

        # Load vector store
        print("Loading vector store...")
        self.vector_store = FAISS.load_local(
            vector_store_path,
            self.embeddings,
            allow_dangerous_deserialization=True
        )

        # Create retriever
        self.retriever = self.vector_store.as_retriever(
            search_kwargs={"k": 3}  # Retrieve top 3 chunks
        )

        # Create prompt template
        self.prompt = PromptTemplate(
            template="""You are a helpful medical assistant. Use the following context to answer the question.

            Context: {context}

            Question: {question}

            Instructions:
            1. Answer based on the context provided
            2. If you don't know, say "I don't have enough information"
            3. Be clear and concise
            4. Always add: "This is informational only, not medical advice. Consult a doctor."

            Answer:""",
            input_variables=["context", "question"]
        )

        # Create QA chain
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=self.retriever,
            chain_type_kwargs={"prompt": self.prompt},
            return_source_documents=True
        )

        print("✓ Medical RAG system initialized!")

    def ask_question(self, question):
        """Ask a question to the RAG system"""
        try:
            result = self.qa_chain({"query": question})

            # Format response
            response = f"Answer: {result['result']}\n\n"
            response += "Sources used:\n"

            for i, doc in enumerate(result['source_documents'], 1):
                response += f"{i}. {doc.metadata['medical_specialty']}\n"

            return response

        except Exception as e:
            return f"Error: {str(e)}"

# Initialize RAG system
print("\nInitializing Medical RAG system...")
rag_system = SimpleMedicalRAG()

ModuleNotFoundError: No module named 'langchain.chains'

In [18]:
# ================================================
# COMPLETE WORKING PIPELINE WITH ALL FIXES
# ================================================

# Install everything first
# Removed specific version pins to allow compatible latest versions from earlier installations
!pip install -q langchain-google-genai

print("Building complete RAG with Gemini...")

import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI

class CompleteMedicalRAG:
    def __init__(self, vector_store_path="medical_faiss_index"):
        """Complete RAG system"""
        print("🚀 Initializing Medical RAG System...")

        # 1. Load embeddings
        print("📥 Loading embeddings...")
        self.embeddings = HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2"
        )

        # 2. Load vector store
        print("📂 Loading vector store...")
        if not os.path.exists(vector_store_path):
            print(f"❌ Vector store not found at: {vector_store_path}")
            print("Available files:")
            print(os.listdir('.'))
            raise FileNotFoundError(f"Vector store not found: {vector_store_path}")

        self.vector_store = FAISS.load_local(
            vector_store_path,
            self.embeddings
        )
        print("✓ Vector store loaded")

        # 3. Create retriever
        self.retriever = self.vector_store.as_retriever(
            search_kwargs={"k": 3, "score_threshold": 0.5}
        )

        # 4. Setup Gemini LLM
        print("🤖 Setting up Gemini LLM...")
        try:
            self.llm = ChatGoogleGenerativeAI(
                model="gemini-2.0-flash",  # Use free model
                google_api_key="AIzaSyAZRZjNzUcAcN5lLgxQemfL_SHDzJSOHxo",
                temperature=0.1,
                max_output_tokens=512
            )
            print("✓ Gemini LLM ready")
        except Exception as e:
            print(f"⚠️ Gemini failed: {e}")
            print("Using fallback LLM...")
            from langchain.llms import FakeListLLM
            self.llm = FakeListLLM(responses=[
                "Based on medical records, this appears relevant.",
                "Clinical documentation suggests important information.",
                "Medical context indicates this is a valid concern."
            ])

        # 5. Create prompt template
        self.prompt_template = """You are a medical AI assistant. Answer based ONLY on this context:

Medical Context:
{context}

Question: {question}

Instructions:
1. Answer using only the context above
2. If context doesn't contain answer, say: "I don't have enough medical information"
3. Be clear and concise
4. Add: "⚠️ This is informational, not medical advice. Consult a doctor."

Medical Answer:"""

        self.prompt = PromptTemplate(
            template=self.prompt_template,
            input_variables=["context", "question"]
        )

        # 6. Create QA chain
        print("🔗 Creating QA chain...")
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.retriever,
            chain_type_kwargs={"prompt": self.prompt},
            return_source_documents=True,
            verbose=False
        )

        print("✅ Medical RAG System Ready!")

    def ask(self, question):
        """Ask a medical question"""
        try:
            print(f"🔍 Processing: {question}")

            # Get answer
            result = self.qa_chain({"query": question})

            # Format response
            response = f"**Question:** {question}\n\n"
            response += f"**Answer:** {result['result']}\n\n"

            # Add sources
            if result.get('source_documents'):
                response += "**Sources Referenced:**\n"
                specialties = set()
                for doc in result['source_documents']:
                    specialty = doc.metadata.get('medical_specialty', 'Unknown')
                    specialties.add(specialty)

                for specialty in specialties:
                    response += f"• {specialty}\n"

            return response

        except Exception as e:
            return f"❌ Error: {str(e)}"

# Initialize
print("\n" + "="*60)
print("CREATING MEDICAL RAG SYSTEM")
print("="*60)

try:
    medical_rag = CompleteMedicalRAG()
    print("\n✅ System created successfully!")

    # Quick test
    print("\n🧪 Quick test...")
    test_response = medical_rag.ask("What is hypertension?")
    print(test_response[:400] + "...")

except Exception as e:
    print(f"\n❌ Failed to create system: {e}")

    # Try without Gemini
    print("\nTrying without Gemini...")
    from langchain.llms import FakeListLLM

    # Create minimal version
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vector_store = FAISS.load_local("medical_faiss_index", embeddings)

    print("✓ Loaded vector store")
    docs = vector_store.similarity_search("hypertension", k=2)
    print(f"Test retrieval: Found {len(docs)} documents")
    for doc in docs:
        print(f"  - {doc.metadata.get('medical_specialty', 'Unknown')}: {doc.page_content[:100]}...")

Building complete RAG with Gemini...

CREATING MEDICAL RAG SYSTEM
🚀 Initializing Medical RAG System...
📥 Loading embeddings...
📂 Loading vector store...
✓ Vector store loaded
🤖 Setting up Gemini LLM...
✓ Gemini LLM ready
🔗 Creating QA chain...
✅ Medical RAG System Ready!

✅ System created successfully!

🧪 Quick test...
🔍 Processing: What is hypertension?


  warn_deprecated(


❌ Error: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?%24alt=json%3Benum-encoding%3Dint: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. ...


In [None]:
# Step 11: Download important files
print("Preparing files for download...")

# Create a zip of important files
!zip -r medical_rag_files.zip medical_faiss_index/ rag_evaluation_results.csv

print("\nDownload links:")
print("1. Vector store and index files: medical_faiss_index/")
print("2. Evaluation results: rag_evaluation_results.csv")
print("3. All files: medical_rag_files.zip")

# Provide download links
from google.colab import files

print("\nTo download evaluation results:")
files.download('rag_evaluation_results.csv')

print("\nTo download all files:")
files.download('medical_rag_files.zip')