<a href="https://colab.research.google.com/github/avanthika-raja/2022305013/blob/main/HealthcareQ%26A.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ========================
# STEP 1: Install packages
# ========================
!pip install -q google-generativeai biopython faiss-cpu

# ====================
# STEP 2: Import modules
# ====================
import google.generativeai as genai
from Bio import Entrez
import numpy as np
import faiss
import textwrap

# ====================
# STEP 3: Configuration
# ====================
import os
from getpass import getpass

# Never commit a real API key to a notebook: anyone who can read the file can
# use (and bill) the key. Read it from the environment instead, and fall back
# to an interactive, non-echoing prompt when the variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or getpass("Enter your Gemini API key: ")
Entrez.email = "your_email@example.com"             # Required for PubMed access

# ====================
# STEP 4: Initialize Gemini
# ====================
# Register the API key with the SDK, then create the text-generation model
# object that generate_answer_rag() calls to produce answers.
genai.configure(api_key=GEMINI_API_KEY)
generation_model = genai.GenerativeModel(model_name="gemini-1.5-pro")

# ====================
# STEP 5: Fetch PubMed abstracts
# ====================
def fetch_pubmed_abstracts(query, max_results=10):
    """Search PubMed for *query* and return one text document per article.

    Args:
        query: PubMed search term.
        max_results: Maximum number of article IDs to request from the search.

    Returns:
        A list of strings, each holding an article's title followed by its
        abstract text on a single line. Articles without an abstract are
        skipped. Returns an empty list when the search finds nothing.
    """
    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        ids = Entrez.read(search_handle)["IdList"]
    finally:
        search_handle.close()
    if not ids:
        return []

    # Fetch structured XML rather than plain text: splitting the text report on
    # blank lines breaks every article into separate fragments (citation line,
    # title, authors, affiliations, abstract), so the "documents" being indexed
    # were mostly not abstracts at all.
    fetch_handle = Entrez.efetch(db="pubmed", id=",".join(ids), retmode="xml")
    try:
        records = Entrez.read(fetch_handle)
    finally:
        fetch_handle.close()

    documents = []
    for article in records.get("PubmedArticle", []):
        details = article["MedlineCitation"]["Article"]
        # Structured abstracts arrive as several labelled sections; join them.
        sections = details.get("Abstract", {}).get("AbstractText", [])
        body = " ".join(str(part).strip() for part in sections if str(part).strip())
        if not body:
            continue
        title = str(details.get("ArticleTitle", "")).strip()
        documents.append(f"{title} {body}".strip().replace("\n", " "))
    return documents

# ====================
# STEP 6: Embed texts using Gemini embedding-001
# ====================
def embed_texts(texts, task_type="retrieval_document"):
    """Embed each text with Gemini ``models/embedding-001``.

    Args:
        texts: Iterable of strings to embed; one API call is made per string.
        task_type: Embedding task type passed through to
            ``genai.embed_content``. Defaults to ``"retrieval_document"``,
            which is what the original implementation always used.

    Returns:
        A ``float32`` NumPy array with one row per input text. For empty
        input the array is empty.
    """
    vectors = [
        genai.embed_content(
            model="models/embedding-001",
            content=text,
            task_type=task_type,
        )["embedding"]
        for text in texts
    ]
    return np.array(vectors).astype("float32")

# ====================
# STEP 7: Build FAISS index
# ====================
def build_faiss_index(vectors):
    """Create a flat L2 FAISS index and load *vectors* into it.

    Args:
        vectors: 2-D array whose second dimension is the embedding size
            (read from ``vectors.shape[1]``).

    Returns:
        The populated ``faiss.IndexFlatL2`` instance.
    """
    embedding_dim = vectors.shape[1]
    flat_index = faiss.IndexFlatL2(embedding_dim)
    flat_index.add(vectors)
    return flat_index

# ====================
# STEP 8: Generate answer using Gemini Pro + retrieved docs
# ====================
def generate_answer_rag(question, context_texts, index, k=3):
    """Answer *question* with Gemini, grounded in the nearest indexed texts.

    Args:
        question: The user's question.
        context_texts: The texts that were embedded into *index*, in the same
            order, so that an index position maps back to its text.
        index: FAISS index to search.
        k: Maximum number of texts to retrieve as context.

    Returns:
        The model's answer, wrapped to 100 columns with the model's own line
        and paragraph breaks preserved.
    """
    # Embed the question with the query-side task type; the documents in the
    # index were embedded with the document-side task type.
    query_embedding = genai.embed_content(
        model="models/embedding-001",
        content=question,
        task_type="retrieval_query",
    )["embedding"]
    query_vec = np.array(query_embedding, dtype="float32").reshape(1, -1)

    # Never ask for more neighbours than there are texts: FAISS pads missing
    # results with -1, which Python would silently treat as "last element".
    k = min(k, len(context_texts))
    _, top_indices = index.search(query_vec, k)
    hits = [int(i) for i in top_indices[0] if 0 <= i < len(context_texts)]
    retrieved = "\n\n".join(context_texts[i] for i in hits)

    prompt = f"""
You are a helpful and knowledgeable healthcare assistant. Use the following PubMed research context
to answer the user's question.

User Question:
{question}

PubMed Context:
{retrieved}

Provide a clear, evidence-based medical response:
"""
    response = generation_model.generate_content(prompt)
    # Wrap line by line: textwrap.fill on the whole text would replace every
    # newline with a space and merge paragraphs and list items together.
    return "\n".join(
        textwrap.fill(line, width=100) for line in response.text.splitlines()
    )

# ====================
# STEP 9: Run the Bot
# ====================
# Interactive driver: build a knowledge base for one topic, then answer
# questions against it until the user types 'exit' or 'quit'.
print("Gemini RAG Healthcare Q&A Bot (embedding-001 + FAISS + PubMed)\n")
topic = input("Enter a medical topic to build the knowledge base (e.g., 'malaria vaccine'): ").strip()
# An empty topic cannot be searched; treat it the same as "nothing found".
abstracts = fetch_pubmed_abstracts(topic, max_results=15) if topic else []

if not abstracts:
    print("No relevant PubMed abstracts found.")
else:
    print("\nEmbedding and indexing PubMed data...")
    vectors = embed_texts(abstracts)
    index = build_faiss_index(vectors)

    print("\nKnowledge base ready. Ask questions related to this topic.")
    while True:
        # Strip so that stray whitespace (e.g. "exit ") still ends the session.
        user_q = input("\nAsk a question (or type 'exit'): ").strip()
        if user_q.lower() in ('exit', 'quit'):
            print("Goodbye!")
            break
        if not user_q:
            # Nothing was asked; do not spend API calls on an empty question.
            continue
        answer = generate_answer_rag(user_q, abstracts, index)
        print("\nAnswer:\n", answer, "\n")

Gemini RAG Healthcare Q&A Bot (embedding-001 + FAISS + PubMed)


Embedding and indexing PubMed data...

Knowledge base ready. Ask questions related to this topic.

Answer:
 Prion diseases are a group of rare, fatal neurodegenerative conditions. They cause progressive
damage to the brain, leading to a decline in neurological function and ultimately death.  This
damage is characterized by the abnormal accumulation of a misfolded protein called a prion
(PrP<sup>Sc</sup>), specifically a misfolded version of a normal cellular protein (PrP<sup>C</sup>).
This abnormal prion protein triggers a chain reaction, causing more normal prions to misfold and
clump together. These clumps disrupt the normal structure and function of brain tissue, leading to
the characteristic spongiform appearance (like a sponge with holes) seen in affected brains.  The
exact mechanism of neurotoxicity is still under investigation, as alluded to in the provided
research titles, but the accumulation of misfolded prion p