# Biomedical Question Answering with RAG (Open Source + Google Colab Compatible)
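This notebook builds a biomedical Retrieval-Augmented Generation (RAG) question-answering system from open-source components: texts from the BioASQ dataset are embedded with a BioBERT sentence encoder and indexed with FAISS, the passages retrieved for a question are passed to BioBART to generate an answer, and the whole pipeline is served through a Gradio interface.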

In [None]:
# Install dependencies
!pip install transformers datasets faiss-cpu sentence-transformers gradio accelerate

In [None]:
# Load BioASQ dataset as biomedical corpus
from datasets import load_dataset
# Load the first 1000 training records of the BioASQ task B dataset.
dataset = load_dataset('bioasq_task_b', split='train[:1000]')

# Keep the 'body' text of every record that has a non-empty one.
# NOTE(review): presumably 'body' holds the text to retrieve over; verify against
# the dataset schema that it is a passage field and not the question text.
corpus = [item['body'] for item in dataset if item.get('body')]

# Fail fast with a clear message: an empty corpus would otherwise only surface
# later as an opaque IndexError when the embedding dimension is read.
if not corpus:
    raise ValueError(
        "No non-empty 'body' fields found in the loaded split; "
        "cannot build a retrieval corpus."
    )

In [None]:
# Create embeddings using sentence-transformers
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Sentence encoder used for both the corpus (here) and incoming queries.
embedder = SentenceTransformer('pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb')

# Embed every corpus entry up front, showing a progress bar while encoding.
corpus_embeddings = embedder.encode(
    corpus,
    show_progress_bar=True,
    convert_to_tensor=False,
)

# Build a flat L2 FAISS index whose dimensionality is taken from the first
# embedding, then add all corpus vectors to it.
embedding_dim = len(corpus_embeddings[0])
index = faiss.IndexFlatL2(embedding_dim)
index.add(np.array(corpus_embeddings))

In [None]:
# Load BioBART open-source model
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Checkpoint shared by the tokenizer and the seq2seq model.
# NOTE(review): confirm this checkpoint id resolves on the Hub and that it is
# tuned for question answering rather than being a general pretrained model.
model_name = 'bionlp/biobart-v2.0'

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Wrap model + tokenizer in a text-to-text generation pipeline for rag_qa.
qa_pipeline = pipeline(
    task='text2text-generation',
    model=model,
    tokenizer=tokenizer,
)

In [None]:
# Define RAG QA function
def rag_qa(query, k=5):
    """Answer a question using retrieval-augmented generation.

    The query is embedded with the module-level ``embedder``, the nearest
    corpus entries are looked up in the module-level FAISS ``index``, and the
    retrieved texts are joined into a context that is passed, together with
    the question, to ``qa_pipeline``.

    Args:
        query: The user's question as a string (may be empty or None when the
            UI submits a blank box).
        k: Maximum number of corpus entries to retrieve. Defaults to 5 and is
            clamped to the number of vectors actually stored in the index.

    Returns:
        The generated answer text, or a short explanatory message when the
        query is blank or the index holds no documents.
    """
    # A blank question would otherwise be embedded and "answered" with
    # arbitrary text; tell the user instead.
    if query is None or not query.strip():
        return 'Please enter a biomedical question.'

    if index.ntotal == 0:
        return 'No documents are indexed; cannot retrieve context.'

    # FAISS requires k >= 1 and pads results with -1 when k exceeds the number
    # of indexed vectors, so clamp k to the valid range.
    top_k = max(1, min(int(k), index.ntotal))

    # encode([query]) already yields a (1, dim) batch; FAISS expects float32.
    query_embedding = np.asarray(embedder.encode([query]), dtype='float32')
    _, neighbor_ids = index.search(query_embedding, k=top_k)

    # Drop any -1 padding ids: corpus[-1] would silently return the last
    # document instead of signalling "no neighbour".
    retrieved_docs = [corpus[i] for i in neighbor_ids[0] if i >= 0]
    context = ' '.join(retrieved_docs)
    prompt = f'question: {query} context: {context}'

    # Several concatenated documents can exceed the model's input window;
    # truncation=True lets the tokenizer cut the prompt rather than fail.
    result = qa_pipeline(prompt, max_length=256, truncation=True)[0]['generated_text']
    return result

In [None]:
# Gradio Interface
import gradio as gr

# Input and output widgets for the single-question, single-answer UI.
question_box = gr.Textbox(label='Ask a biomedical question')
answer_box = gr.Textbox(label='AI-generated answer')

# Wire the RAG function to the widgets.
demo = gr.Interface(
    fn=rag_qa,
    inputs=question_box,
    outputs=answer_box,
    title='Biomedical RAG QA (BioASQ Dataset)',
    description='RAG pipeline using open-source biomedical embeddings + BioBART for QA',
)

# Start the app with debug mode enabled.
demo.launch(debug=True)