In [3]:
import gradio as gr
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from transformers import pipeline

In [4]:
class Retriever:
    """Semantic retriever over the text chunks of a single PDF file.

    On construction the PDF is loaded, split into overlapping chunks,
    embedded with a SentenceTransformer model and stored in a flat L2
    FAISS index.

    Attributes:
        file_path: Path of the PDF handed to the constructor.
        texts: Chunk documents produced by the text splitter; each exposes
            its text as ``page_content``.
        vector_store: FAISS ``IndexFlatL2`` holding one vector per chunk.
        embeddings: The SentenceTransformer *model* used to encode text
            (not the embedding vectors). The attribute name is kept for
            backward compatibility.
    """

    def __init__(self, file_path):
        """Load ``file_path``, split it into chunks and build the index."""
        self.file_path = file_path
        self.texts = self._load_and_split_text()
        self.vector_store, self.embeddings = self._build_index()

    def _load_and_split_text(self):
        """Return the PDF's pages split with chunk_size=500, chunk_overlap=50."""
        loader = PyPDFLoader(self.file_path)
        pages = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        return splitter.split_documents(pages)

    def _build_index(self):
        """Embed every chunk and return ``(faiss_index, embedding_model)``.

        Raises:
            ValueError: If no text chunks were extracted from the PDF, since
                an index cannot be sized without at least one vector.
        """
        if not self.texts:
            raise ValueError(
                f"No text chunks could be extracted from {self.file_path!r}; "
                "cannot build a retrieval index."
            )
        model = SentenceTransformer('all-MiniLM-L6-v2')
        # FAISS only accepts float32 matrices, so make the dtype explicit.
        embeddings = np.asarray(
            model.encode([text.page_content for text in self.texts]),
            dtype=np.float32,
        )
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)
        return index, model

    def retrieve(self, query, top_k=3):
        """Return the text of the chunks closest to ``query``.

        Args:
            query: Free-text question to embed and search for.
            top_k: Maximum number of chunks to return.

        Returns:
            A list of at most ``top_k`` chunk strings, nearest first. The list
            is shorter when fewer chunks exist, and empty when ``top_k`` is
            not positive.
        """
        # Never ask for more neighbours than there are chunks: FAISS pads the
        # missing slots with -1, which would otherwise index texts[-1].
        k = min(int(top_k), len(self.texts))
        if k <= 0:
            return []
        query_emb = np.asarray(self.embeddings.encode([query]), dtype=np.float32)
        _, indices = self.vector_store.search(query_emb, k)
        n_texts = len(self.texts)
        # Defensive filter in case the index still reports padded (-1) labels.
        return [
            self.texts[i].page_content
            for i in map(int, indices[0])
            if 0 <= i < n_texts
        ]


In [5]:
class Generator:
    """Thin wrapper around a Hugging Face ``text-generation`` pipeline."""

    def __init__(self):
        """Create the text-generation pipeline backed by ``distilgpt2``."""
        # Using distilgpt2 which is much lighter and can run on CPU
        self.generator = pipeline("text-generation", model="distilgpt2", truncation=True)

    def generate(self, prompt, max_new_tokens=200):
        """Generate a continuation of ``prompt``.

        Args:
            prompt: Text the model should continue.
            max_new_tokens: Upper bound on the number of tokens generated
                *after* the prompt.

        Returns:
            The ``generated_text`` field of the first returned sequence.
        """
        # `max_length` counts prompt tokens as well, so a long retrieval
        # prompt would leave no budget for the answer. `max_new_tokens`
        # bounds only the continuation.
        response = self.generator(
            prompt,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
        )
        return response[0]['generated_text']


In [6]:
class ChatBot:
    """Retrieval-augmented chatbot: retrieve context, build a prompt, generate."""

    def __init__(self, retriever, generator):
        """Store the collaborators.

        Args:
            retriever: Object with ``retrieve(query)`` returning a list of strings.
            generator: Object with ``generate(prompt)`` returning a string.
        """
        self.retriever = retriever
        self.generator = generator

    def get_response(self, query):
        """Answer ``query`` using retrieved context.

        The retrieved chunks are joined with newlines and placed in a
        ``Context / User Query / Response`` prompt that is passed to the
        generator.

        Returns:
            The generator's output. When that output begins with the prompt
            itself (the generator echoed its input), the echoed prompt is
            removed and the remainder is whitespace-stripped, so the caller
            receives only the answer. Otherwise the output is returned as-is.
        """
        context = "\n".join(self.retriever.retrieve(query))
        prompt = f"Context: {context}\nUser Query: {query}\nResponse:"
        generated = self.generator.generate(prompt)
        # Do not show the whole retrieved context and query back to the user.
        if generated.startswith(prompt):
            return generated[len(prompt):].strip()
        return generated


In [11]:
# Wire the pipeline together: PDF retriever -> text generator -> chatbot facade
retriever = Retriever(file_path="../data/medical_corpus.pdf")  # Point this at your own PDF corpus
generator = Generator()
chatbot = ChatBot(retriever=retriever, generator=generator)


def chat_with_bot(query):
    """Gradio callback: pass the user's text to the chatbot and return its reply."""
    reply = chatbot.get_response(query)
    return reply


# Text-in / text-out Gradio UI built around the callback above
iface = gr.Interface(
    title="RAG-Based Medical Chatbot",
    description="Ask medical questions, and the bot will provide responses based on the medical corpus.",
    fn=chat_with_bot,
    inputs="text",
    outputs="text",
)

# Start serving the UI
iface.launch()


SSLError: (MaxRetryError("HTTPSConnectionPool(host='cdn-lfs.hf.co', port=443): Max retries exceeded with url: /sentence-transformers/all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1736360706&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczNjM2MDcwNn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9zZW50ZW5jZS10cmFuc2Zvcm1lcnMvYWxsLU1pbmlMTS1MNi12Mi81M2FhNTExNzJkMTQyYzg5ZDkwMTJjY2UxNWFlNGQ2Y2MwY2E2ODk1ODk1MTE0Mzc5Y2FjYjRmYWIxMjhkOWRiP3Jlc3BvbnNlLWNvbnRlbnQtZGlzcG9zaXRpb249KiJ9XX0_&Signature=Js-OblR4ExVl7~XVzAkI5VMSwJc7f5XHYapXFopihKFsIOZKDkNBWYL15xRsbfEr9hSuxFuIXtm6FdVuAo74RbJ2ftcx-yN3y1B11ISvRJJkCkw6NaPf55WGYGpVx7rm7hee5A2A~10tq6nFfY0Y2inU2SvrDxff30xxoEDZ~VlFNuCY2zAmZD66QUQnwinP7p63TJ~NCko3q5MuVcxeYfU5JYBSqhXG-O3vQQVujOWaMGqot5lKVs9PraMZLt0MlbuftSLNadt6SRBS~QwoBS6O1~Od50SLNbI~zhnkdLjEcLPzQ4DueAvpsqsLHheC~5sMUvIk1xzC9UyqTjQxyw__&Key-Pair-Id=K3RPWS32NSSJCE (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1149)')))"), '(Request ID: f698094b-c394-4904-87ff-44158ed5b777)')

In [8]:
pip install pypdf

Collecting pypdf
  Using cached pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Using cached pypdf-5.1.0-py3-none-any.whl (297 kB)
Installing collected packages: pypdf
Successfully installed pypdf-5.1.0
Note: you may need to restart the kernel to use updated packages.
