<a href="https://colab.research.google.com/github/harinijs03/2023103549_SDC_assignment/blob/main/Medicalconsultant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
!pip install -q langchain pypdf faiss-cpu gradio sentence-transformers transformers

import os
import tempfile
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline

# ---------------------------------------------------------------------------
# Shared, module-level components. They are built once when the cell runs and
# reused by every call to the question-answering handler.
# ---------------------------------------------------------------------------

# Chunking: documents are cut into overlapping character windows.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
)

# Embeddings: sentence-transformers model used to vectorize the chunks.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Generation: a Flan-T5 text2text pipeline wrapped so LangChain can use it
# as an LLM. Inputs are truncated and generation is capped at 512.
LLM_MODEL_NAME = "google/flan-t5-base"
LLM_MAX_LENGTH = 512
summarizer = pipeline(
    task="text2text-generation",
    model=LLM_MODEL_NAME,
    max_length=LLM_MAX_LENGTH,
    truncation=True,
)
llm = HuggingFacePipeline(pipeline=summarizer)

def analyze_document(file, question):
    """Answer a question about an uploaded PDF using retrieval-based QA.

    The PDF bytes are written to a temporary directory, loaded with
    ``PyPDFLoader``, split with the module-level ``text_splitter``, indexed
    in an in-memory FAISS store built with the module-level ``embeddings``,
    and queried through a ``RetrievalQA`` "stuff" chain backed by the
    module-level ``llm``.

    Args:
        file: Raw bytes of the uploaded PDF (the Gradio ``File`` input is
            configured with ``type="binary"``), or a falsy value when
            nothing was uploaded.
        question: The user's question about the document.

    Returns:
        The answer text, followed by the page numbers of the retrieved
        chunks when the chain reports them. Validation problems and
        processing failures are returned as user-facing message strings
        rather than raised, so the UI always has something to display.
    """
    if not file:
        return "⚠️ Please upload a PDF document first"
    # A whitespace-only question carries no query; treat it as empty.
    question = question.strip() if question else ""
    if not question:
        return "⚠️ Please enter a question about the document"

    try:
        # The loader needs a path on disk. TemporaryDirectory removes the
        # directory and its contents on exit, including on error paths.
        with tempfile.TemporaryDirectory() as temp_dir:
            file_path = os.path.join(temp_dir, "uploaded_file.pdf")
            with open(file_path, "wb") as f:
                f.write(file)
            # load() reads all pages eagerly, so the file is not needed
            # once this returns.
            loaded_pages = PyPDFLoader(file_path).load()

        docs = text_splitter.split_documents(loaded_pages)
        if not docs:
            # e.g. a scanned PDF with no text layer: building a FAISS index
            # from an empty list would fail with an opaque error.
            return "⚠️ No readable text could be extracted from this PDF"

        # Create searchable vector database
        db = FAISS.from_documents(docs, embeddings)

        # return_source_documents=True is required for the chain to include
        # "source_documents" in its output; without it the page references
        # below can never be produced.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True,
        )

        result = qa_chain.invoke({"query": question})
        answer = result["result"]

        # Collect distinct page numbers and sort them numerically (sorting
        # the string forms would put "10" before "2").
        known_pages = set()
        has_unknown_page = False
        for doc in result.get("source_documents", []):
            page = doc.metadata.get("page")
            if isinstance(page, int):
                # PyPDFLoader page metadata is 0-based; show 1-based numbers.
                known_pages.add(page + 1)
            else:
                has_unknown_page = True

        page_labels = [str(number) for number in sorted(known_pages)]
        if has_unknown_page:
            page_labels.append("?")
        if page_labels:
            answer += f"\n\n(Found in pages: {', '.join(page_labels)})"

        return answer

    except Exception as e:
        # Top-level UI boundary: report the failure in the output box
        # instead of letting the exception escape the Gradio handler.
        return f"❌ Error processing document: {str(e)}"

# Build the two-column Gradio UI: inputs on the left, answer on the right.
with gr.Blocks(title="Medical Document Consultant") as app:
    # Page heading and short usage hint.
    gr.Markdown("## 📄 Medical Document Consultant")
    gr.Markdown("Upload a medical document and ask questions about its content")

    with gr.Row():
        # Left column: PDF upload, question box and the trigger button.
        with gr.Column():
            # type="binary" hands the handler the raw file bytes.
            file_input = gr.File(label="Upload Medical PDF", type="binary", file_types=[".pdf"])
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="What would you like to know about this document?",
            )
            submit_btn = gr.Button("Analyze")

        # Right column: read-only box that shows the handler's return value.
        with gr.Column():
            output = gr.Textbox(label="Answer", interactive=False, lines=10)

    # Clicking the button runs the handler on (file, question) and writes
    # its returned string into the answer box.
    submit_btn.click(analyze_document, inputs=[file_input, question_input], outputs=output)

# Start the server; share=True requests a public share link.
app.launch(share=True)

Device set to use cpu


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://231800f6e153790652.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


