In [1]:
!pip install groq sentence-transformers faiss-cpu PyPDF2 PyMuPDF numpy gradio

Collecting groq
  Downloading groq-0.25.0-py3-none-any.whl.metadata (15 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting PyMuPDF
  Downloading pymupdf-1.26.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting gradio
  Downloading gradio-5.31.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Col

In [None]:
import os
import json
import numpy as np
from typing import List, Dict, Any, Optional, Tuple
import re
from dataclasses import dataclass, field
from datetime import datetime
import PyPDF2
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq
import gradio as gr
import tempfile
import shutil

# Configuration
GROQ_API_KEY = "Your_Grok_API_KEY"

@dataclass
class PDFDocument:
    """Represents a PDF medical document"""
    filename: str
    title: str
    content: str
    sections: Dict[str, str] = field(default_factory=dict)
    metadata: Dict[str, Any] = field(default_factory=dict)
    page_count: int = 0
    embedding: np.ndarray = None
    chunks: List[Dict[str, Any]] = field(default_factory=list)

class PDFProcessor:
    """Processes PDF files and extracts medical content"""

    def __init__(self):
        self.medical_sections = [
            'abstract', 'introduction', 'methods', 'methodology', 'results',
            'discussion', 'conclusion', 'conclusions', 'background',
            'materials and methods', 'clinical findings', 'case study',
            'literature review', 'analysis', 'findings', 'summary'
        ]

    def extract_pdf_content(self, pdf_path: str) -> PDFDocument:
        """Extract content from PDF using multiple methods for robustness"""

        filename = os.path.basename(pdf_path)

        # Try PyMuPDF first (better for complex layouts)
        try:
            content, metadata, page_count = self._extract_with_pymupdf(pdf_path)
        except Exception as e:
            try:
                content, metadata, page_count = self._extract_with_pypdf2(pdf_path)
            except Exception as e2:
                return PDFDocument(
                    filename=filename,
                    title=filename,
                    content="Failed to extract PDF content",
                    page_count=0
                )

        # Extract title from content or use filename
        title = self._extract_title(content) or filename

        # Parse sections
        sections = self._parse_medical_sections(content)

        # Create chunks for better retrieval
        chunks = self._create_content_chunks(content, filename)

        return PDFDocument(
            filename=filename,
            title=title,
            content=content,
            sections=sections,
            metadata=metadata,
            page_count=page_count,
            chunks=chunks
        )

    def _extract_with_pymupdf(self, pdf_path: str) -> tuple:
        """Extract using PyMuPDF (fitz)"""
        doc = fitz.open(pdf_path)
        content = ""
        metadata = {}

        # Extract metadata
        if doc.metadata:
            metadata = doc.metadata

        # Extract text from all pages
        for page_num in range(len(doc)):
            page = doc[page_num]
            text = page.get_text()
            content += f"\n--- Page {page_num + 1} ---\n{text}"

        page_count = len(doc)
        doc.close()
        return content, metadata, page_count

    def _extract_with_pypdf2(self, pdf_path: str) -> tuple:
        """Extract using PyPDF2 as fallback"""
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            content = ""
            metadata = {}

            # Extract metadata
            if pdf_reader.metadata:
                metadata = dict(pdf_reader.metadata)

            # Extract text from all pages
            for page_num, page in enumerate(pdf_reader.pages):
                text = page.extract_text()
                content += f"\n--- Page {page_num + 1} ---\n{text}"

            return content, metadata, len(pdf_reader.pages)

    def _extract_title(self, content: str) -> Optional[str]:
        """Extract title from PDF content"""
        lines = content.split('\n')

        # Look for title in first few lines
        for line in lines[:20]:
            line = line.strip()
            if len(line) > 10 and len(line) < 200:
                # Skip common headers
                if not any(skip in line.lower() for skip in ['page', 'doi:', 'pmid:', 'copyright']):
                    if not line.isdigit() and not line.startswith('---'):
                        return line

        return None

    def _parse_medical_sections(self, content: str) -> Dict[str, str]:
        """Parse medical paper sections"""
        sections = {}
        content_lower = content.lower()

        for section_name in self.medical_sections:
            # Look for section headers
            patterns = [
                f"\\n{section_name}\\n",
                f"\\n{section_name}:",
                f"\\n{section_name.upper()}\\n",
                f"\\n{section_name.upper()}:",
                f"\\n{section_name.title()}\\n",
                f"\\n{section_name.title()}:"
            ]

            for pattern in patterns:
                matches = list(re.finditer(pattern, content_lower))
                for match in matches:
                    start_pos = match.end()

                    # Find next section or end
                    end_pos = len(content)
                    for other_section in self.medical_sections:
                        if other_section != section_name:
                            next_patterns = [
                                f"\\n{other_section}\\n",
                                f"\\n{other_section}:",
                                f"\\n{other_section.upper()}\\n",
                                f"\\n{other_section.upper()}:"
                            ]
                            for next_pattern in next_patterns:
                                next_match = re.search(next_pattern, content_lower[start_pos:])
                                if next_match:
                                    end_pos = min(end_pos, start_pos + next_match.start())

                    section_content = content[start_pos:end_pos].strip()
                    if len(section_content) > 50:  # Only store substantial sections
                        sections[section_name] = section_content
                        break

                if section_name in sections:
                    break

        return sections

    def _create_content_chunks(self, content: str, filename: str, chunk_size: int = 1000, overlap: int = 200) -> List[Dict[str, Any]]:
        """Create overlapping chunks for better retrieval"""
        chunks = []
        words = content.split()

        for i in range(0, len(words), chunk_size - overlap):
            chunk_words = words[i:i + chunk_size]
            chunk_text = ' '.join(chunk_words)

            # Skip very short chunks
            if len(chunk_text.strip()) < 100:
                continue

            chunks.append({
                'text': chunk_text,
                'chunk_id': f"{filename}_chunk_{len(chunks)}",
                'start_word': i,
                'word_count': len(chunk_words)
            })

        return chunks

class PDFVectorStore:
    """Vector store for PDF documents using FAISS"""

    def __init__(self, embedding_model: str = "all-MiniLM-L6-v2"):
        self.encoder = SentenceTransformer(embedding_model)
        self.documents: List[PDFDocument] = []
        self.chunks: List[Dict[str, Any]] = []
        self.chunk_to_doc: Dict[int, int] = {}
        self.index = None
        self.dimension = 384

    def add_pdf_documents(self, documents: List[PDFDocument]):
        """Add PDF documents to the vector store"""
        chunk_embeddings = []

        for doc_idx, doc in enumerate(documents):
            self.documents.append(doc)

            # Process document chunks
            for chunk in doc.chunks:
                chunk_idx = len(self.chunks)
                self.chunks.append({
                    **chunk,
                    'document_idx': doc_idx,
                    'document_title': doc.title,
                    'filename': doc.filename
                })
                self.chunk_to_doc[chunk_idx] = doc_idx

                # Create embedding for chunk
                embedding = self.encoder.encode(chunk['text'])
                chunk_embeddings.append(embedding)

        # Build FAISS index with chunks
        if chunk_embeddings:
            embeddings = np.array(chunk_embeddings)
            self.dimension = embeddings.shape[1]

            # Create FAISS index
            self.index = faiss.IndexFlatIP(self.dimension)

            # Normalize embeddings for cosine similarity
            faiss.normalize_L2(embeddings)
            self.index.add(embeddings)

    def search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
        """Search for relevant content chunks"""
        if not self.index or not self.chunks:
            return []

        # Encode query
        query_embedding = self.encoder.encode(query).reshape(1, -1)
        faiss.normalize_L2(query_embedding)

        # Search chunks
        scores, indices = self.index.search(query_embedding, k)

        # Return relevant chunks with document info
        results = []
        for i, idx in enumerate(indices[0]):
            if idx < len(self.chunks):
                chunk = self.chunks[idx]
                doc_idx = self.chunk_to_doc[idx]
                document = self.documents[doc_idx]

                results.append({
                    'chunk': chunk,
                    'document': document,
                    'score': float(scores[0][i])
                })

        return results

class PDFMedicalRAG:
    """RAG system for PDF medical literature summarization"""

    def __init__(self, groq_api_key: str):
        self.client = Groq(api_key=groq_api_key)
        self.pdf_processor = PDFProcessor()
        self.vector_store = PDFVectorStore()
        self.documents: List[PDFDocument] = []

    def load_pdf(self, pdf_path: str) -> PDFDocument:
        """Load and process a single PDF"""
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"PDF file not found: {pdf_path}")

        document = self.pdf_processor.extract_pdf_content(pdf_path)
        self.documents.append(document)

        # Update vector store
        self.vector_store.add_pdf_documents([document])

        return document

    def summarize_pdf(self, query: str = None, summary_type: str = "comprehensive") -> Dict[str, Any]:
        """Summarize PDF content using RAG"""

        if not self.documents:
            return {
                "error": "No PDF documents loaded. Please load PDFs first.",
                "timestamp": datetime.now().isoformat()
            }

        # If no query provided, create a general summary query
        if not query:
            query = "Provide a comprehensive summary of the main findings, methodology, and conclusions of this medical literature."

        # Search for relevant content
        search_results = self.vector_store.search(query, k=8)

        if not search_results:
            return {
                "query": query,
                "error": "No relevant content found in the loaded PDFs.",
                "timestamp": datetime.now().isoformat()
            }

        # Generate summary
        summary = self._generate_pdf_summary(query, search_results, summary_type)

        # Prepare source information
        sources = []
        seen_docs = set()
        for result in search_results:
            doc = result['document']
            if doc.filename not in seen_docs:
                sources.append({
                    'filename': doc.filename,
                    'title': doc.title,
                    'pages': doc.page_count,
                    'sections_found': list(doc.sections.keys()) if doc.sections else []
                })
                seen_docs.add(doc.filename)

        return {
            "query": query,
            "summary": summary,
            "sources": sources,
            "relevant_chunks": len(search_results),
            "timestamp": datetime.now().isoformat()
        }

    def _generate_pdf_summary(self, query: str, search_results: List[Dict], summary_type: str) -> str:
        """Generate summary from PDF content using Groq"""

        # Prepare context from search results
        context = "Relevant Content from Medical PDFs:\n\n"

        for i, result in enumerate(search_results, 1):
            chunk = result['chunk']
            doc = result['document']

            context += f"Source {i} - {doc.filename}:\n"
            context += f"Content: {chunk['text'][:800]}...\n"
            context += f"Relevance Score: {result['score']:.3f}\n\n"

        # Define prompts for different summary types
        prompts = {
            "comprehensive": """You are a medical expert analyzing PDF documents. Create a comprehensive summary that includes:

1. **Document Overview**: What type of medical literature this is
2. **Key Findings**: Main research outcomes and clinical findings
3. **Methodology**: Research approaches and study designs used
4. **Clinical Significance**: Practical applications for healthcare
5. **Evidence Quality**: Strength and reliability of the findings
6. **Conclusions**: Main takeaways and implications

Query: {query}

{context}

Provide a well-structured, evidence-based summary suitable for healthcare professionals.""",

            "clinical": """You are a clinician reviewing medical PDF documents. Create a clinical summary focusing on:

1. **Clinical Relevance**: Direct applications to patient care
2. **Treatment Implications**: How this affects current practice
3. **Patient Outcomes**: Impact on patient health and wellbeing
4. **Evidence Level**: Quality and reliability of clinical evidence
5. **Practical Recommendations**: Actionable clinical guidance
6. **Safety Considerations**: Important warnings or contraindications

Query: {query}

{context}

Focus on practical, actionable information for healthcare providers.""",

            "research": """You are a medical researcher analyzing PDF literature. Create a research-focused summary including:

1. **Research Questions**: What the study aimed to investigate
2. **Methodology**: Study design, sample size, and analytical methods
3. **Statistical Results**: Key quantitative findings and significance
4. **Limitations**: Study limitations and potential biases
5. **Implications**: How this advances medical knowledge
6. **Future Directions**: Recommended follow-up research

Query: {query}

{context}

Emphasize scientific rigor and research methodology.""",

            "executive": """You are creating an executive summary of medical PDF documents. Provide:

1. **Key Points**: Most important findings in bullet points
2. **Clinical Impact**: Bottom-line implications for healthcare
3. **Action Items**: What should be done based on this information
4. **Risk/Benefit**: Important considerations for decision-making
5. **Timeline**: Any time-sensitive information
6. **Next Steps**: Recommended follow-up actions

Query: {query}

{context}

Keep it concise but comprehensive for decision-makers."""
        }

        prompt = prompts.get(summary_type, prompts["comprehensive"]).format(
            query=query, context=context
        )

        try:
            completion = self.client.chat.completions.create(
                model="meta-llama/llama-4-scout-17b-16e-instruct",
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert medical researcher and clinician with extensive experience in analyzing medical literature and creating evidence-based summaries."
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                temperature=0.3,
                max_completion_tokens=1024,
                top_p=0.9,
                stream=False,
                stop=None,
            )

            return completion.choices[0].message.content

        except Exception as e:
            return f"Error generating summary: {str(e)}"

    def get_document_info(self) -> List[Dict[str, Any]]:
        """Get information about loaded documents"""
        return [
            {
                'filename': doc.filename,
                'title': doc.title,
                'pages': doc.page_count,
                'sections': list(doc.sections.keys()),
                'chunks': len(doc.chunks),
                'metadata': doc.metadata
            }
            for doc in self.documents
        ]

    def clear_documents(self):
        """Clear all loaded documents"""
        self.documents = []
        self.vector_store = PDFVectorStore()

# Global RAG system instance
rag_system = PDFMedicalRAG(GROQ_API_KEY)

def upload_and_process_pdf(pdf_files) -> Tuple[str, str]:
    """Handle PDF upload and processing"""
    if not pdf_files:
        return "❌ No files uploaded", ""

    try:
        # Clear previous documents
        rag_system.clear_documents()

        results = []
        document_info = []

        for pdf_file in pdf_files:
            # Create temporary file
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                shutil.copy2(pdf_file.name, tmp_file.name)

                try:
                    # Process PDF
                    document = rag_system.load_pdf(tmp_file.name)

                    results.append(f"✅ **{document.filename}**")
                    results.append(f"   - Title: {document.title}")
                    results.append(f"   - Pages: {document.page_count}")
                    results.append(f"   - Sections: {len(document.sections)}")
                    results.append(f"   - Chunks: {len(document.chunks)}")
                    results.append("")

                    # Add to document info
                    document_info.append({
                        'filename': document.filename,
                        'title': document.title,
                        'pages': document.page_count,
                        'sections': list(document.sections.keys())
                    })

                except Exception as e:
                    results.append(f"❌ **{os.path.basename(pdf_file.name)}**: {str(e)}")
                    results.append("")

                finally:
                    # Clean up temporary file
                    os.unlink(tmp_file.name)

        status_message = "\n".join(results)

        # Create document overview
        overview = "## 📚 Loaded Documents\n\n"
        for doc_info in document_info:
            overview += f"**{doc_info['title']}**\n"
            overview += f"- File: {doc_info['filename']}\n"
            overview += f"- Pages: {doc_info['pages']}\n"
            if doc_info['sections']:
                overview += f"- Sections: {', '.join(doc_info['sections'][:5])}{'...' if len(doc_info['sections']) > 5 else ''}\n"
            overview += "\n"

        return status_message, overview

    except Exception as e:
        return f"❌ Error processing PDFs: {str(e)}", ""

def generate_summary(query, summary_type, progress=gr.Progress()):
    """Generate summary with progress tracking"""
    if not rag_system.documents:
        return "❌ Please upload PDF files first!", ""

    try:
        progress(0.1, desc="Searching relevant content...")

        # If query is empty, use default
        if not query.strip():
            query = "Provide a comprehensive summary of this medical literature"

        progress(0.3, desc="Retrieving relevant sections...")

        # Generate summary
        result = rag_system.summarize_pdf(query, summary_type)

        progress(0.7, desc="Generating summary...")

        if "error" in result:
            return f"❌ {result['error']}", ""

        progress(0.9, desc="Formatting results...")

        # Format summary
        summary_text = f"## 🎯 Query\n{result['query']}\n\n"
        summary_text += f"## 📋 Summary ({summary_type.title()})\n\n{result['summary']}\n\n"

        # Format sources
        sources_text = "## 📖 Sources\n\n"
        for i, source in enumerate(result['sources'], 1):
            sources_text += f"**{i}. {source['filename']}**\n"
            sources_text += f"- Title: {source['title']}\n"
            sources_text += f"- Pages: {source['pages']}\n"
            if source['sections_found']:
                sources_text += f"- Sections: {', '.join(source['sections_found'])}\n"
            sources_text += "\n"

        sources_text += f"*Retrieved {result['relevant_chunks']} relevant content chunks*\n"
        sources_text += f"*Generated at: {result['timestamp']}*"

        progress(1.0, desc="Complete!")

        return summary_text, sources_text

    except Exception as e:
        return f"❌ Error generating summary: {str(e)}", ""

def clear_all():
    """Clear all documents and reset"""
    rag_system.clear_documents()
    return "✅ All documents cleared!", "", "", ""

# Create Gradio Interface
def create_interface():
    """Create the Gradio interface"""

    theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="sky",
        neutral_hue="slate",
    )

    with gr.Blocks(theme=theme, title="🏥 Medical Literature RAG Summarizer") as interface:

        # Header
        gr.Markdown("""
        # 🏥 Medical Literature RAG Summarizer

        Upload medical PDFs and get AI-powered summaries using Retrieval-Augmented Generation.
        Ask specific questions about your documents and get evidence-based answers.
        """)

        with gr.Row():
            with gr.Column(scale=1):
                # File Upload Section
                gr.Markdown("## 📁 Upload Medical PDFs")

                pdf_files = gr.Files(
                    label="Upload PDF Files",
                    file_types=[".pdf"],
                    file_count="multiple",
                    height=150
                )

                upload_btn = gr.Button(
                    "🔄 Process PDFs",
                    variant="primary",
                    size="lg"
                )

                upload_status = gr.Textbox(
                    label="Processing Status",
                    lines=8,
                    max_lines=15,
                    interactive=False
                )

                clear_btn = gr.Button(
                    "🗑️ Clear All",
                    variant="secondary"
                )

            with gr.Column(scale=2):
                # Document Overview
                gr.Markdown("## 📚 Document Overview")

                document_overview = gr.Markdown(
                    value="No documents loaded yet. Upload PDFs to get started.",
                    height=300
                )

        gr.Markdown("---")

        # Query Section
        with gr.Row():
            with gr.Column():
                gr.Markdown("## 🔍 Query Your Documents")

                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="What are the main findings? What methodology was used? What are the clinical implications?",
                    lines=3,
                    info="Leave empty for a general summary"
                )

                summary_type = gr.Dropdown(
                    label="Summary Type",
                    choices=[
                        ("Comprehensive", "comprehensive"),
                        ("Clinical Focus", "clinical"),
                        ("Research Focus", "research"),
                        ("Executive Summary", "executive")
                    ],
                    value="comprehensive",
                    info="Choose the type of summary you need"
                )

                generate_btn = gr.Button(
                    "✨ Generate Summary",
                    variant="primary",
                    size="lg"
                )

        # Results Section
        with gr.Row():
            with gr.Column(scale=2):
                summary_output = gr.Markdown(
                    label="Summary",
                    height=500
                )

            with gr.Column(scale=1):
                sources_output = gr.Markdown(
                    label="Sources & References",
                    height=500
                )

        # Footer
        gr.Markdown("""
        ---

        ### 💡 Tips for Better Results:
        - **Specific questions** get more focused answers
        - **Clinical focus** emphasizes patient care applications
        - **Research focus** highlights methodology and statistical findings
        - **Executive summary** provides concise key points for decision-makers

        ### 🔧 Powered by:
        - **Groq** (Llama-4-Scout-17B) for AI summarization
        - **FAISS** for fast semantic search
        - **SentenceTransformers** for document embeddings
        """)

        # Event Handlers
        upload_btn.click(
            fn=upload_and_process_pdf,
            inputs=[pdf_files],
            outputs=[upload_status, document_overview],
            show_progress=True
        )

        generate_btn.click(
            fn=generate_summary,
            inputs=[query_input, summary_type],
            outputs=[summary_output, sources_output],
            show_progress=True
        )

        clear_btn.click(
            fn=clear_all,
            outputs=[upload_status, document_overview, summary_output, sources_output]
        )

        # Example queries
        examples = gr.Examples(
            examples=[
                ["What are the main clinical findings?", "clinical"],
                ["What methodology was used in this research?", "research"],
                ["What are the key takeaways for decision makers?", "executive"],
                ["Summarize the treatment outcomes and effectiveness", "comprehensive"],
                ["Are there any safety concerns or contraindications mentioned?", "clinical"],
                ["What are the limitations of this study?", "research"]
            ],
            inputs=[query_input, summary_type],
            label="💡 Example Questions"
        )

    return interface

def main():
    """Launch the Gradio interface"""
    interface = create_interface()

    # Launch with custom settings
    interface.launch(
        share=True,  # Create public link
        server_name="0.0.0.0",  # Allow external access
        server_port=7860,
        show_api=False,
        favicon_path=None,
        ssl_verify=False
    )

if __name__ == "__main__":
    print("🚀 Starting Medical Literature RAG Summarizer...")
    print("📋 Installing required packages if missing...")

    # Check for required packages
    required_packages = [
        "gradio", "groq", "sentence-transformers",
        "faiss-cpu", "PyPDF2", "PyMuPDF", "numpy"
    ]

    missing_packages = []
    for package in required_packages:
        try:
            __import__(package.replace("-", "_"))
        except ImportError:
            missing_packages.append(package)

    if missing_packages:
        print(f"❌ Missing packages: {', '.join(missing_packages)}")
        print(f"📦 Install with: pip install {' '.join(missing_packages)}")
        exit(1)

    print("✅ All packages available!")
    print("🌐 Launching web interface...")

    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

🚀 Starting Medical Literature RAG Summarizer...
📋 Installing required packages if missing...
❌ Missing packages: faiss-cpu, PyMuPDF
📦 Install with: pip install faiss-cpu PyMuPDF
✅ All packages available!
🌐 Launching web interface...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://064d23e15834940ac5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
