In [79]:
# Import necessary libraries
import ollama
import gradio as gr
import tempfile
import re

In [80]:
def process_pdf(
    pdf_path,
    chunk_size=500,
    chunk_overlap=100,
    embedding_model="deepseek-r1:1.5b",
    persist_directory="./chroma_db",
):
    """Load a PDF, split it into chunks and index the chunks in a Chroma store.

    Args:
        pdf_path: Path of the PDF to index. A falsy value (None, "") skips all work.
        chunk_size: Forwarded to RecursiveCharacterTextSplitter as ``chunk_size``.
        chunk_overlap: Forwarded to RecursiveCharacterTextSplitter as ``chunk_overlap``.
        embedding_model: Ollama model name handed to OllamaEmbeddings.
        persist_directory: Directory handed to Chroma.from_documents.

    Returns:
        A ``(text_splitter, vectorstore, retriever)`` tuple, or
        ``(None, None, None)`` when ``pdf_path`` is falsy or the PDF produced
        no chunks to index.
    """
    if not pdf_path:
        return None, None, None

    # Load and extract text from the PDF.
    documents = PyMuPDFLoader(pdf_path).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = text_splitter.split_documents(documents)

    # Nothing to embed (e.g. no extractable text): report failure through the
    # same sentinel the caller already checks instead of indexing an empty list.
    if not chunks:
        return None, None, None

    # Embed the chunks and build the vector index.
    # NOTE(review): every call writes into the same persist_directory; chunks
    # from previously processed PDFs may therefore also be retrieved — confirm
    # and consider a per-document collection if that is not intended.
    embeddings = OllamaEmbeddings(model=embedding_model)
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    retriever = vectorstore.as_retriever()

    return text_splitter, vectorstore, retriever

In [81]:
def combine_docs(docs):
    """Merge the ``page_content`` of each document into one blank-line-separated string."""
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [82]:
def ollama_llm(question, context):
    """Ask the Ollama chat model a question together with its supporting context.

    The question and context are packed into a single user message. Any
    ``<think>...</think>`` sections in the reply are removed and the remaining
    text is returned with surrounding whitespace stripped.
    """
    user_message = {
        "role": "user",
        "content": f"Question: {question}\n\nContext: {context}",
    }
    reply = ollama.chat(model="deepseek-r1:1.5b", messages=[user_message])

    # DOTALL lets a think-section span several lines.
    think_section = re.compile(r"<think>.*?</think>", flags=re.DOTALL)
    raw_text = reply["message"]["content"]
    return think_section.sub("", raw_text).strip()

In [83]:
def rag_chain(question, retriever):
    """Answer ``question`` from the documents that ``retriever`` returns for it.

    Calls ``retriever.invoke(question)``; when that yields documents, their
    combined text is passed to the LLM as context. Otherwise a fixed
    "nothing found" message is returned and the LLM is not called.
    """
    matches = retriever.invoke(question)
    if matches:
        context = combine_docs(matches)
        return ollama_llm(question, context)
    return "No relevant information found in the document."

In [84]:
import os
import tempfile

def ask_question(pdf_file, question):
    """Index the uploaded PDF and answer ``question`` from its contents.

    Args:
        pdf_file: The upload handed over by Gradio. Either a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``. ``None`` means nothing was uploaded.
        question: The user's question text.

    Returns:
        The model's answer, or a human-readable message describing why no
        answer could be produced. Errors are returned as text rather than
        raised so they appear in the UI's text output.
    """
    if pdf_file is None:
        return "No PDF uploaded. Please upload a PDF to get document-based answers."

    # A blank question cannot retrieve anything useful; skip the expensive
    # embedding and LLM calls entirely.
    if not (question or "").strip():
        return "Please enter a question."

    try:
        # Accept both a plain path and a file-like wrapper carrying `.name`.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = getattr(pdf_file, "name", None)

        if not pdf_path or not os.path.exists(pdf_path):
            return "Error: The uploaded file could not be found."

        # Only the retriever is needed here; the other two results are ignored.
        _, _, retriever = process_pdf(pdf_path)
        if retriever is None:
            return "Error: PDF processing failed."

        return rag_chain(question, retriever)

    except Exception as e:
        # Surface any failure as text so the UI shows it instead of crashing.
        return f"Unexpected Error: {str(e)}"

In [85]:

# Build the Gradio UI: a PDF upload and a question box wired to ask_question,
# whose returned string is shown as plain text.
interface = gr.Interface(
    fn=ask_question,
    inputs=[
        # Limit the picker to PDFs so other file types are rejected at upload
        # time instead of failing later during PDF loading.
        gr.File(label="Upload PDF (optional)", file_types=[".pdf"]),
        gr.Textbox(label="Question", placeholder="Type your question here..."),  # Text input for questions
    ],
    outputs="text",
    title="Ask a Question About a PDF",
    description="Upload a PDF and ask a question about it. If no PDF is uploaded, you will get a default response.",
)

# Start the local web server that hosts the interface.
interface.launch()

* Running on local URL:  http://127.0.0.1:7879

To create a public link, set `share=True` in `launch()`.


