In [None]:
"""
Local PDF Chat with Gradio – 100 % offline
Includes a static “About” tab with personal & software info
"""

import hashlib
import os
import re
from difflib import get_close_matches

import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter

# --------------------------------------------------
# CONSTANTS
# --------------------------------------------------
# Ollama model name; the same model is used for embeddings and for answering.
MODEL_NAME = "llama3"
# Directory (relative to the working directory) that holds the persisted stores.
ROOT = "vectorstores"
# Created at import time; exist_ok avoids an error when it is already there.
os.makedirs(ROOT, exist_ok=True)

# --------------------------------------------------
# HELPER FUNCTIONS
# --------------------------------------------------
def build_vectorstore(pdf_path: str) -> str:
    """Return the path of a persisted vectorstore built from the given PDF.

    The store lives under ``ROOT`` in a directory named after the PDF's base
    name plus a short digest of the file's bytes. Two different PDFs that
    happen to share a filename therefore get separate indexes, while
    re-uploading identical content reuses the existing one.

    Args:
        pdf_path: Filesystem path of the PDF to ingest.

    Returns:
        Directory path of the Chroma store for this PDF.

    Raises:
        OSError: If the PDF cannot be opened for hashing.
        ValueError: If no text chunks could be extracted from the PDF.
    """
    # Hash in 1 MiB chunks so large PDFs are never read into memory at once.
    sha = hashlib.sha256()
    with open(pdf_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            sha.update(chunk)

    pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]
    vs_path = os.path.join(ROOT, f"{pdf_name}-{sha.hexdigest()[:12]}")

    # A non-empty directory for this exact content means it was ingested before.
    if os.path.isdir(vs_path) and os.listdir(vs_path):
        return vs_path

    docs = PyPDFLoader(pdf_path).load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    splits = splitter.split_documents(docs)
    if not splits:
        # Fail with a clear message instead of handing Chroma an empty batch.
        raise ValueError(f"No extractable text found in {pdf_path!r}.")

    embeddings = OllamaEmbeddings(model=MODEL_NAME)
    Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        persist_directory=vs_path,
    )
    return vs_path


def load_qa_chain(vs_path: str) -> RetrievalQA:
    """Build a RetrievalQA chain over the Chroma store persisted at ``vs_path``.

    Args:
        vs_path: Directory of a previously persisted Chroma store.

    Returns:
        A "stuff"-type RetrievalQA chain whose retriever fetches 8 chunks per
        query and which does not return source documents.
    """
    store = Chroma(
        embedding_function=OllamaEmbeddings(model=MODEL_NAME),
        persist_directory=vs_path,
    )
    # Keyword arguments are evaluated left to right, so the retriever is still
    # created before the LLM client.
    return RetrievalQA.from_chain_type(
        retriever=store.as_retriever(search_kwargs={"k": 8}),
        llm=Ollama(model=MODEL_NAME),
        chain_type="stuff",
        return_source_documents=False,
    )


# --------------------------------------------------
# SESSION STATE
# --------------------------------------------------
class State:
    """Mutable holder for the currently loaded PDF and its derived objects.

    Attributes are set per instance in ``__init__`` so that the ``vocab`` list
    is never shared between instances through a class-level default.
    """

    pdf_path: str  # path of the PDF that was last processed ("" if none)
    vocab: list  # distinct lower-cased word tokens found in that PDF
    # qa_chain: question-answering chain for that PDF, or None if none loaded

    def __init__(self) -> None:
        self.pdf_path = ""
        self.vocab = []
        self.qa_chain = None


# Single module-level instance read and written by the Gradio callbacks.
state = State()

# --------------------------------------------------
# GRADIO CALLBACKS
# --------------------------------------------------
def upload_pdf(pdf_file):
    """Ingest the uploaded PDF and report the outcome.

    Builds (or reuses) the vectorstore, creates the QA chain and collects the
    PDF's vocabulary for spell correction. ``state`` is only updated once all
    of these steps have succeeded, so a failed upload never leaves it pointing
    at a mix of the old and the new document.

    Args:
        pdf_file: Path of the uploaded PDF, or ``None`` when nothing was chosen.

    Returns:
        A ``(status_message, chat_tab_update)`` pair. The Chat tab is made
        visible on success, hidden when no file was supplied, and left as it
        is when processing fails.
    """
    if pdf_file is None:
        return "No file uploaded.", gr.update(visible=False)

    try:
        vs_path = build_vectorstore(pdf_file)
        qa_chain = load_qa_chain(vs_path)
        pages = PyPDFLoader(pdf_file).load()
    except Exception as exc:  # UI boundary: surface the failure in the status box
        return f"❌ Could not process PDF: {exc}", gr.update()

    # build vocabulary for fuzzy spell correction
    text = " ".join(page.page_content for page in pages).lower()
    vocab = list(set(re.findall(r"\b\w+\b", text)))

    # Commit everything together now that every step has succeeded.
    state.pdf_path = pdf_file
    state.vocab = vocab
    state.qa_chain = qa_chain

    return f"✅ PDF ingested → vectorstore at `{vs_path}`.", gr.update(visible=True)


# Splits a whitespace-free token into (leading punctuation, core, trailing punctuation).
_EDGE_PUNCT = re.compile(r"^(\W*)(.*?)(\W*)$")


def _correct_spelling(question, vocab):
    """Return ``question`` with out-of-vocabulary words swapped for close matches.

    Words whose lower-cased core is already in ``vocab`` are kept exactly as
    typed, and punctuation around a word is preserved, so a correctly spelled
    question comes back unchanged apart from whitespace being collapsed.
    """
    known = set(vocab)
    fixed = []
    for word in question.split():
        lead, core, trail = _EDGE_PUNCT.match(word).groups()
        lowered = core.lower()
        if not lowered or lowered in known:
            fixed.append(word)
            continue
        matches = get_close_matches(lowered, vocab, n=1, cutoff=0.8)
        fixed.append(f"{lead}{matches[0]}{trail}" if matches else word)
    return " ".join(fixed)


def chat(history, question):
    """Answer ``question`` from the loaded PDF and append the exchange to ``history``.

    Args:
        history: List of ``(user, bot)`` tuples shown in the chatbot; ``None``
            is treated as an empty conversation.
        question: Text typed by the user.

    Returns:
        ``(history, "")`` — the updated conversation and an empty string that
        clears the input box.
    """
    if history is None:
        history = []

    if state.qa_chain is None:
        history.append((question, "Please upload a PDF first."))
        return history, ""

    # Collapse whitespace once so spacing alone never counts as a correction.
    normalized = " ".join((question or "").split())
    if not normalized:
        return history, ""  # nothing to ask; do not call the LLM

    # fuzzy spell correction
    corrected = _correct_spelling(normalized, state.vocab)
    if corrected != normalized:
        history.append((question, f"✏️ Corrected to: {corrected}"))
        question = corrected

    answer = state.qa_chain.invoke(question)["result"].strip()
    if not answer or "no answer" in answer.lower() or "i don't know" in answer.lower():
        answer = "🔍 No."
    history.append((question, answer))
    return history, ""


# --------------------------------------------------
# GRADIO UI
# --------------------------------------------------
# Builds the whole interface. Components appear in the order they are created
# inside each context manager, so statement order here defines the layout.
with gr.Blocks(title="Local PDF Chat") as app:
    gr.Markdown("# 📄 PDF Inquiry by Habib (Ollama – 100 % offline)")

    # ---------- STATIC ABOUT TAB ----------
    # Informational only: a fixed Markdown page with no callbacks attached.
    with gr.Tab("About"):
        gr.Markdown(
            """
            ## 🤖 Local PDF Chat  
            A completely **offline** Gradio application that lets you
            upload any PDF, index it locally with **Chroma + Ollama**, and
            then ask questions that are answered **only** from the document.

            ### 🔑 Key Features
            - No external API keys  
            - On-device embedding & LLM (Ollama)  
            - Fuzzy spell-correction based on PDF vocabulary  

            ## 👤 About the Author  
            **Dr. Habib Ullah Manzoor**  
            - Ph.D. in Trustworthy Distributed Computing, University of Glasgow  
            - 10+ years of experience in machine learning, federated learning, and cybersecurity  
            - Core skills: Python, PyTorch, TensorFlow, LangChain, AWS, IoT, Explainable AI  
            - [LinkedIn](https://www.linkedin.com/in/habib-ullah-manzoor-phd-19198994/) | [Google Scholar](https://scholar.google.com.pk/citations?user=tKDhmdAAAAAJ&hl=en) | [ORCID](https://orcid.org/0000-0003-0192-7353)

            Contact:  
            📧 habibullahmanzoor@gmail.com 
            """
        )

    # ---------- UPLOAD PDF TAB ----------
    # File picker restricted to .pdf, a read-only status line and the trigger button.
    with gr.Tab("Upload PDF"):
        pdf_input = gr.File(label="Choose PDF", file_types=[".pdf"])
        status = gr.Textbox(label="Status", interactive=False)
        upload_btn = gr.Button("Process PDF")

    # ---------- CHAT TAB ----------
    with gr.Tab("Chat") as chat_tab:
        chatbot = gr.Chatbot(label="Conversation", height=400)
        msg = gr.Textbox(label="Your question", placeholder="Ask anything about the uploaded PDF…")
        clear = gr.Button("Clear")

        # Pressing Enter sends (history, question) to chat(), which returns the
        # updated history and an empty string that clears the textbox.
        msg.submit(chat, [chatbot, msg], [chatbot, msg])
        # "Clear" resets the chatbot by writing None to it.
        clear.click(lambda: None, None, chatbot, queue=False)

    # wire upload button to show Chat tab: upload_pdf returns the status text
    # and a visibility update for chat_tab.
    upload_btn.click(
        upload_pdf,
        inputs=pdf_input,
        outputs=[status, chat_tab]
    )
    # NOTE(review): visibility is set by assigning the attribute after the tab
    # was constructed; passing visible=False to gr.Tab(...) may be the more
    # conventional form — confirm against the installed Gradio version.
    chat_tab.visible = False  # hidden until PDF processed

# --------------------------------------------------
# LAUNCH
# --------------------------------------------------
# Start the Gradio server only when this file is run as a script, not on import.
if __name__ == "__main__":
    app.launch()

  chatbot = gr.Chatbot(label="Conversation", height=400)


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


  embeddings = OllamaEmbeddings(model=MODEL_NAME)
  llm = Ollama(model=MODEL_NAME)
