In [1]:
!pip install gradio langchain chromadb sentence-transformers pypdf transformers accelerate bitsandbytes


Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting pypdf
  Downloading pypdf-5.7.0-py3-none-any.whl.metadata (7.2 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.34.1-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_ex

In [3]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.1.0-py3-n

In [4]:
import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema.runnable import RunnableMap
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline


In [5]:
def load_pdf_and_embed(pdf_path, persist_directory="db"):
    """Load a PDF, split it into chunks and index the chunks in Chroma.

    Each document is stored in its own Chroma collection whose name is derived
    from a hash of the extracted chunk text. Without a per-document collection
    name, every call would write into the same default collection under
    ``persist_directory``, so chunks of previously indexed PDFs would be
    retrieved for questions about the current one. Chunk ids are deterministic
    as well, so indexing the same PDF again reuses the same ids instead of
    adding duplicate entries.

    Args:
        pdf_path: Location of the PDF, passed unchanged to ``PyPDFLoader``.
        persist_directory: Directory handed to Chroma for on-disk persistence.

    Returns:
        The Chroma vector store holding the chunks of this PDF only.

    Raises:
        ValueError: If the PDF produced no text chunks (nothing to index).
    """
    import hashlib  # local import: keeps this cell runnable on its own

    loader = PyPDFLoader(pdf_path)
    docs = loader.load()

    # Chunk documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(docs)
    if not chunks:
        raise ValueError(
            f"No text chunks could be extracted from {pdf_path!r}; nothing to index."
        )

    # Fingerprint the extracted text so identical content maps to the same
    # collection and different PDFs never share one.
    hasher = hashlib.sha256()
    for chunk in chunks:
        hasher.update(chunk.page_content.encode("utf-8", errors="replace"))
        hasher.update(b"\x00")  # separator so chunk boundaries affect the hash
    collection_name = f"pdf-{hasher.hexdigest()[:32]}"
    chunk_ids = [f"{collection_name}-{index}" for index in range(len(chunks))]

    # Embed using sentence-transformers
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectordb = Chroma.from_documents(
        chunks,
        embedding=embeddings,
        persist_directory=persist_directory,
        collection_name=collection_name,
        ids=chunk_ids,
    )

    return vectordb


In [6]:
def load_local_phi_model():
    """Return a LangChain LLM wrapping a local Phi-3 text-generation pipeline.

    The model is built on the first call only and then cached on the function
    object; later calls return the same LLM instance. The upload handler calls
    this function once per uploaded PDF, so without the cache the tokenizer and
    model weights would be loaded again for every upload.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text-generation`` pipeline
        configured with ``max_new_tokens=300``, sampling enabled and
        ``temperature=0.3``.
    """
    cached_llm = getattr(load_local_phi_model, "_cached_llm", None)
    if cached_llm is not None:
        return cached_llm

    model_id = "microsoft/phi-3-mini-4k-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # NOTE(review): no dtype/quantization is requested here although the first
    # cell installs bitsandbytes -- confirm the default load fits the target
    # machine's memory.
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)

    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=300, do_sample=True, temperature=0.3)
    llm = HuggingFacePipeline(pipeline=pipe)

    # Remember the instance so subsequent calls skip the expensive load.
    load_local_phi_model._cached_llm = llm
    return llm


In [None]:
def create_qa_chain(vectorstore, llm):
    """Build a retrieval-augmented QA chain over ``vectorstore``.

    The chain takes the question string as input, retrieves the 3 most similar
    chunks, renders them together with the question into a prompt, runs the
    LLM and parses the result to a string.

    Args:
        vectorstore: Vector store exposing ``as_retriever``.
        llm: Runnable language model that accepts the prompt string.

    Returns:
        A runnable chain; call ``chain.invoke(question)`` to get the answer.
    """
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

    def _join_documents(documents):
        """Concatenate the text of the retrieved documents for the prompt."""
        # Interpolating the raw list would embed the Document reprs (metadata
        # included) in the prompt rather than just their text.
        return "\n\n".join(document.page_content for document in documents)

    def _build_prompt(inputs):
        """Render the prompt from the ``context`` and ``question`` entries."""
        return (
            "Answer the question based on context below.\n\n"
            f"Context: {inputs['context']}\n\n"
            f"Question: {inputs['question']}"
        )

    # RAG pipeline
    # The mapping must be wrapped in RunnableMap: `dict | function` raises
    # TypeError because neither a plain dict nor a plain function implements
    # `|` for the other, so the pipe has to start from a Runnable.
    chain = (
        RunnableMap({"context": retriever | _join_documents, "question": RunnablePassthrough()})
        | _build_prompt
        | llm
        | StrOutputParser()
    )
    return chain


In [7]:
def create_gradio_interface():
    """Build the Gradio app for uploading a PDF and asking questions about it.

    The active chain lives in the module-level name ``qa_chain``: the upload
    handler rebinds it after a PDF has been indexed and the question handler
    reads it on every request.

    Returns:
        The ``gr.Blocks`` app. Launching it is left to the caller.
    """

    def ask_question(query):
        """Answer ``query`` with the current chain, or say what is missing."""
        # globals().get tolerates `qa_chain` never having been initialised; a
        # bare name lookup would raise NameError in that case.
        chain = globals().get("qa_chain")
        if chain is None:
            return "Please upload a PDF first."
        if not query or not query.strip():
            return "Please enter a question."
        return chain.invoke(query)

    def upload_pdf(file):
        """Index the uploaded PDF and install a fresh chain for it."""
        global qa_chain
        # Clicking "Load PDF" with no file selected passes None.
        if file is None:
            return "Please select a PDF file first."
        # The upload may arrive as a plain path string or as an object that
        # exposes the path as `.name`; accept both.
        pdf_path = getattr(file, "name", file)
        try:
            vectordb = load_pdf_and_embed(pdf_path)
            llm = load_local_phi_model()
            new_chain = create_qa_chain(vectordb, llm)
        except Exception as exc:
            # Report the failure in the status box; the previous chain (if any)
            # stays active because `qa_chain` is only rebound on success.
            return f"❌ Could not load PDF: {type(exc).__name__}: {exc}"
        qa_chain = new_chain
        return "✅ PDF uploaded and indexed. Ask your questions now."

    with gr.Blocks() as demo:
        gr.Markdown("# 🧠 Local PDF Question Answering\nUpload a PDF and ask questions using Phi-3 model locally.")

        with gr.Row():
            file_input = gr.File(label="Upload PDF")
            upload_btn = gr.Button("Load PDF")
            status = gr.Textbox(label="Status")

        upload_btn.click(upload_pdf, inputs=[file_input], outputs=[status])

        with gr.Row():
            query = gr.Textbox(label="Your Question")
            ask_btn = gr.Button("Ask")
            answer = gr.Textbox(label="Answer")

        ask_btn.click(ask_question, inputs=[query], outputs=[answer])

    return demo


In [8]:
# `qa_chain` has to exist before the UI is used: ask_question() reads this
# module-level name and upload_pdf() rebinds it once a PDF has been indexed.
qa_chain = None
# Build the Blocks app, then start serving it from this cell.
demo = create_gradio_interface()
# NOTE: per the recorded output below, on a hosted notebook Gradio switches on
# share=True by itself and publishes a public URL; pass share=False to launch()
# to turn that off, or debug=True to see handler errors in Colab.
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3c7231f5f6892bda94.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


