In [None]:
# PDF Q&A Chatbot with Gradio UI in One Notebook

# STEP 1: Install Required Packages
!pip install openai langchain langchain-community faiss-cpu gradio PyPDF2 tiktoken --quiet

# STEP 2: Import Libraries
import os
import PyPDF2
import gradio as gr
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document

# STEP 3: Set Your OpenAI API Key
# Paste your key into OPENAI_API_KEY below, or leave the placeholder untouched
# and provide the key through the OPENAI_API_KEY environment variable instead.
# Avoid committing or sharing a notebook that contains a real key.
_PLACEHOLDER_API_KEY = "your-openai-api-key-here"  # do not edit: marks "key not filled in"
OPENAI_API_KEY = "your-openai-api-key-here"  # Replace with your OpenAI key

# Only write the environment variable when the user actually filled in a key,
# or when nothing is configured yet. This keeps a real key that is already in
# the environment from being overwritten by the placeholder.
if OPENAI_API_KEY != _PLACEHOLDER_API_KEY or not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# STEP 4: Helper Function to Extract Text from PDF (in-memory)
def extract_text_from_pdf(file_obj):
    """Return the text of every page of a PDF as a single string.

    Args:
        file_obj: A binary file-like object (or anything ``PyPDF2.PdfReader``
            accepts) positioned at the start of the PDF data.

    Returns:
        The extracted text of all pages that produced any text, joined with a
        newline between pages. An empty string if no page produced text.
    """
    reader = PyPDF2.PdfReader(file_obj)
    page_texts = (page.extract_text() for page in reader.pages)
    # Skip pages whose extraction result is falsy (no text), and separate the
    # remaining pages with a newline so the last word of one page is not glued
    # to the first word of the next.
    return "\n".join(text for text in page_texts if text)

# STEP 5: Build Vector Index
def create_vector_index(pdf_text, chunk_size=1000, chunk_overlap=200):
    """Split text into overlapping chunks and index them in a FAISS store.

    Args:
        pdf_text: The full document text to index.
        chunk_size: Target maximum number of characters per chunk.
        chunk_overlap: Number of characters shared between adjacent chunks.

    Returns:
        A FAISS vector store built from the chunks using OpenAI embeddings.

    Raises:
        ValueError: If splitting ``pdf_text`` yields no chunks (e.g. the text
            is empty), since there is nothing to index.
    """
    # Function-local import: the file only imports CharacterTextSplitter at
    # module level. CharacterTextSplitter splits on one separator only
    # ("\n\n" by default), so text without blank lines -- common for text
    # extracted from PDFs -- can end up as one oversized chunk. The recursive
    # splitter falls back to finer separators until chunks fit chunk_size.
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_text(pdf_text)
    if not chunks:
        # Fail with a clear message instead of an opaque error from the
        # vector store when there are no documents to embed.
        raise ValueError("Cannot build a vector index from empty text.")

    docs = [Document(page_content=chunk) for chunk in chunks]
    embeddings = OpenAIEmbeddings()
    return FAISS.from_documents(docs, embeddings)

# STEP 6: Global Objects (state)
# Module-level state shared by the upload and question handlers.
# Both stay None until a PDF has been processed successfully:
#   vector_index -- the vector store built from the uploaded PDF
#   qa_chain     -- the retrieval QA chain that answers questions against it
vector_index = qa_chain = None

# STEP 7: Upload Handler (with error handling)
def process_pdf(file_path):
    """Build the vector index and QA chain for an uploaded PDF.

    Replaces the module-level ``vector_index`` and ``qa_chain``. Any previously
    loaded document is discarded first, so a cleared upload, a PDF without
    text, or a failed indexing run never leaves the old document answering
    questions.

    Args:
        file_path: Path of the uploaded PDF on disk, or ``None`` when no file
            is selected.

    Returns:
        A human-readable status message for the UI. Failures are reported as
        ``"Error: <details>"`` instead of being raised.
    """
    global vector_index, qa_chain

    # Invalidate the previous document up front; the new state is published
    # only after every step below has succeeded.
    vector_index = None
    qa_chain = None

    if file_path is None:
        return "No file uploaded."

    try:
        with open(file_path, "rb") as f:
            pdf_text = extract_text_from_pdf(f)

        if not pdf_text.strip():
            return "No extractable text found in PDF."

        print("First 500 characters of extracted text:", pdf_text[:500])  # Debug info

        new_index = create_vector_index(pdf_text)
        llm = ChatOpenAI(temperature=0)
        new_chain = RetrievalQA.from_chain_type(llm=llm, retriever=new_index.as_retriever())
    except Exception as e:
        # UI boundary: surface any failure (unreadable file, parse error,
        # API error) as a status message instead of crashing the handler.
        return f"Error: {e}"

    vector_index, qa_chain = new_index, new_chain
    return "PDF processed successfully! You can now ask questions."

# STEP 8: Q&A Function
def ask_question(question):
    """Answer a question about the currently loaded PDF.

    Args:
        question: The user's question text.

    Returns:
        The answer produced by the module-level ``qa_chain``, or a
        human-readable message when no PDF is loaded, the question is blank,
        or the chain raises (reported as ``"Error: <details>"``, matching the
        upload handler's error style).
    """
    if qa_chain is None:
        return "Please upload a PDF first."

    # Do not spend an LLM call on an empty or whitespace-only question.
    if not question or not question.strip():
        return "Please enter a question."

    try:
        return qa_chain.run(question.strip())
    except Exception as e:
        # UI boundary: report failures in the answer box instead of letting
        # the exception escape the handler.
        return f"Error: {e}"

# STEP 9: Gradio UI
with gr.Blocks() as demo:
    # Page header shown above the controls.
    gr.Markdown("## PDF Q&A Chatbot\nUpload a PDF and ask questions about its content.")

    # Upload widget and its status box share one row.
    with gr.Row():
        pdf_input = gr.File(type="filepath", file_types=[".pdf"], label="Upload PDF")
        upload_output = gr.Textbox(label="Upload Status")

    # Question input, answer display and trigger button, stacked below the row.
    question = gr.Textbox(label="Enter your question")
    answer = gr.Textbox(label="Answer")
    ask_btn = gr.Button("Ask")

    # Event wiring: (re)index whenever the selected file changes, and answer
    # the current question when the button is clicked.
    pdf_input.change(process_pdf, inputs=pdf_input, outputs=upload_output)
    ask_btn.click(ask_question, inputs=question, outputs=answer)

# STEP 10: Launch Interface
demo.launch(debug=False)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7a6f7a5901652c65e1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


