
**Library Installation**

In [None]:
!pip install gradio langchain PyPDF2 chromadb sentence-transformers requests
!pip install -U langchain-community



Importing Libraries

In [None]:
import requests
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import gradio as gr
import os


API Setup

In [None]:
# Hosted inference endpoint that query_hf_api() posts prompts to.
API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-large"
# The token is read from the environment so it is never stored in the notebook.
HF_TOKEN = os.getenv("HF_API_TOKEN")
# Only attach an Authorization header when a token is configured; formatting
# a missing token would otherwise send the literal value "Bearer None".
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}


Function to Query Hugging Face API


In [None]:
def query_hf_api(payload, timeout=60):
    """POST ``payload`` to the inference endpoint and return the decoded JSON.

    Args:
        payload: JSON-serialisable request body sent to ``API_URL``.
        timeout: Seconds to wait for the server before the request is
            aborted. Without a timeout a stalled endpoint would block the
            caller indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        Exception: If the endpoint answers with a status code other than 200;
            the message carries the status code and the response text.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"API Error: {response.status_code}, {response.text}")
    return response.json()


 Extract Text from a PDF File

In [None]:
def get_pdf_text(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Passed unchanged to ``PdfReader``.

    Returns:
        A single string with the extracted text of all pages joined without
        a separator. Pages that yield no text contribute an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # Coerce an empty/None extraction result to "" so one text-less page
    # cannot abort the whole document with a TypeError.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


**Split Text into Smaller Chunks**

In [None]:
def get_text_chunks(text, chunk_size=2000, chunk_overlap=400):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The full document text.
        chunk_size: Value passed to the splitter as ``chunk_size``.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.

    Returns:
        The list of chunks produced by ``RecursiveCharacterTextSplitter``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)


Create a Vector Store for Text

In [None]:
def get_vector_store(text_chunks, persist_directory="chroma_db"):
    """Embed ``text_chunks`` and index them in a fresh Chroma collection.

    Args:
        text_chunks: The text chunks to embed and store.
        persist_directory: Directory handed to Chroma for on-disk storage.

    Returns:
        The Chroma vector store containing only ``text_chunks``.
    """
    import uuid  # local import: only needed to name the collection

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # Every call gets its own collection. Reusing one collection name in the
    # shared persist directory would keep appending, so chunks indexed for
    # earlier PDFs (or earlier questions) would leak into later retrievals.
    vector_store = Chroma.from_texts(
        text_chunks,
        embedding=embeddings,
        collection_name=f"pdf-{uuid.uuid4().hex}",
        persist_directory=persist_directory,
    )
    # Call persist() only where the wrapper still provides it.
    persist = getattr(vector_store, "persist", None)
    if callable(persist):
        persist()
    return vector_store


Find Answers Using the Vector Store


In [None]:
def ask_question(vector_store, user_question):
    """Answer ``user_question`` from the chunks most similar to it.

    Args:
        vector_store: Vector store to search for relevant chunks.
        user_question: The question to answer.

    Returns:
        The ``generated_text`` of the first item returned by the model API.

    Raises:
        RuntimeError: If the API response is not a non-empty list whose
            first item has a ``generated_text`` key.
    """
    # Query the store directly rather than through the deprecated
    # retriever.get_relevant_documents(); a default retriever performs this
    # same similarity search.
    relevant_docs = vector_store.similarity_search(user_question)

    context = "\n".join(doc.page_content for doc in relevant_docs)

    payload = {"inputs": f"Context: {context}\nQuestion: {user_question}\nAnswer:"}
    response = query_hf_api(payload)
    try:
        return response[0]["generated_text"]
    except (KeyError, IndexError, TypeError) as exc:
        # Report the actual payload instead of an opaque KeyError/TypeError.
        raise RuntimeError(f"Unexpected API response: {response!r}") from exc


Main Function for Chat Interface

In [None]:
def chat_with_pdf(pdf_file, user_question):
    """Run the full pipeline: extract, chunk, index, then answer.

    Args:
        pdf_file: The uploaded file, either an object exposing the path as
            ``.name`` or the path itself. ``None`` when nothing was uploaded.
        user_question: The question to answer from the PDF.

    Returns:
        The generated answer, or a short instruction/diagnostic message when
        the file or question is missing or the PDF has no extractable text.
    """
    # Return readable messages for incomplete input instead of letting an
    # AttributeError surface to the UI.
    if pdf_file is None:
        return "Please upload a PDF file first."
    if not user_question or not user_question.strip():
        return "Please enter a question about the PDF."

    # Accept both a file-like upload (path in .name) and a plain path.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    raw_text = get_pdf_text(pdf_path)
    text_chunks = get_text_chunks(raw_text)
    if not text_chunks:
        # Nothing to index (e.g. no text layer), so there is nothing to search.
        return "No extractable text was found in this PDF."

    vector_store = get_vector_store(text_chunks)
    return ask_question(vector_store, user_question)


Defining the Interface Function

In [None]:
def gradio_interface(pdf_file, user_question):
    """Gradio callback: hand the uploaded file and question to ``chat_with_pdf``.

    Returns whatever ``chat_with_pdf`` returns for the same two arguments.
    """
    answer = chat_with_pdf(pdf_file, user_question)
    return answer


Creating the Gradio Interface

In [None]:
# Web UI: a file upload plus a question box in, the answer text out.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        # Restrict uploads to PDFs, the only format the pipeline parses.
        gr.File(label="Upload your PDF file", file_types=[".pdf"]),
        gr.Textbox(label="Ask a question about the PDF"),
    ],
    outputs="text",
    title="Interactive PDF Question Answering",
    description="Upload a PDF, ask a question, and get the answer based on the PDF content using FLAN-T5 model."
)


Launching the Interface

In [None]:
# Start the Gradio app defined above; the cell output reports the URL it is served on.
iface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://197fc822e8ebf53742.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


