<a href="https://colab.research.google.com/github/dp-93/RAG-PDF-chatbot/blob/main/Chat_with_Your_PDF_(RAG).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install -q langchain langchain_google_genai PyPDF2 faiss-cpu langchain-community

In [11]:
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from PyPDF2 import PdfReader

# --- Configuration ---
# SECURITY: never hard-code an API key in a notebook. Anything committed to a
# repository is effectively public, and a key that was ever committed should
# be revoked and regenerated. The key is taken from the environment when it is
# already set; otherwise it is requested interactively without being echoed.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass  # local import: only needed when prompting

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")
PDF_FILE_PATH = "Java 8 feature.pdf"  # Make sure your filename is here

# --- PDF Processing and Text Extraction ---
def get_pdf_text(pdf_path):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined in page order, or ``None`` when the file
        does not exist (an error message is printed in that case).
    """
    try:
        pdf_reader = PdfReader(pdf_path)
        # Pages without a text layer (e.g. scanned images) may yield an empty
        # value -- possibly None depending on the PyPDF2 version -- so coerce
        # to "" rather than failing on `str + None`.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except FileNotFoundError:
        print(f"Error: The file '{pdf_path}' was not found. Please upload your PDF and check the filename.")
        return None

# --- Text Chunking ---
def get_text_chunks(text):
    """Split *text* into overlapping chunks.

    The splitter is configured with ``chunk_size=10000`` and
    ``chunk_overlap=1000``.

    Args:
        text: The full document text.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        *text* (the chunk strings).
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    ).split_text(text)

# --- Embedding and Vector Store Creation ---
def get_vector_store(text_chunks):
    """Embed the chunks, index them with FAISS and save the index locally.

    The index is written to the ``faiss_index`` directory and a confirmation
    line is printed. Nothing is returned.

    Args:
        text_chunks: The text chunks to embed and index.
    """
    index = FAISS.from_texts(
        text_chunks,
        embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    )
    index.save_local("faiss_index")
    print("Vector store created and saved.")

# --- Conversational Chain Setup ---
def get_conversational_chain():
    """Build the question-answering chain used to answer from retrieved context.

    The chain is a "stuff" QA chain driven by a Gemini chat model
    (temperature 0.3) and a prompt that tells the model to answer only from
    the supplied context.

    Returns:
        The chain object produced by ``load_qa_chain``.
    """
    template_text = """
    Answer the question as detailed as possible from the provided context. If the answer is not in
    the provided context, just say, "The answer is not available in the context". Do not provide a wrong answer.\n\n
    Context:\n{context}\n
    Question:\n{question}\n

    Answer:
    """
    llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=0.3)
    return load_qa_chain(
        llm,
        chain_type="stuff",
        prompt=PromptTemplate(
            template=template_text,
            input_variables=["context", "question"],
        ),
    )

# --- Main Application Logic ---
def user_input(user_question):
    """Answer *user_question* from the saved FAISS index and print the answer.

    Loads the index from ``faiss_index``, retrieves the chunks most similar to
    the question, runs them through the QA chain and prints the chain's
    ``output_text``. If the index cannot be loaded, an error is printed and
    the function returns without answering.

    Args:
        user_question: The question to ask about the indexed document.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    try:
        # NOTE(review): allow_dangerous_deserialization=True opts in to
        # loading serialized index data; only point this at an index this
        # notebook created itself.
        store = FAISS.load_local(
            "faiss_index",
            embedder,
            allow_dangerous_deserialization=True,
        )
    except Exception as e:
        print(f"Error loading vector store: {e}")
        return
    else:
        matches = store.similarity_search(user_question)
        qa_chain = get_conversational_chain()

        result = qa_chain.invoke(
            {"input_documents": matches, "question": user_question},
            return_only_outputs=True,
        )

        print("\n--- Answer ---")
        print(result["output_text"])


# --- Running the RAG Pipeline ---
# Build the index only once per session: the `rag_initialized` flag lets this
# cell be re-run without re-reading and re-embedding the PDF.
if 'rag_initialized' not in locals():
    raw_text = get_pdf_text(PDF_FILE_PATH)
    if raw_text and raw_text.strip():
        text_chunks = get_text_chunks(raw_text)
        get_vector_store(text_chunks)
        print("\nPDF processed successfully. You can now ask questions in a new cell.")
        rag_initialized = True
    elif raw_text is not None:
        # get_pdf_text() already reports a missing file (returning None); an
        # empty or whitespace-only result means the PDF has no extractable
        # text, which would otherwise be skipped without any feedback.
        print(f"No extractable text was found in '{PDF_FILE_PATH}'. The vector store was not created.")



In [12]:
# Ask a sample question against the saved index; user_input() prints the answer.
user_input("What is the main conclusion of this document?")


--- Answer ---
The document comprehensively details Java 8 features, focusing on functional programming concepts.  It explains default and static methods in interfaces, functional interfaces (their purpose and examples like `Function`, `Predicate`, `Consumer`, and `Supplier`), lambda expressions, method references, the Stream API, and higher-order functions.  The main conclusion is that Java 8 significantly enhanced Java's capabilities by incorporating functional programming paradigms, leading to more concise, readable, and maintainable code through features like lambda expressions, streams, and functional interfaces.


In [None]:
# --- 1. Install all necessary libraries ---
!pip install -q gradio langchain langchain_google_genai PyPDF2 faiss-cpu langchain-community

import os
import gradio as gr
import nest_asyncio
nest_asyncio.apply()

from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from PyPDF2 import PdfReader

# --- 2. Configuration ---
# SECURITY: never hard-code an API key in a notebook. Anything committed to a
# repository is effectively public, and a key that was ever committed should
# be revoked and regenerated. The key is taken from the environment when it is
# already set; otherwise it is requested interactively without being echoed.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass  # local import: only needed when prompting

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")


# --- 3. Core RAG Functions ---

def process_pdf_and_create_db(pdf_file):
    """Build an in-memory FAISS index from an uploaded PDF.

    Args:
        pdf_file: The value delivered by the file-upload component: a path
            string, an object exposing the path as ``.name``, or ``None`` when
            nothing has been uploaded.

    Returns:
        A 3-tuple ``(vector_store, status_update, chat_box_update)``:
        the FAISS store (``None`` on failure), an update that shows a status
        message, and an update that shows the chat area on success or hides it
        on failure.
    """
    if pdf_file is None:
        return None, gr.update(value="Please upload a PDF file first.", visible=True), gr.update(visible=False)

    try:
        # Accept both a plain path and a file-like wrapper carrying `.name`.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        pdf_reader = PdfReader(pdf_path)
        # Pages without a text layer may yield an empty value (possibly None
        # depending on the PyPDF2 version); coerce so the join cannot fail.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

        # Without any text there is nothing to embed, and building the index
        # from an empty chunk list would only surface as an obscure error.
        if not text.strip():
            return (
                None,
                gr.update(
                    value="Error processing PDF: no extractable text was found (the file may contain only scanned images).",
                    visible=True,
                ),
                gr.update(visible=False),
            )

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
        chunks = text_splitter.split_text(text)
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        vector_store = FAISS.from_texts(chunks, embedding=embeddings)

        # Success: hand back the store, show the status and reveal the chat area.
        return vector_store, gr.update(value="PDF processed successfully! Ready to chat.", visible=True), gr.update(visible=True)
    except Exception as e:
        # UI boundary: report any failure in the status box instead of raising.
        return None, gr.update(value=f"Error processing PDF: {e}", visible=True), gr.update(visible=False)


def get_conversational_chain():
    """Build the question-answering chain used by the chatbot.

    The chain is a "stuff" QA chain driven by a Gemini chat model
    (temperature 0.3) and a prompt that tells the model to answer only from
    the supplied context.

    Returns:
        The chain object produced by ``load_qa_chain``.
    """
    template_text = """
    Answer the question as detailed as possible from the provided context. If the answer is not in
    the provided context, just say, "The answer is not available in the context". Do not provide a wrong answer.\n\n
    Context:\n{context}\n
    Question:\n{question}\n
    Answer:
    """
    llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=template_text,
        input_variables=["context", "question"],
    )
    qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
    return qa_chain


# --- 4. Gradio Chatbot Logic ---

def user(user_message, history):
    """Record the user's message as a new, not-yet-answered chat turn.

    Args:
        user_message: The text the user just submitted.
        history: The conversation so far, a list of ``[question, answer]`` pairs.

    Returns:
        A tuple of an empty string (the new textbox content) and a new history
        list with ``[user_message, None]`` appended; *history* itself is not
        modified.
    """
    pending_turn = [user_message, None]
    updated_history = history + [pending_turn]
    return "", updated_history

def bot(history, vector_store):
    """Fill in the answer for the most recent chat turn.

    Args:
        history: The conversation, a list of ``[question, answer]`` pairs whose
            last entry holds the pending question. It is updated in place.
        vector_store: The store to search for context, or ``None`` when no PDF
            has been processed yet.

    Returns:
        The same *history* list, with the last turn's answer set to the model's
        reply, or to an error message when no store is available or the lookup
        or model call fails.
    """
    latest_turn = history[-1]

    if vector_store is None:
        latest_turn[1] = "Error: PDF not processed. Please upload a PDF in Step 1."
        return history

    user_question = latest_turn[0]

    try:
        matches = vector_store.similarity_search(user_question)
        qa_chain = get_conversational_chain()
        result = qa_chain.invoke(
            {"input_documents": matches, "question": user_question},
            return_only_outputs=True,
        )
        reply = result["output_text"]
    except Exception as e:
        # Show the failure in the chat instead of raising.
        reply = f"An error occurred: {e}"

    latest_turn[1] = reply
    return history


# --- 5. Gradio Interface Definition ---

# Declare the UI: an upload step that builds the vector store, followed by a
# chat area that stays hidden until a PDF has been processed successfully.
with gr.Blocks(title="Chat with Your PDF", theme=gr.themes.Soft()) as demo:
    # Holds the FAISS store returned by process_pdf_and_create_db (None until then).
    db_state = gr.State(None)

    gr.Markdown("# 💬 Chat with Your PDF")
    gr.Markdown("This app allows you to chat with a PDF document using a powerful AI model.")

    with gr.Accordion("Step 1: Upload Your PDF", open=True):
        pdf_input = gr.File(label="Upload your PDF", type="filepath")
        # Hidden until process_pdf_and_create_db returns a status message to show.
        status_output = gr.Textbox(label="Processing Status", interactive=False, visible=False)

    # gr.Group is used as the container here (this was previously gr.Box).
    with gr.Group(visible=False) as chat_box: # This entire section is hidden initially
        gr.Markdown("### Step 2: Ask Questions")
        chatbot = gr.Chatbot(label="Conversation")
        with gr.Row():
            msg = gr.Textbox(label="Your Question", placeholder="Ask a question about the PDF...", scale=4)
            submit_btn = gr.Button("Send", scale=1)

    # --- Event Handlers ---

    # When a PDF is uploaded, process it, store the resulting vector store in
    # db_state, update the status text, and show or hide the chat box.
    pdf_input.upload(
        process_pdf_and_create_db,
        inputs=[pdf_input],
        outputs=[db_state, status_output, chat_box]
    )

    # Handle chat submission: `user` first appends the question to the history
    # and clears the textbox, then `bot` fills in the answer for that turn.
    submit_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, db_state], chatbot
    )
    # Submitting from the textbox runs the same two-step chain as the Send button.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, db_state], chatbot
    )

# Launch the app. With debug=True the cell keeps running so that errors and
# logs stay visible; set debug=False to turn this off.
demo.launch(debug=True)



  chatbot = gr.Chatbot(label="Conversation")


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://f9951983112bed1956.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
