# **Imports and Model Initialization**

In [None]:
import openai
import gradio as gr
from langchain_openai import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from helper import get_openai_api_key
import nest_asyncio

# The key comes from the project helper and is passed to the client explicitly.
OPENAI_API_KEY = get_openai_api_key()

# Patch asyncio so an already-running event loop can be re-entered
# (presumably needed because this runs inside a notebook).
nest_asyncio.apply()

# Chat model used by the question-answering chain; temperature 0 keeps the
# answers as repeatable as the API allows.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

# **Prompt**

In [None]:
# Raw template text. The chain substitutes the retrieved document text for
# {context} and the user's question for {input}.
_PROMPT_TEXT = """
    You are a highly intelligent document assistant. Your answers must be strictly based on the provided context.
    If the information is not found in the context, clearly state: "The information is not available in the document."

    Your goal is to:
    1. Understand the user's question thoroughly and break it into subparts if complex.
    2. Retrieve the most relevant content from all provided documents.
    3. Combine information across documents when necessary to provide accurate and coherent answers.
    4. If the question involves locating specific terms (e.g., "pages where the word 'overfitting' appears"), identify and provide precise details.
    5. Handle differentiations, tabular data, and return results with well-formatted tables when applicable.

    Context:
    {context}
    Question: {input}
    Answer the question thoughtfully and accurately based on the provided context:
    """

prompt_template = ChatPromptTemplate.from_template(_PROMPT_TEXT)

# **Embeddings, Chunking, and Vector Store**

In [None]:
def process_pdfs(pdf_paths):
    """Load PDFs, split them into overlapping chunks, and index them in FAISS.

    Args:
        pdf_paths: Iterable of filesystem paths to the PDF files to index.

    Returns:
        A FAISS vector store holding the embedded chunks of every PDF.

    Raises:
        ValueError: If no pages could be loaded, or the loaded pages produce
            no text chunks (for example, PDFs without extractable text).
    """
    pages = []
    for pdf_path in pdf_paths:
        pages.extend(PyPDFLoader(pdf_path).load())

    # Fail early with a clear message: building a FAISS index from an empty
    # list otherwise surfaces as an opaque IndexError inside the library.
    if not pages:
        raise ValueError("No pages could be loaded from the provided PDFs.")

    # 1000-character chunks with a 200-character overlap, so text that
    # straddles a chunk boundary is still retrievable from either side.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)
    if not chunks:
        raise ValueError("The provided PDFs contain no extractable text to index.")

    # Use the same explicitly supplied key as the chat model instead of
    # silently depending on the OPENAI_API_KEY environment variable.
    embeddings_model = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
    return FAISS.from_documents(chunks, embeddings_model)

def query_vector_store(vector_store, user_query):
    """Answer ``user_query`` using the documents indexed in ``vector_store``.

    A retrieval chain is built for each call: the store's retriever selects
    the relevant chunks, which are stuffed into ``prompt_template`` and sent
    to ``llm``.

    Args:
        vector_store: Vector store exposing ``as_retriever()`` (the FAISS
            index built by ``process_pdfs``).
        user_query: The question text to answer.

    Returns:
        The chain's ``answer`` string, or the fixed "not available" message
        when the answer is missing or blank.
    """
    # Imported locally because the file's import section never brings this
    # name into scope; without it the call below raises NameError.
    from langchain.chains import create_retrieval_chain

    retriever = vector_store.as_retriever()
    document_chain = create_stuff_documents_chain(llm, prompt_template)
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    response = retrieval_chain.invoke({'input': user_query})

    # Fall back to the canned message when the model returned nothing usable.
    answer = response.get('answer', '')
    if answer.strip():
        return answer
    return "The information is not available in the document."

# **Gradio interface**

In [None]:
# Module-level state used by the Gradio callbacks: the current vector index
# (None until PDFs have been processed) and the chat transcript in the
# role/content "messages" format. NOTE(review): being module globals, these
# are shared by everything running in this process.
vector_store, chat_history = None, []


def handle_pdfs(pdfs=None, query=None):
    """Handle an upload or a question and return the updated chat transcript.

    Exactly one action is taken per call: a non-empty ``pdfs`` takes
    precedence and rebuilds the index; otherwise a non-blank ``query`` is
    answered. With neither, the transcript is returned unchanged.

    Args:
        pdfs: Uploaded files, either objects exposing a ``.name`` path or
            plain path strings. A non-empty value replaces the index and
            clears the transcript.
        query: Question text. Empty or whitespace-only text is ignored.

    Returns:
        The module-level ``chat_history`` list of
        ``{"role": ..., "content": ...}`` messages.
    """
    global vector_store
    if pdfs:
        # Accept both file-like upload objects and bare path strings.
        paths = [getattr(pdf, "name", pdf) for pdf in pdfs]
        vector_store = process_pdfs(paths)
        chat_history.clear()
    elif query and query.strip():
        # Compare against the None sentinel explicitly rather than relying
        # on the truthiness of the index object.
        if vector_store is None:
            answer = "Please upload PDFs first."
        else:
            answer = query_vector_store(vector_store, query)
        chat_history.append({"role": "user", "content": query})
        chat_history.append({"role": "assistant", "content": answer})
    return chat_history

def refresh_chat():
    """Forget the indexed documents and the transcript, and blank the chatbox."""
    global vector_store, chat_history
    vector_store = None
    chat_history = []
    # The chatbox output receives its own new empty list, not the global one.
    return []

# Custom stylesheet for the app; the layout code injects it into the page
# inside an inline <style> element. It colours user/assistant messages, fixes
# the chat area at 500px with vertical scrolling, and lays rows out with flexbox.
# NOTE(review): confirm these class names match the markup rendered by the
# installed Gradio version; selectors that match nothing are silently ignored.
css = """
.chatbot .user-message { color: #1E88E5; }
.chatbot .assistant-message { color: #28A745; }
.chatbot { height: 500px; overflow-y: scroll; }
.row { display: flex; justify-content: space-between; }
.column { flex: 1; margin-right: 10px; }
"""

# Build the UI. Components are created inside nested Row/Column context
# managers, so the order of the statements below defines the page layout.
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange_Green") as app:
    # Inject the custom stylesheet as an inline <style> element.
    gr.HTML(f"<style>{css}</style>")
    # Centered title and one-line description.
    gr.Markdown("""
        <div style="text-align: center;">
            <h1>📖 Multi-PDF Question-Answer Assistant</h1>
            <p>An interactive application to upload multiple PDFs and ask complex questions about their content.</p>
        </div>
    """)

    # Top row: upload/reset controls on the left, question entry on the right
    # (the right column is given twice the relative width).
    with gr.Row():
        with gr.Column(scale=1):
            pdf_upload = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")
            refresh_button = gr.Button("Refresh", variant="secondary")
        with gr.Column(scale=2):
            query_input = gr.Textbox(label="Your Question", placeholder="Type your question here...", lines=1)
            ask_button = gr.Button("Ask Question")

    # Bottom row: the transcript, in role/content "messages" format.
    with gr.Row():
        chatbox = gr.Chatbot(label="Chat with your PDFs", type="messages")

    # Event wiring. Each handler's return value replaces the chatbox content.
    # Changing the file selection calls handle_pdfs(pdfs=...), which re-indexes
    # the uploaded files.
    pdf_upload.change(lambda pdfs: handle_pdfs(pdfs=pdfs), inputs=pdf_upload, outputs=chatbox)
    # Only the button submits a question; no handler is attached to the
    # textbox itself, and the textbox is not cleared afterwards.
    ask_button.click(lambda query: handle_pdfs(query=query), inputs=query_input, outputs=chatbox)
    # "Refresh" resets the index and transcript and empties the chatbox.
    refresh_button.click(refresh_chat, inputs=None, outputs=chatbox)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a publicly reachable share link;
    # anyone with that link can use the app (and the configured OpenAI key's
    # quota). Confirm this exposure is intended.
    app.launch(share=True)