In [1]:
import gradio as gr
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
#from langchain.memory import ConversationBufferMemory

from langchain_classic.memory import ConversationBufferMemory


import os

# Read once at import time. The value is not passed to the chat model
# constructed further down — presumably the client picks up the same
# environment variable on its own (TODO confirm).
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

pdf_path = r"C:\Users\azeem\OneDrive\Desktop\attention_is_all_you_need.pdf"

# Read the PDF synchronously and keep every loaded document in memory.
loader = PyPDFLoader(pdf_path)
pages = list(loader.lazy_load())

# Sentence-transformer model that turns text into vectors.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Index the raw text of each loaded document in an in-memory FAISS store.
vectorstore = FAISS.from_texts(
    [doc.page_content for doc in pages],
    embedding=embeddings,
)

# Each query returns the two most similar entries.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

# Chat model plus a buffer that records the conversation turns.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)



def chat(query, history):
    """Answer ``query`` from the indexed paper and extend the transcript.

    The documents returned by the module-level ``retriever`` are pasted into
    a prompt that is sent to the module-level ``llm``. The exchange is also
    recorded in the module-level ``memory``.

    Args:
        query: Text the user typed into the textbox.
        history: Current transcript as a list of ``{"role", "content"}``
            dicts, or ``None`` when there is no conversation yet.

    Returns:
        A 2-tuple holding the updated transcript twice, because the
        ``submit`` event lists the chatbot component as both of its outputs.
        For an empty or whitespace-only ``query`` the transcript is returned
        unchanged and neither the retriever nor the model is called.
    """
    # Work on a copy so the caller's list is never mutated; None becomes [].
    transcript = list(history or [])

    # Nothing to ask: skip the retrieval round-trip and the LLM call.
    if not query or not query.strip():
        return transcript, transcript

    # Retrieve relevant docs
    retrieved_docs = retriever.invoke(query)
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # Build the RAG prompt
    prompt = f"""
You are an AI assistant. Use the below context from a research paper to answer the user's question.
Be factual and concise.

Context:
{context}

Question: {query}

Answer:
"""

    # Get response from Gemini
    response = llm.invoke(prompt)
    answer = response.content

    # Record the turn. NOTE(review): memory is written here but never read
    # back into the prompt, so follow-up questions do not see earlier turns.
    memory.chat_memory.add_user_message(query)
    memory.chat_memory.add_ai_message(answer)

    # Append to visible history for UI
    transcript.append({"role": "user", "content": query})
    transcript.append({"role": "assistant", "content": answer})
    return transcript, transcript

def _reset_chat():
    """Forget the stored conversation and return an empty transcript."""
    memory.clear()
    # A single output component receives the return value as-is, so this
    # must be the empty list itself rather than a tuple wrapping it.
    return []


# Single-page UI: a transcript, a question box, and a reset button.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📘 RAG Chatbot — Ask Questions from *Attention Is All You Need*")

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Ask a question about the paper")

    clear = gr.Button("Clear Chat")

    # chat() returns the transcript twice, hence the duplicated output.
    # Once the answer is shown, empty the textbox for the next question.
    msg.submit(chat, [msg, chatbot], [chatbot, chatbot]).then(
        lambda: "", None, [msg]
    )
    clear.click(_reset_chat, None, [chatbot])


# share=True also publishes a temporary public URL in addition to the
# local one, so anyone holding the link can send questions to the model.
demo.launch(share=True)


  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
  with gr.Blocks(theme=gr.themes.Soft()) as demo:


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://f47f684ed647d06953.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


