In [None]:
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
import os

In [None]:
# Read the source PDF from the current working directory. `docs` is the list
# returned by the loader and is consumed by the splitting step further down.
# NOTE(review): presumably one entry per PDF page -- confirm against the
# PyMuPDFLoader documentation.
loader = PyMuPDFLoader("SherlockHolmesComplete.pdf")
docs = loader.load()

In [None]:
# Sanity check: show how many documents the loader produced (the bare
# expression is displayed as the notebook cell's output).
len(docs)

In [40]:
# Cut the loaded documents into chunks of at most 500 characters, with 100
# characters shared between neighbouring chunks so text that straddles a
# boundary appears in both.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = text_splitter.split_documents(docs)

# Embedding model used for both indexing and querying.
# Alternative that was tried: sentence-transformers/all-MiniLM-L6-v2
# NOTE(review): the E5 model card recommends prefixing inputs with "query: " /
# "passage: "; nothing here adds those prefixes -- confirm retrieval quality.
embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-base-v2")

VECTOR_DB_PATH = "sherlock-holmes-vectorstore"

# save_local() with its default index name writes "index.faiss" and
# "index.pkl" into the folder. Checking for those files (instead of only the
# folder) means an empty or half-written folder triggers a rebuild rather
# than a crash inside load_local().
_index_files = [
    os.path.join(VECTOR_DB_PATH, file_name)
    for file_name in ("index.faiss", "index.pkl")
]

if all(os.path.isfile(path) for path in _index_files):
    print("🔁 Loading existing vectorstore...")
    # SECURITY: load_local() unpickles "index.pkl". The flag below is only
    # acceptable because this folder is produced by this notebook itself;
    # never point VECTOR_DB_PATH at an index obtained from an untrusted source.
    # NOTE(review): the saved index is reused no matter which embedding model
    # built it -- delete the folder after changing `model_name` above.
    vectorstore = FAISS.load_local(
        VECTOR_DB_PATH,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    print("✅ Existing Vectorstore loaded from:", VECTOR_DB_PATH)
else:
    if not chunks:
        # Fail with a clear message instead of an obscure error from inside
        # FAISS.from_documents when there is nothing to embed.
        raise ValueError(
            "No text chunks to index: the PDF loader returned no usable content."
        )
    print("🧠 Creating new vectorstore...")
    vectorstore = FAISS.from_documents(chunks, embedding=embeddings)
    vectorstore.save_local(VECTOR_DB_PATH)
    print("✅ Vectorstore saved at:", VECTOR_DB_PATH)

# Retriever that returns the 5 most similar chunks for a query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})


# Chat model that writes the final answer (Gemini 2.0 Flash, temperature 0.2).
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.2,
)

# Template with two placeholders, {context} and {question}; it instructs the
# model to answer only from the supplied context.
_qa_template = """
You are a helpful assistant. Use the provided context to answer the question.

Context:
{context}

Question:
{question}

Answer clearly and concisely. Do not answer anything beyond the context you are given.
Even if you have an answer from your existing knowledge you have to answer solely based on the 
context.
"""

prompt = ChatPromptTemplate.from_template(_qa_template)

In [None]:
# Pipe the filled-in prompt straight into the chat model.
chain = prompt | llm

question = "Tell me about the 6 Napoleon"

# Look up the chunks most similar to the question and join their text,
# separated by blank lines, into a single context string.
retrieved_docs = retriever.invoke(question)
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

# The keys must match the placeholders used in the prompt template.
inputs = dict(context=context_text, question=question)
response = chain.invoke(inputs)

print("🧠 Question:", question)
print("💬 Answer:", response.content)

# Also show the retrieved text so the answer can be checked against it.
print("\n📚 Retrieved Context: " + context_text)
