In [1]:
from langchain_community.document_loaders import PyMuPDFLoader

In [8]:
from dotenv import load_dotenv
import os
# Load environment variables via python-dotenv, then read the Gemini API key.
# Indexing os.environ directly means a missing GEMINI_API_KEY raises KeyError
# here instead of failing later inside an API call.
load_dotenv()
gemini_api_key = os.environ["GEMINI_API_KEY"]


In [2]:
# Read the source PDF into LangChain Document objects using PyMuPDF.
document = PyMuPDFLoader(file_path="zepto.pdf").load()

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of up to 1000 characters, with a
# 200-character overlap between consecutive chunks.
test_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
spit_doc = test_splitter.split_documents(document)

In [4]:
from langchain.vectorstores import FAISS

In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Google Generative AI embedding model, authenticated with the Gemini API key.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=gemini_api_key,
    model="models/embedding-001",
)

# Embed the split chunks and build a FAISS vector store from them.
vector_store = FAISS.from_documents(documents=spit_doc, embedding=embeddings)

In [11]:
# Persist the FAISS store to the "vector_store" folder so it can be reloaded
# without re-embedding the PDF. Called on the instance rather than as an
# unbound method through the FAISS class.
vector_store.save_local("vector_store")

In [14]:
# Reload the persisted store from the "vector_store" folder, using the same
# embeddings object for query-time embedding.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# pickled data from disk. Only load folders this notebook wrote itself; never
# point this at an untrusted directory.
vector = FAISS.load_local(
    "vector_store",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [18]:
# Expose the reloaded store as a retriever that requests 6 results per query.
retriever = vector.as_retriever(
    search_kwargs={"k": 6},
)


In [19]:
# Smoke-test the retriever with a throwaway query; the returned documents are
# displayed as the cell output.
retriever.invoke(input="hi")

[Document(id='44e7a223-fbbf-4a27-bae9-ee78489dca18', metadata={'producer': 'Canva', 'creator': 'Canva', 'creationdate': '2024-05-23T03:45:33+00:00', 'source': 'zepto.pdf', 'file_path': 'zepto.pdf', 'total_pages': 9, 'format': 'PDF 1.4', 'title': 'NL - Zepto', 'author': 'Bhallamudi Sriharsha', 'subject': '', 'keywords': 'DAGEnn4c29g,BAD8HHSDm2U', 'moddate': '2024-05-23T03:45:29+00:00', 'trapped': '', 'modDate': "D:20240523034529+00'00'", 'creationDate': "D:20240523034533+00'00'", 'page': 6}, page_content='based filtering techniques to ensure a mix of\nbased filtering techniques to ensure a mix of\nfamiliar and new product recommendations\nfamiliar and new product recommendations\nObtain clear and informed user consent for\nObtain clear and informed user consent for\ndata \ncollection \nand \npersonalization\ndata \ncollection \nand \npersonalization\npractices. The essential data will only be\npractices. The essential data will only be\nconnected for recommendations\nconnected for recom

In [25]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
# Prompt text for the QA chain. It has exactly two placeholders: {context}
# (filled with the retrieved document text) and {question} (the user's query).
# The closing instruction is a single directive so the model is no longer told
# to be both "detailed" and "short" in the same prompt.
custom_prompt_template = """
You are an AI assistant that answers user queries based on the provided context.
If the context is insufficient, respond by saying you don't have enough information.
Be concise and accurate.

Context:
{context}

Question:
{question}

Give a short, on-point answer.
"""

# Wrap the template text in a PromptTemplate, declaring the two placeholders
# ("context" and "question") that must be supplied when it is formatted.
custom_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=custom_prompt_template,
)


In [26]:
# Gemini chat model (via langchain_google_genai) that generates the answers.
llm = ChatGoogleGenerativeAI(
    api_key=gemini_api_key,
    model='gemini-2.0-flash',
)

# RetrievalQA chain of type "stuff": uses the retriever to fetch context and
# the custom prompt to query the LLM.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": custom_prompt},
)


In [27]:
# Run one question through the QA chain; the resulting dict (with 'query' and
# 'result' keys) is displayed as the cell output.
qa_chain.invoke(input="what is sale of zepto")

{'query': 'what is sale of zepto',
 'result': 'Zepto\'s sales strategies include:\n\n*   **"Subscribe and Save" Option:** Encourages users to subscribe to staples and essentials, with deliveries based on purchase patterns and user consent. This feature is linked to the Zepto Pass to incentivize its purchase/renewal.\n*   **Bundles:** Allowing users to select products for bundles to promote autonomy and product discovery.'}