### 1. Get the models

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain_groq import ChatGroq
LLM = ChatGroq(model="deepseek-r1-distill-llama-70b")
LLM.invoke("What is the capital of France?")

AIMessage(content='<think>\n\n</think>\n\nThe capital of France is Paris.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 10, 'total_tokens': 22, 'completion_time': 0.055751182, 'prompt_time': 0.000209927, 'queue_time': 0.202566981, 'total_time': 0.055961109}, 'model_name': 'deepseek-r1-distill-llama-70b', 'system_fingerprint': 'fp_1bbe7845ec', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--d281932e-be79-4421-a2b2-d44568d46926-0', usage_metadata={'input_tokens': 10, 'output_tokens': 12, 'total_tokens': 22})

In [3]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
EMBEDDING=GoogleGenerativeAIEmbeddings(model="models/embedding-001")
len(EMBEDDING.embed_query("What is the capital of France?"))

768

### 2. Data Ingestion

In [4]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
file_path=os.path.join(os.getcwd(), "data", "recipes.pdf")
loader= PyPDFLoader(file_path)
documents = loader.load()
len(documents)

178

In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150, length_function=len)
split_docs = text_splitter.split_documents(documents)
len(split_docs)

360

In [7]:
split_docs[2].metadata

{'producer': 'Adobe PDF Library 15.0',
 'creator': 'Adobe InDesign 14.0 (Windows)',
 'creationdate': '2020-02-13T10:52:54+05:30',
 'moddate': '2020-02-13T10:53:27+05:30',
 'trapped': '/False',
 'source': '/Users/arindam/Machine Learning/GenAI_2025/AgenticAI/4_Agentic_Document_Portal_LangGraph_E2E/notebooks/data/recipes.pdf',
 'total_pages': 178,
 'page': 6,
 'page_label': '7'}

### 3. Store the docs in vector db

1. FIASS is in-memory vector store (eg. chroma)
2. on disk storage(faiss you can persist over the disk,chroma)
3. cloud storage(cloud variant of faiss is not available)(pinecone,weaviate,milvus,mongodbvectorsearch,astradb)

In [8]:
from langchain.vectorstores import FAISS
vectorstore = FAISS.from_documents(
    split_docs,
    EMBEDDING,)

In [9]:
vectorstore.similarity_search("How to make Stuffed Cauliflower Parantha?")

[Document(id='3983a019-fc81-443c-be5a-cedf1dff1d73', metadata={'producer': 'Adobe PDF Library 15.0', 'creator': 'Adobe InDesign 14.0 (Windows)', 'creationdate': '2020-02-13T10:52:54+05:30', 'moddate': '2020-02-13T10:53:27+05:30', 'trapped': '/False', 'source': '/Users/arindam/Machine Learning/GenAI_2025/AgenticAI/4_Agentic_Document_Portal_LangGraph_E2E/notebooks/data/recipes.pdf', 'total_pages': 178, 'page': 60, 'page_label': '45'}, page_content='45\nStuffed Cauliflower Parantha'),
 Document(id='94ce773a-3752-423a-b636-84f010973e6f', metadata={'producer': 'Adobe PDF Library 15.0', 'creator': 'Adobe InDesign 14.0 (Windows)', 'creationdate': '2020-02-13T10:52:54+05:30', 'moddate': '2020-02-13T10:53:27+05:30', 'trapped': '/False', 'source': '/Users/arindam/Machine Learning/GenAI_2025/AgenticAI/4_Agentic_Document_Portal_LangGraph_E2E/notebooks/data/recipes.pdf', 'total_pages': 178, 'page': 62, 'page_label': '47'}, page_content='47\nStuffed Radish Parantha'),
 Document(id='8ce56c0d-e5d1-4d1

In [10]:
retriever= vectorstore.as_retriever()
retriever.invoke("How to make Stuffed Cauliflower Parantha?")

[Document(id='3983a019-fc81-443c-be5a-cedf1dff1d73', metadata={'producer': 'Adobe PDF Library 15.0', 'creator': 'Adobe InDesign 14.0 (Windows)', 'creationdate': '2020-02-13T10:52:54+05:30', 'moddate': '2020-02-13T10:53:27+05:30', 'trapped': '/False', 'source': '/Users/arindam/Machine Learning/GenAI_2025/AgenticAI/4_Agentic_Document_Portal_LangGraph_E2E/notebooks/data/recipes.pdf', 'total_pages': 178, 'page': 60, 'page_label': '45'}, page_content='45\nStuffed Cauliflower Parantha'),
 Document(id='94ce773a-3752-423a-b636-84f010973e6f', metadata={'producer': 'Adobe PDF Library 15.0', 'creator': 'Adobe InDesign 14.0 (Windows)', 'creationdate': '2020-02-13T10:52:54+05:30', 'moddate': '2020-02-13T10:53:27+05:30', 'trapped': '/False', 'source': '/Users/arindam/Machine Learning/GenAI_2025/AgenticAI/4_Agentic_Document_Portal_LangGraph_E2E/notebooks/data/recipes.pdf', 'total_pages': 178, 'page': 62, 'page_label': '47'}, page_content='47\nStuffed Radish Parantha'),
 Document(id='8ce56c0d-e5d1-4d1

In [12]:
prompt = """
    You are a helpful assistant that answers questions based on the provided context.
    if the context does not contain enough information to answer the question, respond with "I don't know".
    Context: {context}
    Question: {question}
    Answer:
"""

In [13]:
from langchain.prompts import PromptTemplate
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt
)

In [14]:
prompt_template

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n    You are a helpful assistant that answers questions based on the provided context.\n    if the context does not contain enough information to answer the question, respond with "I don\'t know".\n    Context: {context}\n    Question: {question}\n    Answer:\n')

In [1]:
from langchain.core.output_parsers import StrOutputParser
output_parser = StrOutputParser()

ModuleNotFoundError: No module named 'langchain.core'