In [20]:
%pip install bs4 tiktoken pydantic

Note: you may need to restart the kernel to use updated packages.


In [11]:
import os
from dotenv import load_dotenv

from pydantic import BaseModel, Field
from typing import List
from langchain.output_parsers import PydanticOutputParser

# LangChain components
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import FAISS
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.runnables import RunnablePassthrough

# Load environment variables from .env file
load_dotenv()

# Configure the Gemini API key
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    raise ValueError("Gemini API Key not found. Please set it in the .env file.")

# Set up the Gemini model for generation.
# The key validated above is handed to the client explicitly so that the key we
# checked is the key that is used. Depending on the installed
# langchain-google-genai version, the client may otherwise only look for
# GOOGLE_API_KEY in the environment and ignore GEMINI_API_KEY.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,  # deterministic output for grading / extraction steps
    google_api_key=api_key,
    timeout=None,
    max_retries=2,
)

In [2]:
# The documents we want to index
urls = [
    "https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-2-reflection/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-3-tool-use/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-4-planning/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-5-multi-agent-collaboration/?ref=dl-staging-website.ghost.io"
]

# Load the documents
print("Loading documents from URLs...")
loader = WebBaseLoader(urls)
docs = loader.load()

# Split the documents into manageable chunks (sizes are measured in tiktoken tokens)
print("Splitting documents...")
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=50
)
splits = text_splitter.split_documents(docs)
# Fail early with a readable message instead of an obscure error from the
# vector store when the pages could not be fetched or contained no text.
if not splits:
    raise ValueError("No text chunks were produced from the URLs; nothing to index.")

# Create embeddings and vector store.
# The API key validated in the setup cell is passed explicitly so the embedding
# client uses the same credential as the chat model rather than relying on
# whichever environment variable the library happens to look up.
print("Creating vector store...")
embeddings = GoogleGenerativeAIEmbeddings(
    model="gemini-embedding-001",
    google_api_key=api_key,
)
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)

# Create the retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": 5}) # Retrieve top 5 chunks

print("\nSetup Complete! We are ready to build the advanced RAG chains.")

Loading documents from URLs...
Splitting documents...
Creating vector store...

Setup Complete! We are ready to build the advanced RAG chains.


In [3]:
# Example query; the grading, generation and highlighting cells below all reuse it.
question = "what are the different kinds of agentic design patterns?"
# Ask the retriever for the chunks matching the question.
# NOTE: this rebinds `docs`, which previously held the full pages loaded from the
# URLs; from here on `docs` means "retrieved chunks for `question`".
docs = retriever.invoke(question)

In [4]:
# Show the top-ranked retrieved chunk: its title, source URL and text.
first_hit = docs[0]
print(
    f"Title: {first_hit.metadata['title']}\n\n"
    f"Source: {first_hit.metadata['source']}\n\n"
    f"Content: {first_hit.page_content}\n"
)

Title: Agentic Design Patterns Part 4: Planning

Source: https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-4-planning/?ref=dl-staging-website.ghost.io

Content: Agentic Design Patterns Part 4: Planning✨ AI Dev 25 heads to New York City on November 14! Get your Early Bird tickets nowExplore CoursesAI NewsletterThe BatchAndrew's LetterData PointsML ResearchBlog✨ AI Dev x NYCCommunityForumEventsAmbassadorsAmbassador SpotlightResourcesCompanyAboutCareersContactStart LearningWeekly IssuesAndrew's LettersData PointsML ResearchBusinessScienceCultureHardwareAI CareersAboutSubscribeThe BatchLettersArticleAgentic Design Patterns Part 4, Planning Large language models can drive powerful agents to execute complex tasks if you ask them to plan the steps before they act.LettersTechnical InsightsPublishedApr 10, 2024Reading time3 min readShareDear friends,Planning is a key agentic AI design pattern in which we use a large language model (LLM) to autonomously decide on what sequence o

In [5]:
# Output schema for the relevance grader (passed to `llm.with_structured_output`).
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as schema text, so treat them as prompt wording, not
# just documentation — confirm before rewording.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Relevance verdict. The description asks for 'yes' or 'no', but the `str`
    # annotation itself does not restrict the value to those two strings.
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# Grader instructions (system role) for the relevance check
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

# Two-message prompt: the instructions above, then the document/question pair
grade_prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
])

# Gemini constrained to answer with a GradeDocuments object
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Full grader: fill the prompt, then get the structured verdict
retrieval_grader = grade_prompt | structured_llm_grader

In [6]:
# Keep only the retrieved chunks that the grader judges relevant to the question.
docs_to_use = []
for doc in docs:
    print(doc.page_content, '\n', '-'*50)
    res = retrieval_grader.invoke({"question": question, "document": doc.page_content})
    print(res,'\n')
    # The score is a free-form string, so normalise case/whitespace ("Yes", " yes")
    # before comparing. A missing structured result is treated as "not relevant"
    # rather than crashing the loop with an AttributeError.
    verdict = (res.binary_score if res is not None else "").strip().lower()
    if verdict == 'yes':
        docs_to_use.append(doc)

Agentic Design Patterns Part 4: Planning✨ AI Dev 25 heads to New York City on November 14! Get your Early Bird tickets nowExplore CoursesAI NewsletterThe BatchAndrew's LetterData PointsML ResearchBlog✨ AI Dev x NYCCommunityForumEventsAmbassadorsAmbassador SpotlightResourcesCompanyAboutCareersContactStart LearningWeekly IssuesAndrew's LettersData PointsML ResearchBusinessScienceCultureHardwareAI CareersAboutSubscribeThe BatchLettersArticleAgentic Design Patterns Part 4, Planning Large language models can drive powerful agents to execute complex tasks if you ask them to plan the steps before they act.LettersTechnical InsightsPublishedApr 10, 2024Reading time3 min readShareDear friends,Planning is a key agentic AI design pattern in which we use a large language model (LLM) to autonomously decide on what sequence of steps to execute to accomplish a larger task. For example, if we ask an agent to do online research on a given topic, we might use an LLM to break down the objective into small

In [7]:
# Answer-generation prompt: system instructions plus a human message that carries
# the formatted documents and the user question in XML-like tags.
# NOTE(review): the system text says "based upon your knowledge" and never tells
# the model to rely on the retrieved documents, although a later cell grades the
# answer for being grounded in them — confirm this wording is intended.
system = """You are an assistant for question-answering tasks. Answer the question based upon your knowledge. 
Use three-to-five sentences maximum and keep the answer concise."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved documents: \n\n <docs>{documents}</docs> \n\n User question: <question>{question}</question>"),
    ]
)

# Post-processing
def format_docs(docs):
    """Render documents as numbered ``<docN>`` blocks for inclusion in a prompt.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a mapping) and
            ``page_content`` (the text).

    Returns:
        A single string with one ``<docN>: ... </docN>`` block per document
        (numbered from 1), blocks separated by a blank line. An empty input
        yields an empty string.

    A missing ``title`` or ``source`` metadata entry is rendered as
    ``unknown`` instead of raising ``KeyError``, since not every loaded page is
    guaranteed to provide both.
    """
    blocks = []
    for idx, doc in enumerate(docs, start=1):
        meta = doc.metadata or {}
        title = meta.get('title', 'unknown')
        source = meta.get('source', 'unknown')
        blocks.append(
            f"<doc{idx}>:\nTitle:{title}\nSource:{source}\nContent:{doc.page_content}\n</doc{idx}>\n"
        )
    return "\n".join(blocks)

# Generation pipeline: fill the prompt, call Gemini, reduce the reply to plain text
rag_chain = prompt | llm | StrOutputParser()

# Answer the question using only the chunks that passed the relevance grader
rag_inputs = {
    "documents": format_docs(docs_to_use),
    "question": question,
}
generation = rag_chain.invoke(rag_inputs)
print(generation)

The different kinds of agentic design patterns include Reflection, Tool Use, Planning, and Multi-agent collaboration. Reflection involves the Large Language Model (LLM) examining its own work to improve it. Tool Use allows the LLM to utilize external tools like web search or code execution to gather information or take action. Planning enables the LLM to autonomously devise and execute a sequence of steps for complex tasks. Lastly, Multi-agent collaboration involves multiple AI agents working together, often by having an LLM play different roles, to achieve a common goal.


In [8]:
# Data model
# Output schema for the hallucination grader (passed to `llm.with_structured_output`).
# NOTE(review): the docstring and Field description are presumably sent to the
# model as schema text — treat them as prompt wording and confirm before editing.
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in 'generation' answer."""

    # Required field (`...` marks it as having no default). Per the description,
    # 'yes' means the answer IS grounded in the facts; the `str` annotation does
    # not itself restrict the value to 'yes'/'no'.
    binary_score: str = Field(
        ...,
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

# Grader instructions (system role) for the groundedness check
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
    Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""

# Two-message prompt: the instructions above, then the facts and the generated answer
hallucination_prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    ("human", "Set of facts: \n\n <facts>{documents}</facts> \n\n LLM generation: <generation>{generation}</generation>"),
])

# Gemini constrained to answer with a GradeHallucinations object
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# Full grader: fill the prompt, then get the structured verdict
hallucination_grader = hallucination_prompt | structured_llm_grader

# Check the generated answer against the documents it was produced from
grader_inputs = {
    "documents": format_docs(docs_to_use),
    "generation": generation,
}
response = hallucination_grader.invoke(grader_inputs)
print(response)

binary_score='yes'


In [12]:
# Data model
# Output schema for the highlighting step; handed to PydanticOutputParser below,
# whose format instructions are injected into the prompt.
# NOTE(review): the Field descriptions are therefore presumably visible to the
# model — treat them as prompt wording (typos included) and confirm before editing.
class HighlightDocuments(BaseModel):
    """Return the specific part of a document used for answering the question."""

    # All four fields are required lists of strings (`...` = no default).
    # NOTE(review): the lists look like parallel columns (entry i of each list
    # describing the same segment), but nothing here enforces equal lengths —
    # verify before zipping them together.

    # Document identifiers; the prompt labels documents via `format_docs`.
    # Note that the field name `id` shadows the builtin inside the class body.
    id: List[str] = Field(
        ...,
        description="List of id of docs used to answers the question"
    )

    # Titles of the documents that were used.
    title: List[str] = Field(
        ...,
        description="List of titles used to answers the question"
    )

    # Source URLs/identifiers of the documents that were used.
    source: List[str] = Field(
        ...,
        description="List of sources used to answers the question"
    )

    # Verbatim text snippets taken from the used documents.
    segment: List[str] = Field(
        ...,
        description="List of direct segements from used documents that answers the question"
    )

# Parser that turns the model's text reply into a HighlightDocuments instance;
# its format instructions are embedded in the prompt below.
parser = PydanticOutputParser(pydantic_object=HighlightDocuments)

# Prompt text for the highlighting step. Placeholders: {documents}, {question},
# {generation} are supplied at invoke time; {format_instructions} is pre-filled
# through `partial_variables` when the PromptTemplate is built.
# NOTE: this rebinds `system` and `prompt`, which earlier cells used for the
# answer-generation prompt.
system = """You are an advanced assistant for document search and retrieval. You are provided with the following:
1. A question.
2. A generated answer based on the question.
3. A set of documents that were referenced in generating the answer.

Your task is to identify and extract the exact inline segments from the provided documents that directly correspond to the content used to 
generate the given answer. The extracted segments must be verbatim snippets from the documents, ensuring a word-for-word match with the text 
in the provided documents.

Ensure that:
- (Important) Each segment is an exact match to a part of the document and is fully contained within the document text.
- The relevance of each segment to the generated answer is clear and directly supports the answer provided.
- (Important) If you didn't used the specific document don't mention it.

Used documents: <docs>{documents}</docs> \n\n User question: <question>{question}</question> \n\n Generated answer: <answer>{generation}</answer>

<format_instruction>
{format_instructions}
</format_instruction>
"""


# Single-string prompt: callers pass only the three input variables, the format
# instructions are bound once here.
prompt = PromptTemplate(
    template= system,
    input_variables=["documents", "question", "generation"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Chain (kept for reuse: prompt -> Gemini -> HighlightDocuments)
doc_lookup = prompt | llm | parser

# Run
# The model is called exactly once and parsing is done as a separate step, so
# that on a parse failure the raw output printed is the very output that failed
# (re-invoking the model would cost a second call and may return different text).
lookup_inputs = {"documents":format_docs(docs_to_use), "question": question, "generation": generation}
raw_output = (prompt | llm).invoke(lookup_inputs)
try:
    lookup_response = parser.invoke(raw_output)
    print(lookup_response)
except Exception as e:
    # Only parsing/validation is guarded here; errors from the model call itself
    # propagate normally. Reset the result so later cells cannot silently pick up
    # a stale value left over from a previous run of this cell.
    lookup_response = None
    print("Raw LLM output (not parsed):\n", raw_output)
    print("Error:", e)

id=['doc2', 'doc1', 'doc3'] title=['Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance', 'Agentic Design Patterns Part 4: Planning', 'Agentic Design Patterns Part 5, Multi-Agent Collaboration'] source=['https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io', 'https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-4-planning/?ref=dl-staging-website.ghost.io', 'https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-5-multi-agent-collaboration/?ref=dl-staging-website.ghost.io'] segment=['Reflection: The LLM examines its own work to come up with ways to improve it.', 'Tool Use: The LLM is given tools such as web search, code execution, or any other function to help it gather information, take action, or process data.', 'Planning is a key agentic AI design pattern in which we use a large language model (LLM) to autonomously decide on what sequence of steps to execute to accomplish a larger task.