In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install langchain-community langchain chromadb cohere
!pip install -U langchain-cohere
!pip install langchain_groq

In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain.embeddings import CohereEmbeddings

In [11]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
COHERE_API_KEY = user_secrets.get_secret("COHERE_API_KEY")
GEMINI_API_KEY = user_secrets.get_secret("GEMINI_API_KEY")
GROQ_API_KEY = user_secrets.get_secret("GROQ_API_KEY")

In [12]:
# Set embeddings
embedding_model = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=COHERE_API_KEY, user_agent="my_project")

# Docs to index
urls = [
    "https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-2-reflection/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-3-tool-use/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-4-planning/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-5-multi-agent-collaboration/?ref=dl-staging-website.ghost.io"
]

# Load
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorstore
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag",
    embedding=embedding_model,
)

retriever = vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={'k': 4}, # number of documents to retrieve
            )

In [13]:
question = "what are the different kind of agentic design patterns?"

In [14]:
docs = retriever.invoke(question)

In [15]:
print(f"Title: {docs[0].metadata['title']}\n\nSource: {docs[0].metadata['source']}\n\nContent: {docs[0].page_content}\n")

Title: Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance

Source: https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io

Content: GPT-4 is dwarfed by incorporating an iterative agent workflow. Indeed, wrapped in an agent loop, GPT-3.5 achieves up to 95.1%. Open source agent tools and the academic literature on agents are proliferating, making this an exciting time but also a confusing one. To help put this work into perspective, I’d like to share a framework for categorizing design patterns for building agents. My team AI Fund is successfully using these patterns in many applications, and I hope you find them useful.Reflection: The LLM examines its own work to come up with ways to improve it. Tool Use: The LLM is given tools such as web search, code execution, or any other function to help it gather information, take action, or process data.Planning: The LLM comes up with, and executes, a multistep plan to achiev

# Check document relevancy 

In [21]:
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_groq import ChatGroq

# Data model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# LLM with function call
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0, api_key=GROQ_API_KEY)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader

# Filter out the non-relevant docs

In [25]:
docs_to_use = []
max_docs = 3  # Limit to 3 documents

for idx, doc in enumerate(docs):
    if idx >= max_docs:  # Break the loop if we've already processed 3 documents
        break
    
    print(doc.page_content, '\n', '-'*50)
    res = retrieval_grader.invoke({"question": question, "document": doc.page_content})
    print(res, '\n')
    
    if res.binary_score == 'yes':
        docs_to_use.append(doc)

# Print the number of docs retrieved
print(f"Retrieved {len(docs_to_use)} documents.")


GPT-4 is dwarfed by incorporating an iterative agent workflow. Indeed, wrapped in an agent loop, GPT-3.5 achieves up to 95.1%. Open source agent tools and the academic literature on agents are proliferating, making this an exciting time but also a confusing one. To help put this work into perspective, I’d like to share a framework for categorizing design patterns for building agents. My team AI Fund is successfully using these patterns in many applications, and I hope you find them useful.Reflection: The LLM examines its own work to come up with ways to improve it. Tool Use: The LLM is given tools such as web search, code execution, or any other function to help it gather information, take action, or process data.Planning: The LLM comes up with, and executes, a multistep plan to achieve a goal (for example, writing an outline for an essay, then doing online research, then writing a draft, and so on).Multi-agent collaboration: More than one AI agent work together, splitting up tasks and

In [26]:
# import time

# docs_to_use = []
# max_docs = 3  # Set the maximum number of documents to retrieve
# retries = 0
# max_retries = 5  # Set the max retries for error handling

# for doc in docs:
#     if len(docs_to_use) >= max_docs:
#         break  # Stop if we've already retrieved 3 documents
    
#     print(doc.page_content, '\n', '-'*50)
    
#     success = False
#     while retries < max_retries and not success:
#         try:
#             # Call the API to get the result for the document
#             res = retrieval_grader.invoke({"question": question, "document": doc.page_content})
#             print(res, '\n')
            
#             # If the binary_score is 'yes', append the document to docs_to_use
#             if res.get('binary_score') == 'yes':  # Adjust based on response format
#                 docs_to_use.append(doc)
            
#             success = True  # Mark as successful if no error occurs
#         except Exception as e:
#             # If an error occurs (e.g., rate limit), retry with delay
#             print(f"Error encountered: {e}. Retrying...")
#             retries += 1
#             delay = min(2 ** retries, 60)  # Exponential backoff with a max delay of 60 seconds
#             print(f"Retrying after {delay} seconds...")
#             time.sleep(delay)  # Wait before retrying

#     # If we've successfully retrieved 3 documents, stop further processing
#     if len(docs_to_use) >= max_docs:
#         break

# if len(docs_to_use) < max_docs:
#     print(f"Retrieved {len(docs_to_use)} documents, fewer than {max_docs} due to errors or retries.")
# else:
#     print(f"Successfully retrieved {max_docs} documents.")


In [28]:
from langchain_core.output_parsers import StrOutputParser

# Prompt
system = """You are an assistant for question-answering tasks. Answer the question based upon your knowledge. 
Use three-to-five sentences maximum and keep the answer concise."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved documents: \n\n <docs>{documents}</docs> \n\n User question: <question>{question}</question>"),
    ]
)

# LLM
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0, api_key = GROQ_API_KEY)

# Post-processing
def format_docs(docs):
    return "\n".join(f"<doc{i+1}>:\nTitle:{doc.metadata['title']}\nSource:{doc.metadata['source']}\nContent:{doc.page_content}\n</doc{i+1}>\n" for i, doc in enumerate(docs))

# Chain
rag_chain = prompt | llm | StrOutputParser()

# Run
generation = rag_chain.invoke({"documents":format_docs(docs_to_use), "question": question})
print(generation)

According to the retrieved documents, there are four main agentic design patterns mentioned:

1. Reflection: The LLM examines its own work to come up with ways to improve it.
2. Tool Use: The LLM is given tools such as web search, code execution, or any other function to help it gather information, take action, or process data.
3. Planning: The LLM comes up with, and executes, a multistep plan to achieve a goal.
4. Multi-agent collaboration: More than one AI agent work together, splitting up tasks and discussing and debating ideas, to come up with better solutions than a single agent would.


# Check for Hallucinations

In [29]:
# Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in 'generation' answer."""

    binary_score: str = Field(
        ...,
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

# LLM with function call
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0, api_key = GROQ_API_KEY)
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# Prompt
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
    Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n <facts>{documents}</facts> \n\n LLM generation: <generation>{generation}</generation>"),
    ]
)

hallucination_grader = hallucination_prompt | structured_llm_grader

response = hallucination_grader.invoke({"documents": format_docs(docs_to_use), "generation": generation})
print(response)

binary_score='yes'


# Highlight the used docs

In [30]:
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate

# Data model
class HighlightDocuments(BaseModel):
    """Return the specific part of a document used for answering the question."""

    id: List[str] = Field(
        ...,
        description="List of id of docs used to answers the question"
    )

    title: List[str] = Field(
        ...,
        description="List of titles used to answers the question"
    )

    source: List[str] = Field(
        ...,
        description="List of sources used to answers the question"
    )

    segment: List[str] = Field(
        ...,
        description="List of direct segements from used documents that answers the question"
    )

# LLM
llm = ChatGroq(model="mixtral-8x7b-32768", temperature=0, api_key = GROQ_API_KEY)

# parser
parser = PydanticOutputParser(pydantic_object=HighlightDocuments)

# Prompt
system = """You are an advanced assistant for document search and retrieval. You are provided with the following:
1. A question.
2. A generated answer based on the question.
3. A set of documents that were referenced in generating the answer.

Your task is to identify and extract the exact inline segments from the provided documents that directly correspond to the content used to 
generate the given answer. The extracted segments must be verbatim snippets from the documents, ensuring a word-for-word match with the text 
in the provided documents.

Ensure that:
- (Important) Each segment is an exact match to a part of the document and is fully contained within the document text.
- The relevance of each segment to the generated answer is clear and directly supports the answer provided.
- (Important) If you didn't used the specific document don't mention it.

Used documents: <docs>{documents}</docs> \n\n User question: <question>{question}</question> \n\n Generated answer: <answer>{generation}</answer>

<format_instruction>
{format_instructions}
</format_instruction>
"""


prompt = PromptTemplate(
    template= system,
    input_variables=["documents", "question", "generation"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Chain
doc_lookup = prompt | llm | parser

# Run
lookup_response = doc_lookup.invoke({"documents":format_docs(docs_to_use), "question": question, "generation": generation})

In [31]:
for id, title, source, segment in zip(lookup_response.id, lookup_response.title, lookup_response.source, lookup_response.segment):
    print(f"ID: {id}\nTitle: {title}\nSource: {source}\nText Segment: {segment}\n")

ID: doc1
Title: Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance
Source: https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io
Text Segment: I’d like to share a framework for categorizing design patterns for building agents. My team AI Fund is successfully using these patterns in many applications, and I hope you find them useful.

ID: doc2
Title: Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance
Source: https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io
Text Segment: I’d like to share a framework for categorizing design patterns for building agents. My team AI Fund is successfully using these patterns in many applications, and I hope you find them useful.

ID: doc3
Title: Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance
Source: https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-stagi