In [1]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage
from dotenv import load_dotenv
load_dotenv()

True

In [2]:

llm=ChatGroq(model='llama-3.1-70b-versatile')

In [3]:
messages=[
    SystemMessage(content="Translate the Following from English to Italian"),
    HumanMessage(content="hi!")
]
llm.invoke(messages)

AIMessage(content='Ciao!', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 44, 'total_tokens': 48, 'completion_time': 0.016023862, 'prompt_time': 0.011288759, 'queue_time': 0.49734073800000006, 'total_time': 0.027312621}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_9260b4bb2e', 'finish_reason': 'stop', 'logprobs': None}, id='run-66883e01-f309-415e-81d8-fe456cc9d099-0', usage_metadata={'input_tokens': 44, 'output_tokens': 4, 'total_tokens': 48})

In [4]:
from langchain_core.output_parsers import StrOutputParser
parser=StrOutputParser()

result=llm.invoke(messages)
parser.invoke(result)

'Ciao!'

In [5]:
chain=llm | parser
chain.invoke(messages)

'Ciao!'

In [6]:
from langchain_core.prompts import ChatPromptTemplate
system_template="Translate the following into {language}:"
prompt_template=ChatPromptTemplate.from_messages([("system",system_template),("user","{text}")])
result=prompt_template.invoke({"language":"hindi","text":"hi"})
result

ChatPromptValue(messages=[SystemMessage(content='Translate the following into hindi:', additional_kwargs={}, response_metadata={}), HumanMessage(content='hi', additional_kwargs={}, response_metadata={})])

In [7]:
result.to_messages()

[SystemMessage(content='Translate the following into hindi:', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='hi', additional_kwargs={}, response_metadata={})]

In [8]:
chain=prompt_template | llm | parser
chain.invoke({"language":"french","text":"hi"})
# my understanding, first prompt_template is invoked, then llm is invoked and then parser is invoked

'Salut'

## RAG for Q&A

In [9]:
#Each LLM has input token or character limit. So extract data from documents nad chunk it. Then create embeddings for each text chunk.
# Save embeddings in a vector database. 

#user question is converted into embeddings and then we do semantic search to find top 6 or 3 answer from our knowledge base. Rank answers
# Then we pass the question directly to LLM without converting to embeddings and give the llm answers rank and LLM generates a natural response.
# so the chatmodel/LLM produces an answer using a prompt that includes the question and retreived data.

In [10]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

bs4_strainer=bs4.SoupStrainer(class_=("post-title","post-header","post-content"))
loader=WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only":bs4_strainer},
)

docs=loader.load()
len(docs[0].page_content)

USER_AGENT environment variable not set, consider setting it to identify your requests.


43131

In [11]:
print(docs[0].page_content[:500])



      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


In [12]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)

all_splits=text_splitter.split_documents(docs)
len(all_splits)

66

In [13]:
len(all_splits[0].page_content)

969

In [14]:
all_splits[0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'start_index': 8}

In [15]:
all_splits[0]

Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 8}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from 

In [16]:
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name="sentence-transformers/all-mpnet-base-v2"
model_kwargs={"device": "cpu"}
embeddings=HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
vectorStore=Chroma.from_documents(documents=all_splits, embedding=embeddings)

  embeddings=HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
  from tqdm.autonotebook import tqdm, trange


In [17]:
retriever=vectorStore.as_retriever(search_type="similarity", search_kwargs={"k":6})
retrieved_docs=retriever.invoke("What are the approaches to Task Decomposition?")
retrieved_docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 2192}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'),
 Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 19373}, page_content="(3) Task execution: Expert models execute on the specific tasks and log results.\nInstruction:\n\nWith th

In [18]:
import os
api_key = os.getenv("LANGSMITH_API_KEY")

In [20]:
from langchain import hub
prompt=hub.pull("rlm/rag-prompt") # predefined prompt is imported
example_messages=prompt.invoke(
    {"context":"filler context",
     "question":"filler question"}
).to_messages()

In [21]:
example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:", additional_kwargs={}, response_metadata={})]

In [22]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_doc(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain=(
    {"context":retriever | format_doc, "question":RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")

'Task decomposition is the process of breaking down a problem into smaller, manageable tasks or subgoals to achieve the desired outcome. It can be done in three ways: (1) by using a large language model (LLM) with simple prompting, (2) by using task-specific instructions, or (3) with human inputs. This process is a key part of task planning in systems like HuggingGPT.'

In [23]:
vectorStore.delete_collection()

In [25]:
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name="sentence-transformers/all-mpnet-base-v2"
model_kwargs={"device": "cpu"}
embeddings=HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
vectorStore=Chroma.from_documents(documents=all_splits, embedding=embeddings)

In [26]:
retriever=vectorStore.as_retriever(search_type="similarity", search_kwargs={"k":6})
retrieved_docs=retriever.invoke("What are the approaches to Task Decomposition?")
retrieved_docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 2192}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'),
 Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 19373}, page_content="(3) Task execution: Expert models execute on the specific tasks and log results.\nInstruction:\n\nWith th

In [27]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


system_prompt=(
    "You are an assistant for question-answering tasks."
    "Use the following pieces of retrieved context to answer"
    "the question. If you don't know the answer, say that you"
    "don't know. Use three sentences maximum and keep the"
    "answer concise."
    "\n\n"
    "{context}"
)

prompt=ChatPromptTemplate.from_messages(
    [("system",system_prompt),
     ("human","{input}")]
)

question_answer_chain=create_stuff_documents_chain(llm,prompt)
rag_chain=create_retrieval_chain(retriever, question_answer_chain)
response=rag_chain.invoke({"input":"What is Task Decomposition?"})

response

{'input': 'What is Task Decomposition?',
 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 2192}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'),
  Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 19373}, page_content="(3) Task execution: Expert models execute on the sp

In [3]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from langchain import hub
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

loader=WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content","post-title","post-header")
        )
)
)

docs=loader.load()
print(f"The loaded document contains {len(docs[0].page_content)} characters")

USER_AGENT environment variable not set, consider setting it to identify your requests.


The loaded document contains 43131 characters


In [4]:
print(f"The first 500 characters of the document: \n {docs[0].page_content[:500]}\n")

The first 500 characters of the document: 
 

      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In



In [6]:
#Indexing: Split
textSplitter=RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split=textSplitter.split_documents(docs)
print(f'Total text chunks are {len(split)} \n\n')
print(f"The size of the first text chunk is {len(split[0].page_content)}\n")

Total text chunks are 66 


The size of the first text chunk is 969



In [7]:
#Indexing: Store
model_name="sentence-transformers/all-mpnet-base-v2"
model_kwargs={"device":"cpu"}
embeddings=HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
vectorStore=Chroma.from_documents(documents=split, embedding=embeddings)

#Retrieval and Generation: Retrieve
retriever=vectorStore.as_retriever(search_type="similarity", search_kwargs={"k":6})
retrieved_docs=retriever.invoke("What are the approaches to Task Decomposition?")
print("Retrieved Docs \n", retrieved_docs)

  from tqdm.autonotebook import tqdm, trange


Retrieved Docs 
 [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'), Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content="Prompt LM with 100 most recent observations and to generate 3 most salient high-level questions given a set of observations/statemen

In [8]:
system_prompt=(
    "You are an assistant for question-answering tasks."
    "Use the following pieces of retrieved context to answer"
    "the question. If you don't know the answer, say that you don't know. Use three sentences maximum abd keep the"
    "answer concise."
    "\n\n"
    "{context}"
)

prompt=ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        ("human","{input}")
    ]
)

In [9]:
question_answering_chain=create_stuff_documents_chain(llm, prompt)
rag_chain=create_retrieval_chain(retriever, question_answering_chain)
response=rag_chain.invoke({"input": "What is task decomposition?"})
response["answer"]

'Task decomposition is the process of breaking down a problem or task into smaller, more manageable subtasks or steps. It can be done using various methods, including simple prompting to a large language model (LLM), using task-specific instructions, or with human input.'

# Adding Chat History
1. We need to update our prompt to support historical messages as an input
2. Contextualizing Questions: Add a subchain that takes the latest user question and reformulates it in the context of chat history. This can be thought of simply as building a new "history aware" retriever. 

Before we had: Query -> retriever
Now, (query, conversation history)-> LLM -> rephrased query -> retriever

In [10]:
# Contextualizing the question

from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

contextualize_q_system_prompt=(
    "Given a chat history and the latest user question"
    "which might reference context in the chat history,"
    "formulate a standalone question which can be understood"
    "without the chat history. Do not answer the question."
    "just reformulate it if needed and otherwise return it as is"
)

context_q_prompt=ChatPromptTemplate.from_messages(
    [
        ("system",contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human","{input}")
    ]
)

history_aware_retriever=create_history_aware_retriever(
    llm, retriever, context_q_prompt
)

system_prompt=(
    "You are an assistant for question-answering tasks."
    "Use the following pieces of retrieved context to answer"
    "the question. If you don't know the answer, say that you don't know. Use three sentemces maximum and keep the"
    "answer concise."
    "\n\n"
    "{context}"
)

qa_prompt=ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human","{input}"),
    ]
)

question_answer_chain=create_stuff_documents_chain(llm, qa_prompt)
rag_chain=create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [11]:
from langchain_core.messages import AIMessage, HumanMessage
chat_history=[]
question="What is Task Decomposition?"
ai_msg_1=rag_chain.invoke({"input":question, "chat_history": chat_history})

In [13]:
ai_msg_1

{'input': 'What is Task Decomposition?',
 'chat_history': [],
 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'),
  Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content="Prompt LM with 100 most recent observations and to generate 3 most salien

In [14]:
chat_history.extend([
    HumanMessage(content=question),
    AIMessage(content=ai_msg_1['answer'])
])

In [15]:
second_question="What are common ways of doing it"

ai_msg_2=rag_chain.invoke({"input": second_question, "chat_history": chat_history})

In [16]:
ai_msg_2

{'input': 'What are common ways of doing it',
 'chat_history': [HumanMessage(content='What is Task Decomposition?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Task decomposition is the process of breaking down a complex problem into smaller, manageable sub-tasks or steps. It can be done in three ways: (1) by a Large Language Model (LLM) with simple prompting, (2) by using task-specific instructions, or (3) with human inputs.', additional_kwargs={}, response_metadata={})],
 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority v

# Stateful Management of ChatHistory

In real world Q and A applications, we need a way to automatically insert and update chat history. 
For this we use:
BaseChatMessageHistory: Store Chat History

RunnableWithMessageHistory:
This is a wrapper for Large Chain Expression Language (LCEL) chain and a base chat message history, this handles inserting and updating chat history after each invocation.  

In [17]:
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.document_loaders import WebBaseLoader
from langchain_groq import ChatGroq
from langchain_chroma import Chroma
from langchain import hub
import bs4
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

In [19]:
## Statefully manage Chat History
store={}

def get_session_history(session_id:str)-> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id]=ChatMessageHistory()
    return store[session_id]

convolutional_retrieval_chain=RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [20]:
convolutional_retrieval_chain.invoke(
    {"input":"What is Task Decomposition"},
    config={
        "configurable":{"session_id":"abc123"}
    },
)['answer']

'Task decomposition is the process of breaking down a problem into smaller, more manageable subtasks or steps. In the context of the provided text, task decomposition can be done in three ways: (1) using a large language model (LLM) with simple prompting, (2) using task-specific instructions, or (3) with human inputs.'

In [21]:
convolutional_retrieval_chain.invoke({"input":"What are common ways of doing it"},
                                    config={"configurable":{"session_id":"abc123"}},)['answer']

'Task decomposition can be done in three common ways: \n\n1. Using a large language model (LLM) with simple prompting, such as asking "Steps for XYZ." or "What are the subgoals for achieving XYZ?".\n2. Using task-specific instructions, such as "Write a story outline." for writing a novel.\n3. With human inputs, where a person manually breaks down a task into smaller subtasks.'

# Agents

Agents leverage the reasoning capabilities of LLMs to make decisions during execution.
1. Agents generate the input to retriever directly, without necessarily needing us to explicitly build in contextualization as we did above.
2. Agents can execute multiple retrieval steps in service of a query.

## Retrieval Tool
Agents can access tools and manage their execution. In this case, we will convert our retriever into a LangCaain tool to be wielded by the agent

In [22]:
from langchain.tools.retriever import create_retriever_tool

tool=create_retriever_tool(
    retriever,
    "blog_post_retriever",
    "Searches and returns excerpts from the Autonomous Agents blog post"
)

tools=[tool]

In [23]:
tool.invoke("Task decomposition")

'Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.\n\nPrompt LM with 100 most recent observations and to generate 3 most salient high-level questions given a set of observations/statements. Then ask LM to answer those questions.\n\n\nPlanning & Reacting: translate the reflections and the environment information into actions\n\nPlanning is essentially in order to optimize believability at the moment v

## Agent Constructor
Now that we have tools and LLM, we use LangGraph to construct the agent

In [30]:
from langgraph.prebuilt import create_react_agent
agent_executor=create_react_agent(llm, tools)

query="What is Task Decomposition"

for s in agent_executor.stream(
    {"messages": [HumanMessage(content=query)]},):
    print(s)
    print("----")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_x47a', 'function': {'arguments': '{"query": "Task Decomposition"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 20, 'prompt_tokens': 243, 'total_tokens': 263, 'completion_time': 0.08, 'prompt_time': 0.058093186, 'queue_time': 0.005254675000000007, 'total_time': 0.138093186}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_b3ae7e594e', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-b679d750-4e79-4c27-b85f-475425e6be9d-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'Task Decomposition'}, 'id': 'call_x47a', 'type': 'tool_call'}], usage_metadata={'input_tokens': 243, 'output_tokens': 20, 'total_tokens': 263})]}}
----
{'tools': {'messages': [ToolMessage(content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decompose

In [27]:
# we dont need to use chatMessageHistory

In [36]:
from langgraph.checkpoint.memory import MemorySaver
memory=MemorySaver()
agent_executor=create_react_agent(llm, tools, checkpointer=memory)

If we input a query that does not require a retrieval step, the agent does not execute one

In [37]:
config={"configurable":{"thread_id":"abc123"}}

for s in agent_executor.stream(
    {"messages":[HumanMessage(content="Hi! I am Moin")]}, config=config):
    print(s)
    print('----')

{'agent': {'messages': [AIMessage(content='Hello Moin. How can I assist you today?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 244, 'total_tokens': 256, 'completion_time': 0.048417196, 'prompt_time': 0.118446081, 'queue_time': -0.007212127999999998, 'total_time': 0.166863277}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_5c5d1b5cfb', 'finish_reason': 'stop', 'logprobs': None}, id='run-ce96c3cb-f67a-4eeb-863c-f9932fb1fcbf-0', usage_metadata={'input_tokens': 244, 'output_tokens': 12, 'total_tokens': 256})]}}
----


If we input a query that does require a retrieval step, the agent generates the input to the tool

In [38]:
for s in agent_executor.stream(
    {"messages":[HumanMessage(content="What is Task Decomposition?")]}, config=config):
    print(s)
    print('----')

{'agent': {'messages': [AIMessage(content='Task decomposition is a process of breaking down complex tasks or problems into smaller, more manageable sub-tasks or components. This technique is used to simplify the problem-solving process, identify the key elements of a task, and create a clear plan of action. By decomposing a task, you can better understand the requirements, identify potential roadblocks, and develop a more effective solution.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 76, 'prompt_tokens': 271, 'total_tokens': 347, 'completion_time': 0.305075805, 'prompt_time': 0.064335148, 'queue_time': 0.005450032000000007, 'total_time': 0.369410953}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_b6828be2c9', 'finish_reason': 'stop', 'logprobs': None}, id='run-601b845d-e032-4eff-840b-676214f36a77-0', usage_metadata={'input_tokens': 271, 'output_tokens': 76, 'total_tokens': 347})]}}
----


In [39]:
for s in agent_executor.stream(
    {"messages":[HumanMessage(content="What according to bloh posts are common ways of doing it? redo the search")]}, config=config):
    print(s)
    print('----')

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_rnx1', 'function': {'arguments': '{"query": "common task decomposition methods from Autonomous Agents blog post"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 372, 'total_tokens': 398, 'completion_time': 0.104, 'prompt_time': 0.099256861, 'queue_time': 0.06470342200000001, 'total_time': 0.203256861}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_b3ae7e594e', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-12e8af0b-e680-4a22-94fc-98ab57e6d030-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'common task decomposition methods from Autonomous Agents blog post'}, 'id': 'call_rnx1', 'type': 'tool_call'}], usage_metadata={'input_tokens': 372, 'output_tokens': 26, 'total_tokens': 398})]}}
----
{'tools': {'messages': [ToolMessage(content='Tree of Thoughts (Yao et al. 