In [6]:
# https://python.langchain.com/v0.2/docs/tutorials/qa_chat_history/

### Chains


In [7]:
import bs4
from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter

from core.llm import CHAT_LLM, EMBEDDINGS


# 1. Load, chunk and index the contents of the blog to create a retriever.
# Parsing is limited to elements carrying the post's title/header/content classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Split the page into overlapping chunks, then embed and index them in FAISS.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = FAISS.from_documents(splits, EMBEDDINGS)
retriever = vectorstore.as_retriever()


# 2. Incorporate the retriever into a question-answering chain.
# The system message ends with a `{context}` slot for the retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Answering chain first, then wrap it together with the retriever.
question_answer_chain = create_stuff_documents_chain(llm=CHAT_LLM, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [8]:
# Single-turn question with no chat history; the generated text is read from
# the "answer" key of the chain's result.
response = rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
)
response["answer"]

"Task decomposition is a technique used to break down complex tasks into smaller and simpler steps, making them more manageable. It involves transforming big tasks into multiple smaller tasks to enhance model performance and provide insight into the model's thinking process. This approach allows models to utilize more computation at test time by thinking step by step."

### Adding chat history

We'll need to update two things about our existing app:

1. Prompt: Update our prompt to support historical messages as an input.
2. Contextualizing questions: Add a sub-chain that takes the latest user question and reformulates it in the context of the chat history. This can be thought of simply as building a new "history aware" retriever. Whereas before we had:
   - query -> retriever \
     Now we will have:
   - (query, conversation history) -> LLM -> rephrased query -> retriever


In [9]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder


# Instruction for the rewriting step: turn the latest user message into a
# standalone question, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt layout: system instruction, prior turns, newest user message.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm=CHAT_LLM,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

- Consideration \
  Rephrasing might cause misunderstanding in Korean

- What are common ways of doing it? -> What are some common methods used for task decomposition?


In [10]:
from langchain_core.messages import HumanMessage, AIMessage


# Hand-written first exchange, used to exercise the question-rewriting prompt.
chat_history = [
    HumanMessage("What is Task Decomposition?"),
    AIMessage(
        "Task decomposition is a technique used to break down complex tasks into smaller and simpler steps, making them more manageable for an agent or model to handle. It involves transforming a big task into multiple smaller tasks, allowing for a step-by-step approach to problem-solving. This method sheds light on the interpretation of the model's thinking process by utilizing more computation at test time."
    ),
]

In [12]:
# Run only the rewriting step (prompt -> LLM) to inspect how the follow-up
# question is reformulated given the history above.
chain = contextualize_q_prompt.pipe(CHAT_LLM)
chain.invoke(
    {"input": "What are common ways of doing it?", "chat_history": chat_history}
)

AIMessage(content='What are some common methods used for task decomposition?', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 155, 'total_tokens': 165}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-8a6321e0-75dc-4ddf-837b-48bd2acb6c76-0', usage_metadata={'input_tokens': 155, 'output_tokens': 10, 'total_tokens': 165})

In [13]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


# Same QA prompt as before, plus a slot for the prior conversation turns
# between the system instructions and the newest user message.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Rebuild the RAG chain on top of the history-aware retriever.
question_answer_chain = create_stuff_documents_chain(llm=CHAT_LLM, prompt=qa_prompt)
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [14]:
from langchain_core.messages import AIMessage, HumanMessage

# Thread the history through by hand: start empty, record each turn after it runs.
chat_history = []

question = "What is Task Decomposition?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

# Store the first exchange so the follow-up can refer back to it.
chat_history.append(HumanMessage(content=question))
chat_history.append(AIMessage(content=ai_msg_1["answer"]))

second_question = "What are common ways of doing it?"
ai_msg_2 = rag_chain.invoke(
    {"input": second_question, "chat_history": chat_history}
)

print(ai_msg_2["answer"])

Task decomposition can be achieved through various methods, including using Language Model (LLM) with simple prompting like "Steps for XYZ" or asking for subgoals to achieve a task. Task-specific instructions can also be used, such as requesting a story outline for writing a novel. Additionally, human inputs can be utilized to decompose tasks into smaller, more manageable steps.


In [15]:
# How to add message history: https://python.langchain.com/v0.2/docs/how_to/message_history/
# Redis integration: https://python.langchain.com/v0.2/docs/integrations/memory/redis_chat_message_history/

In [16]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory


# In-memory mapping of session id -> chat message history.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored for ``session_id``, creating an empty one on first use."""
    if session_id in store:
        return store[session_id]
    history = ChatMessageHistory()
    store[session_id] = history
    return history


# Wrap the RAG chain so history is looked up via `get_session_history`
# using the session id passed in the config.
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    output_messages_key="answer",
    history_messages_key="chat_history",
)

# First turn of session "abc123".
rst = conversational_rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
    config={
        "configurable": {"session_id": "abc123"},
    },
)
rst["answer"]

"Task decomposition is a technique used to break down complex tasks into smaller and simpler steps, making them more manageable for AI systems. It involves transforming big tasks into multiple smaller tasks to enhance model performance and provide insights into the model's thinking process. This approach allows models to utilize more computation at test time by thinking step by step."

In [17]:
# Follow-up turn in the same session ("abc123"); no history is passed explicitly.
rst = conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={
        "configurable": {"session_id": "abc123"},
    },
)
rst["answer"]

'Common ways of task decomposition include using techniques like Chain of Thought (CoT) to prompt models to "think step by step" and decompose tasks into smaller steps. Task decomposition can also be done using language models with simple prompting, task-specific instructions, or human inputs to break down complex tasks into more manageable components. Additionally, task decomposition can involve generating multiple thoughts per step to explore various reasoning possibilities.'

In [18]:
# Replay what was stored for session "abc123", labelling each message by speaker.
for message in store["abc123"].messages:
    prefix = "AI" if isinstance(message, AIMessage) else "User"
    print(f"{prefix}: {message.content}")

User: What is Task Decomposition?
AI: Task decomposition is a technique used to break down complex tasks into smaller and simpler steps, making them more manageable for AI systems. It involves transforming big tasks into multiple smaller tasks to enhance model performance and provide insights into the model's thinking process. This approach allows models to utilize more computation at test time by thinking step by step.
User: What are common ways of doing it?
AI: Common ways of task decomposition include using techniques like Chain of Thought (CoT) to prompt models to "think step by step" and decompose tasks into smaller steps. Task decomposition can also be done using language models with simple prompting, task-specific instructions, or human inputs to break down complex tasks into more manageable components. Additionally, task decomposition can involve generating multiple thoughts per step to explore various reasoning possibilities.


### Tying it together

![](https://python.langchain.com/v0.2/assets/images/conversational_retrieval_chain-5c7a96abe29e582bc575a0a0d63f86b0.png)


In [19]:
import bs4
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_text_splitters import RecursiveCharacterTextSplitter


### Construct retriever ###
# Load the blog post (title, header and content elements only).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Chunk, embed and index the post, then expose the index as a retriever.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = FAISS.from_documents(splits, EMBEDDINGS)
retriever = vectorstore.as_retriever()


### Contextualize question ###
# The LLM rewrites the latest user message into a standalone question
# before it is handed to the retriever.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm=CHAT_LLM,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)


### Answer question ###
# QA instructions; the trailing `{context}` slot receives the retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm=CHAT_LLM, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by answer generation.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)


### Statefully manage chat history ###
# Fresh in-memory mapping of session id -> chat message history.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Look up the history for ``session_id``; create and register one if missing."""
    if session_id in store:
        return store[session_id]
    history = ChatMessageHistory()
    store[session_id] = history
    return history


conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    output_messages_key="answer",
    history_messages_key="chat_history",
)

In [20]:
# First turn of session "abc123"; this call constructs the "abc123" key in `store`.
conversational_rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

"Task decomposition is a technique used to break down complex tasks into smaller and simpler steps, making them more manageable for an agent or model to handle. It involves transforming a big task into multiple smaller tasks, allowing for a more detailed and structured approach to problem-solving. This process sheds light on the interpretation of the model's thinking process by guiding it to think step by step through the task."

In [21]:
# Follow-up in the same session, so the stored first exchange is available to the chain.
conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={
        "configurable": {"session_id": "abc123"},
    },
)["answer"]

'Task decomposition can be achieved through various methods, including using Chain of Thought (CoT) to prompt models to think step by step, decomposing tasks into smaller steps. Other common ways include utilizing task-specific instructions, such as providing simple prompts like "Steps for XYZ" or asking for subgoals to achieve a specific task. Additionally, task decomposition can also involve human inputs to break down complex tasks into more manageable components.'

### Agents

Advantages of agent implementation

1. Agents generate the input to the retriever directly, **without necessarily needing us to explicitly build in contextualization**, as we did above;
2. Agents can execute multiple retrieval steps in service of a query, or refrain from executing a retrieval step altogether (e.g., in response to a generic greeting from a user).


#### Retrieval tool


In [22]:
from langchain.tools.retriever import create_retriever_tool


# Expose the retriever as a tool the agent can call. The description is the
# text that tells the model what the tool is for, so it is spelled carefully
# ("excerpts") and matches the wording used in the agent recap cell.
tool = create_retriever_tool(
    retriever,
    "blog_post_retriever",
    "Searches and returns excerpts from the Autonomous Agents blog post.",
)
tools = [tool]

In [23]:
# Call the tool directly to see the raw text it would hand back to the agent.
tool.invoke(input="task decomposition")

'Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.\n\nFig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The mode

#### Agent constructor


In [24]:
from langgraph.prebuilt import create_react_agent

# Agent with access to the retriever tool; no checkpointer, so nothing is remembered.
agent_executor = create_react_agent(CHAT_LLM, tools=tools)

query = "What is Task Decomposition?"

# Print every streamed step, each followed by a dashed separator line.
for s in agent_executor.stream({"messages": [HumanMessage(content=query)]}):
    print(s, "----", sep="\n")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_iANuBsD4LVeqZ3Fa8AFQGhvA', 'function': {'arguments': '{"query":"Task Decomposition"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 69, 'total_tokens': 88}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'tool_calls', 'logprobs': None, 'content_filter_results': {}}, id='run-5f0862a2-3617-4615-b904-ee74dad65e7a-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'Task Decomposition'}, 'id': 'call_iANuBsD4LVeqZ3Fa8AFQGhvA', 'type': 'tool_call'}], usage_metadata={'input_tokens': 69, 'output_tokens': 19, 'total_tokens': 88})]}}
----
{'tools': {'messages': [ToolMessage(content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multip

Use `checkpointer` instead of chat message history


In [25]:
from langgraph.checkpoint.sqlite import SqliteSaver

# Rebuild the agent with an in-memory SQLite checkpointer so that state is
# kept between calls.
memory = SqliteSaver.from_conn_string(":memory:")
agent_executor = create_react_agent(CHAT_LLM, tools=tools, checkpointer=memory)

In [26]:
# Every call that passes this config runs on thread "abc123".
config = {
    "configurable": {"thread_id": "abc123"},
}

# A plain greeting: print each streamed step followed by a separator.
for s in agent_executor.stream(
    {"messages": [HumanMessage(content="Hi! I'm bob")]},
    config=config,
):
    print(s, "----", sep="\n")

{'agent': {'messages': [AIMessage(content='Hello Bob! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 68, 'total_tokens': 79}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-6d7c31ef-ef0b-4090-a68d-16cb7dbdd479-0', usage_metadata={'input_tokens': 68, 'output_tokens': 11, 'total_tokens': 79})]}}
----


In [27]:
# Same thread as the greeting above.
config = {
    "configurable": {"thread_id": "abc123"},
}

# Ask the blog-post question on that thread and print each streamed step.
for s in agent_executor.stream(
    {"messages": [HumanMessage(content="What is Task Decomposition?")]},
    config=config,
):
    print(s, "----", sep="\n")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_9TQtFr2rEYLN1MG3flZB0p8M', 'function': {'arguments': '{"query":"Task Decomposition"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 92, 'total_tokens': 111}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'tool_calls', 'logprobs': None, 'content_filter_results': {}}, id='run-81f1083d-a3c9-4a66-862c-3b429830a1dd-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'Task Decomposition'}, 'id': 'call_9TQtFr2rEYLN1MG3flZB0p8M', 'type': 'tool_call'}], usage_metadata={'input_tokens': 92, 'output_tokens': 19, 'total_tokens': 111})]}}
----
{'tools': {'messages': [ToolMessage(content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates mult

Tool extracts `query`

```python
'function': {'arguments': '{"query":"Task Decomposition"}'
```

- `What is Task Decomposition?` -> `Task Decomposition` \
  Above, instead of inserting our query verbatim into the tool, the agent stripped unnecessary words like "what" and "is".


In [28]:
# Follow-up on the same thread: "it" is only resolvable from the earlier turns.
query = "What according to the blog post are common ways of doing it? redo the search"

for s in agent_executor.stream(
    {"messages": [HumanMessage(content=query)]},
    config=config,
):
    print(s, "----", sep="\n")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_EC5piqknUTLi6YWxVnqOoJtl', 'function': {'arguments': '{"query":"Common ways of Task Decomposition"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 778, 'total_tokens': 800}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'tool_calls', 'logprobs': None, 'content_filter_results': {}}, id='run-1cf39976-ef34-40f4-91dc-d4e8092a247d-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'Common ways of Task Decomposition'}, 'id': 'call_EC5piqknUTLi6YWxVnqOoJtl', 'type': 'tool_call'}], usage_metadata={'input_tokens': 778, 'output_tokens': 22, 'total_tokens': 800})]}}
----
{'tools': {'messages': [ToolMessage(content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple 

#### Tying it together


In [29]:
import bs4
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.tools.retriever import create_retriever_tool
from langchain_community.vectorstores import FAISS
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.prebuilt import create_react_agent

# In-memory SQLite checkpointer used for the agent's conversation state.
memory = SqliteSaver.from_conn_string(":memory:")


### Construct retriever ###
# Load the blog post (title, header and content elements only).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Chunk, embed and index the post, then expose the index as a retriever.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = FAISS.from_documents(splits, EMBEDDINGS)
retriever = vectorstore.as_retriever()


### Build retriever tool ###
# Wrap the retriever as a named tool the agent can call.
tool = create_retriever_tool(
    retriever,
    name="blog_post_retriever",
    description="Searches and returns excerpts from the Autonomous Agents blog post.",
)
tools = [tool]


# Agent over the single retriever tool, with state kept by the checkpointer.
agent_executor = create_react_agent(CHAT_LLM, tools=tools, checkpointer=memory)

In [33]:
# NOTE(review): this lookup fails — the object returned by `create_react_agent`
# is a CompiledStateGraph with no `model_runnable` attribute (see the
# AttributeError in this cell's output). Remove or replace before sharing.
agent_executor.model_runnable

AttributeError: 'CompiledStateGraph' object has no attribute 'model_runnable'

In [None]:
# - Retriever: https://python.langchain.com/v0.2/docs/how_to/#retrievers