In [57]:
import add_packages
import config
from pprint import pprint
import os, typing

from toolkit.langchain import (
  document_loaders, text_splitters, text_embedding_models, vectorstores, 
  chat_models, prompts, utils, output_parsers, agents, memories, chains,
  runnables, agent_tools
)

# LangChain

## Chatbot (Usecase)

Design and implement an LLM-powered chatbot. Key components include Chat Models, Prompt Templates, Chat History, and Retrievers. These elements work together to create a conversational chatbot.

### Start


In [None]:
# Retrievers bring domain-specific knowledge into the model's answers.

# Pull the source page with a document loader.
loader = document_loaders.WebBaseLoader("https://docs.smith.langchain.com/")
document = loader.load()

# Split the page into chunks small enough for the LLM's context window.
text_splitter = text_splitters.RecursiveCharacterTextSplitter(
  chunk_size=500, chunk_overlap=0,
)
docs = text_splitter.split_documents(document)

# Embed the chunks and store them in a vector database.
vectorstore = vectorstores.chroma.Chroma.from_documents(
  documents=docs, embedding=text_embedding_models.OpenAIEmbeddings()
)
# Create a retriever from the vectorstore. The number of results is a search
# argument, so it has to be passed through `search_kwargs`; a bare `k=4`
# keyword is not one of the retriever's documented parameters.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# Chat model that acts as the chatbot's brain.
# The model keeps no state between calls, so the whole conversation history
# must be supplied on every invocation.
model = chat_models.chat_openai

# In-memory message history used to save and load the chat messages.
chat_history = memories.ChatMessageHistory()

In [None]:
# Answering prompt: a system instruction with a `{context}` slot, followed by
# the running conversation.
system_message = (
  "system",
  "Answer the user's questions based on the below context:\n\n{context}",
)
# Expands to the chat messages supplied under the "messages" input key.
conversation_slot = prompts.MessagesPlaceholder(variable_name="messages")

prompt = prompts.ChatPromptTemplate.from_messages(
  [system_message, conversation_slot]
)

# Chain that "stuffs" the input documents into the prompt and calls the model;
# the remaining inputs are passed through to the prompt unchanged.
chain_documents = chains.create_stuff_documents_chain(model, prompt)

#*-----------------------------------------------------------------------------

# A follow-up question on its own ("tell me more about that") is a poor search
# query, because the retriever receives its query verbatim. For multi-turn
# conversations the LLM first rewrites the conversation into a standalone
# search query, which is then handed to the retriever.
query_transform_instruction = (
  "Given the above conversation, generate a search query to look up in "
  "order to get information relevant to the conversation. "
  "Only response with the query, nothing else."
)
prompt_query_transform = prompts.ChatPromptTemplate.from_messages([
  prompts.MessagesPlaceholder(variable_name="messages"),
  ("user", query_transform_instruction),
])

# Exactly one message: search with that message's text directly.
branch_single_message = (
  lambda x: len(x.get("messages", [])) == 1,
  (lambda x: x["messages"][-1].content) | retriever,
)
# Otherwise: ask the model for a search query, parse it to a string, search.
branch_rewritten_query = (
  prompt_query_transform | model | output_parsers.StrOutputParser() | retriever
)

chain_query_transforming_retriever = runnables.RunnableBranch(
  branch_single_message,
  branch_rewritten_query,
).with_config(run_name="chain_chat_retriever")

#*-----------------------------------------------------------------------------

# Step 1: add the retrieved documents to the input under `context`.
chain_with_context = runnables.RunnablePassthrough.assign(
  context=chain_query_transforming_retriever
)
# Step 2: add the model's reply under `answer`, keeping the earlier keys.
chain_conversational_retrieval = chain_with_context.assign(
  answer=chain_documents,
)

In [None]:
# Open the conversation with the user's first question.
opening_question = "how can langsmith help with testing?"
chat_history.add_user_message(opening_question)

In [None]:
# Answer the conversation so far, then record the reply so that the next turn
# sees it as part of the history.
chain_inputs = {"messages": chat_history.messages}
response = chain_conversational_retrieval.invoke(chain_inputs)
chat_history.add_ai_message(response["answer"])

In [None]:
# Follow-up turn: the question only makes sense together with the earlier
# messages, so the full history is passed to the chain again.
follow_up_question = "tell me more about that"
chat_history.add_user_message(follow_up_question)

chain_inputs = {"messages": chat_history.messages}
response = chain_conversational_retrieval.invoke(chain_inputs)
chat_history.add_ai_message(response["answer"])

### Memory management

A key feature of chatbots is their ability to use previous conversation turns as context. This state management can take various forms:

- Stuffing previous messages into a chat model prompt.
- Trimming old messages to reduce distracting information.
- Synthesizing summaries for long conversations.
  
Use LangChain’s message history class stores and loads chat messages from persistent storage.

Store conversation turns directly for chain.

#### Automatic history management


In [None]:
model = chat_models.chat_openai


# Prompt layout: system instruction, then the earlier turns, then the newest
# user input. The `chat_history` slot is expected to contain only messages that
# came BEFORE the current input.
system_turn = (
  "system",
  "You are a helpful assistant. Answer all question to the best of your ability.",
)
history_slot = prompts.MessagesPlaceholder(variable_name="chat_history")
human_turn = ("human", "{input}")

prompt = prompts.ChatPromptTemplate.from_messages(
  [system_turn, history_slot, human_turn]
)

chain = prompt | model

# RunnableWithMessageHistory handles the history bookkeeping: callers pass only
# the latest input and the wrapper takes care of the stored messages.
chat_history = memories.ChatMessageHistory()


def history_for_session(session_id):
  # Factory called with the session id. A real application would return a
  # separate history per session; this demo shares a single one.
  return chat_history


chain_memorizable = runnables.RunnableWithMessageHistory(
  chain,
  history_for_session,
  # Input key whose value is tracked and stored in the chat history.
  input_messages_key="input",
  # Key under which earlier messages are injected; it matches the prompt's
  # MessagesPlaceholder named "chat_history".
  history_messages_key="chat_history",
  # For chains with several outputs, `output_messages_key` selects the one to
  # store; it is not needed here.
)


In [None]:
# Invoke the chain with an extra configurable field, `session_id`, which is
# passed to the history factory. A real application would use it to return the
# chat history belonging to that session; this demo's factory ignores it.

# User turns, sent in order.
inputs = [
  "Translate this sentence from English to French: I love programming.",
  "What did I just ask you?"
]

# The loop variable is `user_input`, not `input`: a top-level `input` binding
# would shadow the builtin `input()` for the rest of the kernel session.
for user_input in inputs:
  response = chain_memorizable.invoke(
    {"input": user_input},
    {"configurable": {"session_id": "unused"}},
  )

  print(response.content)

#### Modifying chat history


##### Trimming messages

LLMs and chat models have limited context windows, limit distractions by only storing the most recent n messages.

In [None]:
model = chat_models.chat_openai


# Prompt layout: system instruction, then the stored history, then the newest
# user input. The `chat_history` slot is expected to contain only messages that
# came BEFORE the current input.
prompt = prompts.ChatPromptTemplate.from_messages([
  (
    "system",
    "You are a helpful assistant. Answer all question to the best of your ability."
  ),
  prompts.MessagesPlaceholder(variable_name="chat_history"),
  (
    "human",
    "{input}"
  ),
])

chain = prompt | model

# One shared in-memory history for this section, created exactly once and
# before the wrapper is built. Previously a second ChatMessageHistory was
# assigned to the same name after the wrapper was constructed, which orphaned
# the first and only worked because the factory lambda resolves `chat_history`
# at call time.
chat_history = memories.ChatMessageHistory()

chain_memorizable = runnables.RunnableWithMessageHistory(
  chain,
  # Factory called with the session id; this demo returns the same history for
  # every session.
  lambda session_id: chat_history,
  # Input key whose value is tracked and stored in the chat history.
  input_messages_key="input",
  # Key under which earlier messages are injected; it matches the prompt's
  # MessagesPlaceholder named "chat_history".
  history_messages_key="chat_history",
  # For chains with several outputs, `output_messages_key` selects the one to
  # store; it is not needed here.
)

# Keep the context small by trimming the stored history to its most recent
# messages. The step is placed at the front of the chain so it runs on every call.
def trim_messages(chain_input, max_messages: int = 2) -> bool:
  """Trim the module-level `chat_history` to its most recent messages.

  Args:
    chain_input: Input of the chain step. Unused; it is accepted because the
      function is invoked as a step of the chain.
    max_messages: Maximum number of stored messages to keep. Values below zero
      are treated as zero.

  Returns:
    True if older messages were dropped, False if the history already held
    `max_messages` messages or fewer.
  """
  keep = max(max_messages, 0)

  # Work on a copy so that clearing the history cannot affect the list the
  # kept messages are re-added from.
  stored_messages = list(chat_history.messages)

  if len(stored_messages) <= keep:
    return False

  chat_history.clear()

  # Slice from an explicit start index: `stored_messages[-0:]` is the whole
  # list, so a negative-index slice would keep everything for `max_messages=0`.
  for message in stored_messages[len(stored_messages) - keep:]:
    chat_history.add_message(message)

  return True

# Trim first, then run the history-aware chain. The flag returned by
# `trim_messages` is added to the input under `trimmed_messages`.
trimming_step = runnables.RunnablePassthrough.assign(
  trimmed_messages=trim_messages
)
chain_memorizable_with_trimming = trimming_step | chain_memorizable

In [None]:
# User turns, sent in order. Every entry needs its own trailing comma: without
# one, Python joins adjacent string literals into a single prompt
# ("What is my name?Add one more").
inputs = [
  "My name is Bob",
  "What is 1 + 1",
  "Add one more",
  "Add one more",
  "Add one more",
  "What is my name?",
  "Add one more",
]

# The loop variable is `user_input`, not `input`: a top-level `input` binding
# would shadow the builtin `input()` for the rest of the kernel session.
for user_input in inputs:
  response = chain_memorizable_with_trimming.invoke(
    {"input": user_input},
    {"configurable": {"session_id": "unused"}},
  )

  print(response.content)

##### Summary memory

In [None]:
# Summary memory: before each call an extra LLM request condenses the stored
# conversation, and the system prompt tells the model that the history it sees
# carries facts about the user.

model = chat_models.chat_openai


# Prompt layout: system instruction, then the stored history, then the newest
# user input. The `chat_history` slot is expected to contain only messages that
# came BEFORE the current input.
system_instruction = (
  "You are a helpful assistant. "
  "Answer all question to the best of your ability. "
  "The provided chat history includes facts about the user you are speaking with."
)
prompt = prompts.ChatPromptTemplate.from_messages([
  ("system", system_instruction),
  prompts.MessagesPlaceholder(variable_name="chat_history"),
  ("human", "{input}"),
])

chain = prompt | model

# RunnableWithMessageHistory handles the history bookkeeping: callers pass only
# the latest input and the wrapper takes care of the stored messages.
chat_history = memories.ChatMessageHistory()


def history_for_session(session_id):
  # Factory called with the session id. A real application would return a
  # separate history per session; this demo shares a single one.
  return chat_history


chain_memorizable = runnables.RunnableWithMessageHistory(
  chain,
  history_for_session,
  # Input key whose value is tracked and stored in the chat history.
  input_messages_key="input",
  # Key under which earlier messages are injected; it matches the prompt's
  # MessagesPlaceholder named "chat_history".
  history_messages_key="chat_history",
  # For chains with several outputs, `output_messages_key` selects the one to
  # store; it is not needed here.
)

def summarize_message(chain_input):
  """Replace the stored conversation with a single model-written summary.

  `chain_input` is not used. Returns False when the history is empty and
  nothing was done, True after the history has been replaced by the summary.
  """
  history_so_far = chat_history.messages
  if not history_so_far:
    return False

  # Summarization chain: the stored messages followed by a request to distill them.
  summarizer = (
    prompts.ChatPromptTemplate.from_messages([
      prompts.MessagesPlaceholder(variable_name="chat_history"),
      (
        "user",
        "Distill the above chat messages into a single summary message. "
        "Include as many specific details as you can.",
      ),
    ])
    | model
  )
  summary = summarizer.invoke({"chat_history": history_so_far})

  # Swap the full conversation for its summary.
  chat_history.clear()
  chat_history.add_message(summary)

  return True

# Summarize first, then run the history-aware chain. The flag returned by
# `summarize_message` is added to the input under `summarized_message`.
summarization_step = runnables.RunnablePassthrough.assign(
  summarized_message=summarize_message
)
chain_memorizable_with_summarization = summarization_step | chain_memorizable

In [None]:
# User turns, sent in order.
inputs = [
  "What is 1 + 1",
  "Add one more",
  "Add one more",
  "Add one more",
  "Add one more",
]

# The loop variable is `user_input`, not `input`: a top-level `input` binding
# would shadow the builtin `input()` for the rest of the kernel session.
for user_input in inputs:
  response = chain_memorizable_with_summarization.invoke(
    {"input": user_input},
    {"configurable": {"session_id": "unused"}},
  )

  print(response.content)

### Retrieval using Query transformation

Retrieval enhances chatbot responses with external data.

In [None]:
# Use a document loader to extract the text of the source page.
# NOTE(review): this repeats the load/split/embed work of the "Start" section.
# If both cells run in the same kernel, the default Chroma collection may
# receive the same chunks twice - confirm, or reuse the earlier vectorstore.
loader = document_loaders.WebBaseLoader("https://docs.smith.langchain.com/")
document = loader.load()

# Split the text into chunks small enough for the LLM's context window.
text_splitter = text_splitters.RecursiveCharacterTextSplitter(
  chunk_size=500, chunk_overlap=0,
)
documents = text_splitter.split_documents(document)

# Embed the chunks and store them in a vector database.
vectorstore = vectorstores.chroma.Chroma.from_documents(
  documents=documents, embedding=text_embedding_models.OpenAIEmbeddings(),
)
# Create a retriever from the vectorstore. The number of results is a search
# argument, so it has to be passed through `search_kwargs`; a bare `k=4`
# keyword is not one of the retriever's documented parameters.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

In [None]:
# Chatbots must handle follow-up questions from users.
# The retriever has no notion of conversation state: it is given a query
# string and nothing else, so a follow-up has to be rewritten into a
# standalone query, free of references to earlier turns, before retrieval.
prompt_query_transform = prompts.ChatPromptTemplate.from_messages([
  prompts.MessagesPlaceholder(variable_name="messages"),
  (
    "user",
    # The model must reply WITH the search query. The earlier wording
    # ("Only response to the query") asked it to answer the query instead,
    # which would send an answer, not a query, to the retriever.
    ("Given the above conversation, generate a search query to look up in order "
     "to get information relevant to the conversation. Only respond with the "
     "query, nothing else.")
  )
])

# The (possibly rewritten) query is used to pull up the context documents.
chain_query_transformation = runnables.RunnableBranch(
  (
    lambda x: len(x.get("messages", [])) == 1,
    # Exactly one message: pass that message's content straight to the retriever.
    (lambda x: x["messages"][-1].content) | retriever,
  ),
  # Otherwise: let the LLM turn the conversation into a query, parse it to a
  # string, then pass it to the retriever.
  prompt_query_transform | model | output_parsers.StrOutputParser() | retriever,
).with_config(run_name="chat_retriever_chain")

# Answering side of the retrieval chain: the system prompt wraps the retrieved
# documents in <context> tags and tells the model to admit when the context
# does not cover the question.
SYSTEM_TEMPLATE = (
  "Answer the user's questions based on the below context. \n"
  "If the context doesn't contain any relevant information to the question, "
  "don't make something up and just say \"I don't know\":\n"
  "\n"
  "<context>\n"
  "{context}\n"
  "</context>\n"
)

system_turn_qa = ("system", SYSTEM_TEMPLATE)
conversation_slot_qa = prompts.MessagesPlaceholder(variable_name="messages")
prompt_qa = prompts.ChatPromptTemplate.from_messages(
  [system_turn_qa, conversation_slot_qa]
)

chain_documents = chains.create_stuff_documents_chain(model, prompt_qa)

# Step 1: add the retrieved documents to the input under `context`.
chain_with_context = runnables.RunnablePassthrough.assign(
  context=chain_query_transformation,
)
# Step 2: add the model's reply under `answer`, keeping the earlier keys.
chain_conversational_retrieval = chain_with_context.assign(
  answer=chain_documents,
)

In [None]:
# Three-turn conversation that ends in a follow-up ("Tell me more!"), so the
# query-transformation branch is the one exercised.
conversation = [
  prompts.HumanMessage(content="Can LangSmith help test my LLM applications?"),
  prompts.AIMessage(
    content=(
      "Yes, LangSmith can help test and evaluate your LLM applications. "
      "It allows you to quickly edit examples and add them to datasets to "
      "expand the surface area of your evaluation sets or to fine-tune a "
      "model for improved quality or reduced costs. "
      "Additionally, LangSmith can be used to monitor your application, log "
      "all traces, visualize latency and token usage statistics, and "
      "troubleshoot specific issues as they arise."
    )
  ),
  prompts.HumanMessage(content="Tell me more!"),
]

chain_conversational_retrieval.invoke({"messages": conversation})

### Tool usage

Create conversational agents (chatbots) to interact with systems and APIs using tools.



In [58]:
# Create an agent that can chat with users and search for information.

# Initialize the Tavily search tool and an OpenAI chat model capable of tool
# calling.
tool_search_tavily = agent_tools.TavilySearchResults(max_results=1)
tools = [tool_search_tavily]

model = chat_models.chat_openai

# Make the agent conversational: the prompt has a placeholder for the chat
# history and one for the agent's scratchpad.
prompt = prompts.ChatPromptTemplate.from_messages([
  (
    "system",
    # Typo fixed: the prompt previously read "quer" instead of "query".
    ("You are a helpful assistant. You may not need to use tools for every "
     "query - the user may just want to chat!")
  ),
  prompts.MessagesPlaceholder(variable_name="chat_history"),
  (
    "human",
    "{input}",
  ),
  prompts.MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# Assemble the agent.
agent = agents.create_openai_tools_agent(
  llm=model, tools=tools, prompt=prompt,
)

# Executor that runs the agent with its tools; verbose=True prints each step.
agent_executor = agents.AgentExecutor(
  agent=agent, tools=tools, verbose=True,
)

# Give the agent executor memory by wrapping it in RunnableWithMessageHistory.
# The executor returns several outputs, so `output_messages_key` names the one
# that is stored in the history.
chat_history = memories.ChatMessageHistory()

history_keys = dict(
  input_messages_key="input",
  output_messages_key="output",
  history_messages_key="chat_history",
)
agent_executor_conversable = runnables.RunnableWithMessageHistory(
  agent_executor,
  # Factory called with the session id; this demo shares a single history.
  lambda session_id: chat_history,
  **history_keys,
)

# Per-call config carrying the session id passed to the history factory.
config_agent_executor = dict(configurable=dict(session_id="unused"))

In [60]:
# Two turns: introduce a name, then ask for it back to show the agent remembers.
queries = ["I'm Nemo!", "What is my name?"]

for query in queries:
  agent_payload = {"input": query}
  agent_executor_conversable.invoke(agent_payload, config_agent_executor)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mHello Nemo! How can I assist you today?[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYour name is Nemo![0m

[1m> Finished chain.[0m
