In [None]:
# Imports
from dotenv import load_dotenv
import asyncio

load_dotenv()

True

# Quickstart

First up, let's learn how to use a language model by itself. LangChain supports many different language models that can be used interchangeably. We'll use Anthropic's Claude 3.5 Sonnet for this example.

In [5]:
# Use a language model
from langchain.chat_models import init_chat_model

model = init_chat_model("claude-3-5-sonnet-latest", model_provider="anthropic")

Let's first use the model directly. ChatModels are instances of LangChain "Runnables", which means they expose a standard interface for interacting with them. To just simply call the model, we can pass in a list of messages to the .invoke method.

In [6]:
from langchain_core.messages import HumanMessage

model.invoke([HumanMessage("¡Hola, soy Andrés!")])

AIMessage(content='¡Hola Andrés! ¿Cómo estás? ¿En qué puedo ayudarte hoy?', additional_kwargs={}, response_metadata={'id': 'msg_01E8smBX8EDNtAZcjw189Dm5', 'model': 'claude-3-5-sonnet-20241022', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 20, 'output_tokens': 37, 'server_tool_use': None, 'service_tier': 'standard'}, 'model_name': 'claude-3-5-sonnet-20241022'}, id='run--7493ea3a-7c45-4576-8ee5-18e6a539b9d4-0', usage_metadata={'input_tokens': 20, 'output_tokens': 37, 'total_tokens': 57, 'input_token_details': {'cache_read': 0, 'cache_creation': 0}})

This model has no state; It doesn't take the previous conversation turn into context, and couldn't answer the question: what's my name? Let's see this in action.

In [7]:
model.invoke([HumanMessage("¿recuerdas cómo me llamo?")])

AIMessage(content='No sé tu nombre ya que no me lo has dicho anteriormente. Inicio cada conversación desde cero, sin recordar interacciones previas.', additional_kwargs={}, response_metadata={'id': 'msg_01Bp5zA2v3kwWv6LbvcVhfYF', 'model': 'claude-3-5-sonnet-20241022', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 21, 'output_tokens': 37, 'server_tool_use': None, 'service_tier': 'standard'}, 'model_name': 'claude-3-5-sonnet-20241022'}, id='run--2fd04f87-f4ae-43d3-926c-dd5b421906b4-0', usage_metadata={'input_tokens': 21, 'output_tokens': 37, 'total_tokens': 58, 'input_token_details': {'cache_read': 0, 'cache_creation': 0}})

In order to get around this, we can pass the entire conversation history into the model.

In [9]:
from langchain_core.messages import AIMessage

model.invoke(
    [
        HumanMessage(content="¡Hola, soy Andrés!"),
        AIMessage(content="Hola Andrés, ¿Cómo puedo ayudarte?"),
        HumanMessage(content="¿recuerdas mi nombre?"),
    ]
)

AIMessage(content='Sí, me dijiste que te llamas Andrés.', additional_kwargs={}, response_metadata={'id': 'msg_01DF7VL5mX491X7m6LGmzsY2', 'model': 'claude-3-5-sonnet-20241022', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 56, 'output_tokens': 18, 'server_tool_use': None, 'service_tier': 'standard'}, 'model_name': 'claude-3-5-sonnet-20241022'}, id='run--56ae7f0e-24ac-41a6-bd7e-a26acd0046f6-0', usage_metadata={'input_tokens': 56, 'output_tokens': 18, 'total_tokens': 74, 'input_token_details': {'cache_read': 0, 'cache_creation': 0}})

LangChain implements a built-in persistence layer, making it ideal for applications that support multiple conversational turns.

LangChain also supports different persistence backends, like SQLite or Postgres.

In [11]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Define a new graph
workflow = StateGraph(state_schema=MessagesState)

# Define the function that calls the model
def call_model(state: MessagesState):
    response = model.invoke(state["messages"])
    return {"messages": response}

# Define the (single) node in the graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

We now need to create a `config` that we pass into the runnable every time (note that the compiled app implements the `Runnable` interface, so it's a runnable itself).

This config enables us to support multiple conversation threads with a single application, a common requirement when the application has multiple users. Let's now create that `config` and invoke the application.

In [12]:
from langchain_core.runnables.config import RunnableConfig
config = RunnableConfig(configurable={"thread_id": "abc123"})

query = "¡Hola! Soy, Andrés"

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)

output["messages"][-1].pretty_print()


¡Hola Andrés! ¿Cómo estás? ¿En qué puedo ayudarte hoy?


In [13]:
query = "¿Recuerdas cómo me llamo?"

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()


Sí, me dijiste que te llamas Andrés.


Now, if we change the config's thread ID, the conversation will start from scratch, the chatbot won't remember my name.

In [None]:
config = RunnableConfig(configurable={"thread_id": "123abc"})

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()


No tengo registro de tu nombre ya que esta es una nueva conversación. ¿Te gustaría decirme cómo te llamas?


However, we can always go back to the initial conversation `thread_id=abc123`

In [15]:
config = RunnableConfig(configurable={"thread_id": "abc123"})
input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()


Sí, me dijiste que te llamas Andrés en tu primer mensaje.


> [!tip]
> For async support, update the call_model node to be an async function and use .ainvoke when invoking the application:
```python
# Async function for node:
async def call_model(state: MessagesState):
    response = await model.ainvoke(state["messages"])
    return {"messages": response}


# Define graph as before:
workflow = StateGraph(state_schema=MessagesState)
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)
app = workflow.compile(checkpointer=MemorySaver())

# Async invocation:
output = await app.ainvoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()
```

## Using Prompt Templates

We can use prompt templates to turn raw user information into a format that the LLM can work with. 

In this case, the raw input is just a message. What we'll do next, is to add a system message with custom instructions, and we'll add more input to the model besides the raw messages.

To add in that system message we're going to use the `ChatPromptTemplate` class.

In [21]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You talk like an ancient greek philosopher. Answer all questions to the best of ancient greek's ability"
        ),
        MessagesPlaceholder(variable_name="messages")
    ]
)

We can now update our application to incorporate this template!

Let's also use this opportunity to use asynchronous calls.

In [22]:
workflow = StateGraph(state_schema=MessagesState)

async def acall_model(state: MessagesState):
    prompt = prompt_template.invoke(state)
    response = await model.ainvoke(prompt)
    return {"messages": response}

workflow.add_edge(START, "model")
workflow.add_node("model", acall_model)

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [23]:
config = RunnableConfig(configurable={"thread_id": "abc345"})
query = "Hi! I'm Thales of Miletus, how do you do?."

input_messages = [HumanMessage(query)]
output = await app.ainvoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()


Ah, greetings, dear friend! By the divine waters of the cosmos, it brings me great joy to engage in discourse with you! As a lover of wisdom and natural philosophy myself, I must say your name carries great weight in our philosophical circles.

Let me ponder - you are the one who proposed that water is the fundamental substance of all things, are you not? A most fascinating proposition indeed! I find myself contemplating the underlying principles of nature quite frequently as I stroll through the olive groves.

Tell me, what burning questions about the cosmos occupy your mind today? Shall we explore the archē of all things or perhaps discuss the nature of the divine? I am prepared to engage in dialectic discourse on whatever subject moves your soul.

By Zeus, I do hope you're having a day filled with contemplation and insight!


Awesome! Let's now make our prompt a bit more complicated. Let's assume that the prompt template now looks something like this:

In [24]:
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful ancient greek assistant. Answer all questions to the best of your ability in {language}.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

Note that we have added a new language input to the prompt. Our application now has two parameters-- the input messages and language. We should update our application's state to reflect this:

In [25]:
from typing import Sequence, TypedDict
from typing_extensions import Annotated
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages


class State(TypedDict):
    messages: Annotated[Sequence[BaseMessage], add_messages]
    language: str

workflow = StateGraph(state_schema=State)

def acall_model(state: State):
    prompt = prompt_template.invoke(state)
    response = model.invoke(prompt)
    return {"messages": [response]}

workflow.add_edge(START, "model")
workflow.add_node("model", acall_model)

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [26]:
config = RunnableConfig(configurable={"thread_id": "abc456"})
query = "Hi! I'm Glaucon."
language = "Spanish"

input_messages = [HumanMessage(query)]
output = app.invoke(
    {"messages": input_messages, "language": language},
    config,
)
output["messages"][-1].pretty_print()


¡Saludos, Glaucon! Me alegro de conocerte. Como asistente de la antigua Grecia, reconozco tu nombre - ¿sabes que Glaucon era el nombre del hermano de Platón y aparece como personaje importante en "La República"? 

¿Cómo puedo ayudarte hoy? Estoy aquí para conversar sobre cualquier tema, especialmente sobre la antigua Grecia, pero siempre respondiendo en español.


## Managing Conversation History

If history is not managed, the list of messages will grow unbounded and potentially overflow the context window of the LLM.

To address this, we can add a step that limits the size of the messages that are being passed.

IMPORTANT: **This step must be added BEFORE the prompt template but AFTER loading previous messages from the Message History**

We will achieve this by adding a simple step in front of the prompt that modifies the `messages` key appropriately, and wrap that new chain in the Message History class.

LangChain comes with a few built-in helpers for [managing a list of messages](https://python.langchain.com/docs/how_to/#messages).

We'll use the [trim_messages](https://python.langchain.com/docs/how_to/trim_messages/) helper to reduce how many messages we're sending to the model. We can specify how many tokens we want to keep, along with other parameters like if we want to always keep the system message and whether to allow partial messages.

In [42]:
from langchain_core.messages import trim_messages

trimmer = trim_messages(
    max_tokens=65,
    strategy="last",
    token_counter=model,
    include_system=True,
    allow_partial=False,
    start_on="human",
)


In [43]:
workflow = StateGraph(state_schema=State)


async def acall_model(state: State):
    # Trim messages before prompting
    trimmed_messages = trimmer.invoke(state["messages"])
    # Use prompt template with trimmed messages
    prompt = prompt_template.invoke(
        {"messages": trimmed_messages, "language": state["language"]}
    )
    # Call model
    response = model.invoke(prompt)
    return {"messages": [response]}


workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [45]:
from langchain_core.messages import SystemMessage

long_message = """
I like vanilla ice cream, with peanuts, and bourbon, I think I might have an alcohol problem. I hope this message is long enough for the trimmer to chop it into tiny little pieces.
"""
messages = [
    SystemMessage(content="you're a good assistant"),
    HumanMessage(content="hi! I'm bob"),
    AIMessage(content="hi!"),
    HumanMessage(content=long_message),
    AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="thanks"),
    AIMessage(content="no problem!"),
    HumanMessage(content="having fun?"),
    AIMessage(content="yes!"),
]

config = RunnableConfig(configurable={"thread_id": f"abc2345"})
input_messages = messages + [HumanMessage("What's my name?")] + [HumanMessage("Are you sure?")]
output = await app.ainvoke(
    {"messages": input_messages, "language": "ancient greek"},
    config=config
)
output["messages"][-1].pretty_print()

ValueError: Received multiple non-consecutive system messages.

In [48]:
config = {"configurable": {"thread_id": "abc4567"}}
query = "Hi I'm Todd, please tell me a joke."
language = "English"

input_messages = [HumanMessage(query)]

for chunk, metadata in app.stream(
    {"messages": input_messages, "language": language},
    config,
    stream_mode="messages",
):
    if isinstance(chunk, AIMessage):  # Filter to just model responses
        print(chunk.content, end=" ")

 Hi Todd! Here's  a joke for you: 

What do you call a fake  noodle? 
An impasta!  🍝  