In [1]:
import getpass
import os

# Turn on LangSmith tracing for this kernel session and send runs to a
# dedicated project.
os.environ["LANGSMITH_TRACING"] = "true"
# Environment variable names are case-sensitive on POSIX systems, so the
# suffix must be upper-case "V2" for the variable to be recognised.
os.environ["LANGSMITH_TRACING_V2"] = "true"
os.environ["LANGSMITH_PROJECT"] = "tutorial_chatbot"
# Never hardcode the API key in the notebook. Only create an empty placeholder
# when no key is already present, so a key exported in the shell is not wiped;
# the `.env` file loaded later (with override=True) is expected to supply the
# real value.
os.environ.setdefault("LANGSMITH_API_KEY", "")

In [2]:
from dotenv import load_dotenv
# Load settings from the .env file one directory above the notebook.
# override=True is passed so that values from the file take precedence over
# variables already set in this process. The call is the cell's last
# expression, so its return value is displayed (True in the recorded run).
load_dotenv(dotenv_path="../.env", override=True)

True

In [3]:
from langsmith import utils
# Sanity check: display whether LangSmith considers tracing enabled
# (True in the recorded run).
utils.tracing_is_enabled()

True

In [4]:
from langchain_ollama import ChatOllama

# The Ollama model name comes from the LLM_MODEL environment variable (for
# example defined in ../.env). os.getenv returns None when the variable is
# missing, which would otherwise be handed straight to ChatOllama; fail early
# with an actionable message instead.
if not os.getenv("LLM_MODEL"):
    raise RuntimeError(
        "LLM_MODEL is not set; define it in ../.env or in the environment "
        "(for example LLM_MODEL=llama3.2)."
    )

model = ChatOllama(model=os.environ["LLM_MODEL"])

In [6]:
from langchain_core.messages import HumanMessage

# Send a single, stand-alone human message; no conversation history is passed.
model.invoke([HumanMessage(content="Hi! I'm Bob")])

AIMessage(content="Hi Bob! It's nice to meet you. Is there something I can help you with, or would you like to chat for a bit?", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-04-22T14:42:28.082193655Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3056365505, 'load_duration': 2835813179, 'prompt_eval_count': 30, 'prompt_eval_duration': 65000000, 'eval_count': 30, 'eval_duration': 152000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-3b92133f-0b38-422a-92ed-8367c1df8ce2-0', usage_metadata={'input_tokens': 30, 'output_tokens': 30, 'total_tokens': 60})

In [7]:
# A separate call containing only this question: the earlier greeting is not
# part of the input list.
model.invoke([HumanMessage(content="What's my name?")])

AIMessage(content="I don't have any information about you, so I don't know your name. We just started our conversation, and I'm here to help with any questions or topics you'd like to discuss. What would you like to talk about?", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-04-22T14:42:54.800872843Z', 'done': True, 'done_reason': 'stop', 'total_duration': 279870139, 'load_duration': 19847665, 'prompt_eval_count': 30, 'prompt_eval_duration': 8000000, 'eval_count': 49, 'eval_duration': 250000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-a307848f-24f4-4e63-8c19-2cc47770d507-0', usage_metadata={'input_tokens': 30, 'output_tokens': 49, 'total_tokens': 79})

We can see that the model doesn't take the previous conversation turn into account, so it cannot answer the question. This makes for a terrible chatbot experience!
To get around this, we need to pass the entire conversation history into the model. Let's see what happens when we do that:

In [12]:
from langchain_core.messages import AIMessage

# Pass the whole exchange in one call: the user's greeting, a hand-written
# assistant reply, and the follow-up question, so the name is part of the input.
model.invoke(
    [
        HumanMessage(content="Hi! I'm Bob"),
        AIMessage(content="Hello Bob! How can I assist you today?"),
        HumanMessage(content="What's my name?"),
    ]
)

AIMessage(content="I remember! Your name is Bob. You told me that earlier! Is there something on your mind that you'd like to chat about, or would you like some fun conversation starters instead?", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-04-22T14:44:19.513032688Z', 'done': True, 'done_reason': 'stop', 'total_duration': 225511627, 'load_duration': 22240100, 'prompt_eval_count': 55, 'prompt_eval_duration': 5000000, 'eval_count': 39, 'eval_duration': 195000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-b60e0b8d-2b61-4b57-be58-a37b9f498dc2-0', usage_metadata={'input_tokens': 55, 'output_tokens': 39, 'total_tokens': 94})

LangGraph implements a built-in persistence layer, making it ideal for chat applications that support multiple conversational turns.

Wrapping our chat model in a minimal LangGraph application allows us to automatically persist the message history, simplifying the development of multi-turn applications.

LangGraph comes with a simple in-memory checkpointer, which we use below. See its documentation for more detail, including how to use different persistence backends (e.g., SQLite or Postgres).

In [14]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Define a new graph builder whose state schema is MessagesState; nodes and
# edges are attached to it below before it is compiled.
workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    """Invoke the chat model on the messages held in the graph state.

    Args:
        state: Current graph state; its "messages" entry is sent to the model.

    Returns:
        A dict with a single "messages" key holding the model's response.
    """
    history = state["messages"]
    reply = model.invoke(history)
    return {"messages": reply}


# Register the (single) node, then connect the graph's entry point to it
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with the in-memory checkpointer so message history is persisted
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

We now need to create a config that we pass into the runnable every time. This config contains information that is not part of the input directly, but is still useful. In this case, we want to include a thread_id. This should look like:

In [15]:
# Per-call configuration passed alongside the input; it carries the thread_id.
config = {
    "configurable": {
        "thread_id": "abc123",
    },
}