In [1]:
# https://python.langchain.com/v0.2/docs/tutorials/chatbot/

### Message history


In [2]:
from langchain_core.messages import HumanMessage, AIMessage

from core.llm import CHAT_LLM as model


model.invoke(
    [
        HumanMessage("My name is Bob."),
        AIMessage("Hello Bob!"),
        HumanMessage("What's my name?"),
    ]
)

AIMessage(content='Your name is Bob.', response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 28, 'total_tokens': 33}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-9b0f47fd-0ad8-4e1a-b233-f62e4e4fb273-0', usage_metadata={'input_tokens': 28, 'output_tokens': 5, 'total_tokens': 33})

### History session


In [5]:
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory,
)
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]


with_message_history = RunnableWithMessageHistory(
    model,
    get_session_history,
)

config = {"configurable": {"session_id": "abc2"}}
response = with_message_history.invoke(
    [
        HumanMessage("My name is Bob."),
        AIMessage("Hello Bob!"),
        HumanMessage("What's my name?"),
    ],
    config,
)

print(response.content)
print(store)

Your name is Bob.
{'abc2': InMemoryChatMessageHistory(messages=[HumanMessage(content='My name is Bob.'), AIMessage(content='Hello Bob!'), HumanMessage(content="What's my name?"), AIMessage(content='Your name is Bob.', response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 28, 'total_tokens': 33}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-0b6216c3-1a9c-4c74-be1d-80bdec7763b9-0', usage_metadata={'input_tokens': 28, 'output_tokens': 5, 'total_tokens': 33})])}


### Chain with history


In [7]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory,
)
from langchain_core.runnables.history import RunnableWithMessageHistory

from core.llm import CHAT_LLM as model


store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]


prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer all questions."),
        ("placeholder", "{messages}"),
    ]
)
chain = prompt | model

with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages",
)

In [8]:
with_message_history.invoke(
    {"messages": [HumanMessage("My name is Bob")]},
    config=config,
)

AIMessage(content='Nice to meet you, Bob! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 25, 'total_tokens': 39}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-48f0b707-587a-42dc-83df-c75379da8184-0', usage_metadata={'input_tokens': 25, 'output_tokens': 14, 'total_tokens': 39})

In [9]:
with_message_history.invoke(
    {"messages": [HumanMessage("What's my name?")]},
    config=config,
)

AIMessage(content='Your name is Bob.', response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 52, 'total_tokens': 57}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-7712dc12-6b3b-421a-96ce-936a1d0960a9-0', usage_metadata={'input_tokens': 52, 'output_tokens': 5, 'total_tokens': 57})

### Managing Conversation History


In [98]:
import tiktoken
from langchain_core.messages import SystemMessage, trim_messages, BaseMessage

from core.llm import CHAT_LLM


def token_counter(
    messages: list[BaseMessage], model_name: str = CHAT_LLM.model_name
) -> int:
    """Token counter for LangChain BaseMessage objects.

    Args:
        messages (List[BaseMessage]): List of LangChain BaseMessage objects.
        model_name (str): Name of the model to use for token encoding.

    Returns:
        int: Total number of tokens in the messages.

    Note:
        This function uses the tiktoken library for accurate token counting.
        It handles both string and list content types in BaseMessage objects.
        This doesn't account for non-text content like images
    """
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        print(f"Warning: model {model_name} not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    num_tokens = 0
    for message in messages:
        # Every message follows <im_start>{role/name}\n{content}<im_end>\n
        num_tokens += 4

        # print("message:", message)
        for key, value in message.dict().items():
            # print(f" - {key}: {value}")
            if key == "content":
                if isinstance(value, str):
                    num_tokens += len(encoding.encode(value))
                elif isinstance(value, list):
                    for content_item in value:
                        if isinstance(content_item, dict) and "text" in content_item:
                            num_tokens += len(encoding.encode(content_item["text"]))
            elif key == "role":
                num_tokens += len(encoding.encode(value))
        # print(f" - num_tokens: {num_tokens+2}")

    num_tokens += 2  # Every reply is primed with <im_start>assistant
    return num_tokens


trimmer = trim_messages(
    max_tokens=65,
    strategy="last",
    # token_counter=model,
    token_counter=token_counter,  # fallback method
    include_system=True,
    start_on="human",
    end_on="ai",
)

messages = [
    SystemMessage(content="you're a good assistant"),
    HumanMessage(content="hi! I'm bob"),
    AIMessage(content="hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="thanks"),
    AIMessage(content="no problem!"),
    HumanMessage(content="having fun?"),
    AIMessage(content="yes!"),
]

trimmer.invoke(messages)

[SystemMessage(content="you're a good assistant"),
 HumanMessage(content='whats 2 + 2'),
 AIMessage(content='4'),
 HumanMessage(content='thanks'),
 AIMessage(content='no problem!'),
 HumanMessage(content='having fun?'),
 AIMessage(content='yes!')]

In [99]:
rst = model.invoke(
    [
        SystemMessage(content="you're a good assistant"),
        AIMessage(content="hi!"),
        HumanMessage(content="I like vanilla ice cream"),
        AIMessage(content="nice"),
        HumanMessage(content="whats 2 + 2"),
        AIMessage(content="4"),
        HumanMessage(content="thanks"),
        AIMessage(content="no problem!"),
        HumanMessage(content="having fun?"),
        AIMessage(content="yes!"),
    ]
)
print(rst)
print(rst.content)
print(rst.usage_metadata)

content="I'm always here to help you out." response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 73, 'total_tokens': 82}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-b05b3e40-0a13-4e1c-a9ce-b8c558ca7e7e-0' usage_metadata={'input_tokens': 73, 'output_tokens': 9, 'total_tokens': 82}
I'm always here to help you out.
{'input_tokens': 73, 'output_tokens': 9, 'total_tokens': 82}


In [100]:
from operator import itemgetter

from langchain_core.runnables import RunnablePassthrough

chain = (
    RunnablePassthrough.assign(messages=itemgetter("messages") | trimmer)
    | prompt
    | model
)

with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages",
)

with_message_history.invoke(
    {"messages": [HumanMessage("My name is Bob")]},
    config=config,
)
with_message_history.invoke(
    {"messages": [HumanMessage("What's my name?")]},
    config=config,
)

AIMessage(content='How can I assist you today, Bob?', response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 72, 'total_tokens': 81}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-b8071458-2f5f-41fc-8088-02e8a2b145cc-0', usage_metadata={'input_tokens': 72, 'output_tokens': 9, 'total_tokens': 81})

### Streaming

- Details: https://python.langchain.com/v0.2/docs/how_to/streaming/


In [102]:
for r in with_message_history.stream(
    {"messages": [HumanMessage("My name is Bob")]},
    config=config,
):
    print(r.content, end="|", flush=True)

|I|'m| sorry|,| I| don|'t| have| access| to| your| personal| information|.| How| can| I| assist| you| today|?||