# Setup

In [1]:
from typing import Literal, Sequence
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, SystemMessage, BaseMessage
from langchain_core.language_models import BaseChatModel
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic

# dummy tool
def get_weather(location: str): 
    """Get the weather for a given location"""
    return f"The weather in {location} is 70 degrees"

In [2]:
# Helper function to create a dummy message sequence for testing
def create_dummy_message_sequence(
    sequence: list[
        Literal[
            "human",
            "ai_w_tool",
            "ai_only",
            "tool",
            "system"
        ]
    ],
    dummy_content: str = "hi"  # default content for messages as anthropic requires content
) -> Sequence[BaseMessage]:
    """
    Create a sequence of messages for the conversation.
    """
    dummy_messages = []
    message_creators = {
        "human": lambda: HumanMessage(content=dummy_content),
        "ai_w_tool": lambda: AIMessage(
            content=dummy_content,
            tool_calls=[
                {
                    "name": "get_weather",
                    "args": {"location": "sf"},
                    "id": f"tool_call_id_{len(dummy_messages)}",
                    "type": "tool_call",
                }
            ],
        ),
        "ai_only": lambda: AIMessage(content=dummy_content),
        "tool": lambda: ToolMessage(
            content=dummy_content,
            name="get_weather",
            tool_call_id=f"tool_call_id_{len(dummy_messages) - 1}", # usage: Tool should always follow AI with tool.
        ),
        "system": lambda: SystemMessage(content=dummy_content),
    }

    for message_type in sequence:
        dummy_messages.append(message_creators[message_type]())

    dummy_messages = [message_creators[message_type]() for message_type in sequence]
    return dummy_messages

In [3]:
openai_model = ChatOpenAI(model="gpt-4o") 
openai_model_w_tool = openai_model.bind_tools([get_weather])
anthropic_model = ChatAnthropic(model="claude-3-5-sonnet-20240620")
anthropic_model_w_tool = anthropic_model.bind_tools([get_weather])

def pretty_invoke(model: BaseChatModel, messages: Sequence[BaseMessage]): 
  print(f"### Model: {model.get_name()} ###")
  try:
    print(f'✅: {model.invoke(messages).content}\n')
  except Exception as e:
    print(f'❌: {e}\n')

### Other ways to error out (and a general pattern where Anthropic much stricter than oai with messages)
Easy if it breaks immediately, but sometimes swapping models out works for a few runs, then breaks for any of the reasons below. Hard to debug because because the changes were made some time ago. 
- e.g. sliding windows of summarisation where most times it works, but then suddenly doesn't.
  - different permutations of whether and when a tool is called, whether it was renamed or not
- separately, when a function is run as a node in the same step as a tool call, works most of the time, but failed once

Empty messages not handled by anthropic

In [4]:
message_without_content =  create_dummy_message_sequence(["human", "ai_only", "human"], dummy_content="")
pretty_invoke(openai_model, message_without_content)
pretty_invoke(anthropic_model, message_without_content) 

### Model: ChatOpenAI ###
✅: Hello! How can I assist you today?

### Model: ChatAnthropic ###
❌: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'messages.0: all messages must have non-empty content except for the optional final assistant message'}}



ChatAnthropic needs every tool in message history to be bound, even if no longer using it. Relevant when: 
- using ChatAnthropic as a node/tool, passing history (e.g. to summarise)
- or when making changes to the tool/tool name, and re-running on same thread where message history have tool calls/tool messages that are outdated 

In [5]:
messages_with_tool_call =  create_dummy_message_sequence(["ai_w_tool", "tool", "human"])
pretty_invoke(openai_model, messages_with_tool_call)
pretty_invoke(anthropic_model, messages_with_tool_call) 
pretty_invoke(anthropic_model_w_tool, messages_with_tool_call) # model with bound tool works

### Model: ChatOpenAI ###
❌: Error code: 400 - {'error': {'message': "Invalid parameter: 'tool_call_id' of 'tool_call_id_2' not found in 'tool_calls' of previous message.", 'type': 'invalid_request_error', 'param': 'messages.[1].tool_call_id', 'code': None}}

### Model: ChatAnthropic ###
❌: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': "messages.1: unexpected `tool_use_id`s found in `tool_result` blocks: {'tool_call_id_2'}"}}

### Model: ChatAnthropic ###
❌: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': "messages.1: unexpected `tool_use_id`s found in `tool_result` blocks: {'tool_call_id_2'}"}}



Must end with human message when using tools

In [6]:
messages_ending_with_ai =  create_dummy_message_sequence(["human", "ai_only"])
pretty_invoke(openai_model, messages_ending_with_ai)
pretty_invoke(anthropic_model, messages_ending_with_ai) 
pretty_invoke(anthropic_model_w_tool, messages_ending_with_ai) 

### Model: ChatOpenAI ###
✅: Hello! How can I assist you today?

### Model: ChatAnthropic ###
✅:  there! How can I assist you today?

### Model: ChatAnthropic ###
❌: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools, pre-filling the `assistant` response is not supported.'}}



Must have system message at start

In [7]:
messages_with_system_message_inserted =  create_dummy_message_sequence(["human", "ai_only", "system", "ai_only", "human"])
pretty_invoke(openai_model, messages_with_system_message_inserted)
pretty_invoke(anthropic_model, messages_with_system_message_inserted) 

### Model: ChatOpenAI ###
✅: Hello! How can I assist you today?

### Model: ChatAnthropic ###
❌: System message must be at beginning of message list.



# Tool related errors (expected)
- ToolMessage must be after tool call and vice versa



In [8]:
tool_call_without_tool_message =  create_dummy_message_sequence(['ai_w_tool', 'human'])
pretty_invoke(openai_model, tool_call_without_tool_message)
pretty_invoke(anthropic_model, tool_call_without_tool_message) 

tool_message_without_tool_call =  create_dummy_message_sequence(['tool', 'human'])
pretty_invoke(openai_model, tool_message_without_tool_call)
pretty_invoke(anthropic_model, tool_message_without_tool_call)

### Model: ChatOpenAI ###
❌: Error code: 400 - {'error': {'message': "An assistant message with 'tool_calls' must be followed by tool messages responding to each 'tool_call_id'. The following tool_call_ids did not have response messages: tool_call_id_2", 'type': 'invalid_request_error', 'param': 'messages.[1].role', 'code': None}}

### Model: ChatAnthropic ###
❌: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'messages.1: Did not find 1 `tool_result` block(s) at the beginning of this message. Messages following `tool_use` blocks must begin with a matching number of `tool_result` blocks.'}}

### Model: ChatOpenAI ###
❌: Error code: 400 - {'error': {'message': "Invalid parameter: messages with role 'tool' must be a response to a preceeding message with 'tool_calls'.", 'type': 'invalid_request_error', 'param': 'messages.[0].role', 'code': None}}

### Model: ChatAnthropic ###
❌: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_reques

Temp quick win in docs:
- add in the error codes so show up in search
- highlight that it's important to keep tool message/tool call paired
- maybe highlighting differences for main few models?

Maybe/prob not:
- Default grouping of tool call/tool message under the hood as sensible default? e.g. when removing/trimming/summarising
- Changing error message to enrich model error code ('update with toolmessage, or remove last message in state snapshot')
- Interface at Chat Model level? e.g. o1 has no system message allowed/more and more changes in future, leave it to models to handle
- Checks on... graph compilation/runtime?
- Auto filling of ToolMessage/error handling in prod?