From 5afd6320c3536a0f99f8f380a4d13de2bc44dee4 Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Sat, 25 Apr 2026 07:06:55 +0000 Subject: [PATCH 1/2] fix(providers/openai): fix Responses replay continuity > Worked on by Mux on Mike's behalf. --- providers/openai/openai_test.go | 94 +++++-- providers/openai/responses_language_model.go | 71 ++++- providers/openai/responses_params_test.go | 257 +++++++++++++++++++ 3 files changed, 388 insertions(+), 34 deletions(-) diff --git a/providers/openai/openai_test.go b/providers/openai/openai_test.go index 07bcdc981..7fc0ee19a 100644 --- a/providers/openai/openai_test.go +++ b/providers/openai/openai_test.go @@ -3183,7 +3183,9 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) { }, } - input, warnings := toResponsesPrompt(prompt, "system", false) + input, warnings, err := toResponsesPrompt(prompt, "system", false) + + require.NoError(t, err) require.Len(t, input, 1, "should only have user message") require.Len(t, warnings, 1) @@ -3209,7 +3211,9 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) { }, } - input, warnings := toResponsesPrompt(prompt, "system", false) + input, warnings, err := toResponsesPrompt(prompt, "system", false) + + require.NoError(t, err) require.Len(t, input, 2, "should have both user and assistant messages") require.Empty(t, warnings) @@ -3235,11 +3239,22 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) { }, }, }, + { + Role: fantasy.MessageRoleTool, + Content: []fantasy.MessagePart{ + fantasy.ToolResultPart{ + ToolCallID: "call_123", + Output: fantasy.ToolResultOutputContentText{Text: "sunny"}, + }, + }, + }, } - input, warnings := toResponsesPrompt(prompt, "system", false) + input, warnings, err := toResponsesPrompt(prompt, "system", false) - require.Len(t, input, 2, "should have both user and assistant messages") + require.NoError(t, err) + + require.Len(t, input, 3, "should have user, assistant tool call, and tool result") require.Empty(t, warnings) }) @@ -3258,7 +3273,9 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) { }, } - input, warnings := toResponsesPrompt(prompt, "system", false) + input, warnings, err := toResponsesPrompt(prompt, "system", false) + + require.NoError(t, err) require.Empty(t, input) require.Len(t, warnings, 2) // One for unsupported type, one for empty message @@ -3280,16 +3297,28 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) { }, } - input, warnings := toResponsesPrompt(prompt, "system", false) + input, warnings, err := toResponsesPrompt(prompt, "system", false) + + require.NoError(t, err) require.Len(t, input, 1) require.Empty(t, warnings) }) - t.Run("should keep user messages with tool results", func(t *testing.T) { + t.Run("should keep tool messages with matching tool results", func(t *testing.T) { t.Parallel() prompt := fantasy.Prompt{ + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ToolCallPart{ + ToolCallID: "call_123", + ToolName: "get_weather", + Input: "{\"location\":\"NYC\"}", + }, + }, + }, { Role: fantasy.MessageRoleTool, Content: []fantasy.MessagePart{ @@ -3301,16 +3330,28 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) { }, } - input, warnings := toResponsesPrompt(prompt, "system", false) + input, warnings, err := toResponsesPrompt(prompt, "system", false) - require.Len(t, input, 1) + require.NoError(t, err) + + require.Len(t, input, 2) require.Empty(t, warnings) }) - t.Run("should keep user messages with tool error results", func(t *testing.T) { + t.Run("should keep tool messages with matching tool error results", func(t *testing.T) { t.Parallel() prompt := fantasy.Prompt{ + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ToolCallPart{ + ToolCallID: "call_456", + ToolName: "get_weather", + Input: "{\"location\":\"NYC\"}", + }, + }, + }, { Role: fantasy.MessageRoleTool, Content: []fantasy.MessagePart{ @@ -3322,11 +3363,14 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) { }, } - input, warnings := toResponsesPrompt(prompt, "system", false) + input, warnings, err := toResponsesPrompt(prompt, "system", false) - require.Len(t, input, 1) + require.NoError(t, err) + + require.Len(t, input, 2) require.Empty(t, warnings) }) + } func TestParseContextTooLargeError(t *testing.T) { @@ -3955,7 +3999,9 @@ func TestResponsesToPrompt_WebSearchProviderExecutedToolResults(t *testing.T) { t.Run("store false skips item reference", func(t *testing.T) { t.Parallel() - input, warnings := toResponsesPrompt(prompt, "system instructions", false) + input, warnings, err := toResponsesPrompt(prompt, "system instructions", false) + + require.NoError(t, err) require.Empty(t, warnings) require.Len(t, input, 2, @@ -3964,16 +4010,18 @@ func TestResponsesToPrompt_WebSearchProviderExecutedToolResults(t *testing.T) { require.Nil(t, input[1].OfItemReference) }) - t.Run("store true uses item reference", func(t *testing.T) { + t.Run("store true skips item reference", func(t *testing.T) { t.Parallel() - input, warnings := toResponsesPrompt(prompt, "system instructions", true) + input, warnings, err := toResponsesPrompt(prompt, "system instructions", true) + + require.NoError(t, err) require.Empty(t, warnings) - require.Len(t, input, 3, - "expected user + item_reference + assistant text when store=true") - require.NotNil(t, input[1].OfItemReference) - require.Equal(t, "ws_01", input[1].OfItemReference.ID) + require.Len(t, input, 2, + "expected user + assistant text when store=true") + require.Nil(t, input[0].OfItemReference) + require.Nil(t, input[1].OfItemReference) }) } @@ -4019,7 +4067,9 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) { t.Run("store true skips reasoning", func(t *testing.T) { t.Parallel() - input, warnings := toResponsesPrompt(prompt, "system", true) + input, warnings, err := toResponsesPrompt(prompt, "system", true) + + require.NoError(t, err) require.Empty(t, warnings) // With store=true: user, assistant text (reasoning @@ -4036,7 +4086,9 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) { t.Run("store false skips reasoning", func(t *testing.T) { t.Parallel() - input, warnings := toResponsesPrompt(prompt, "system", false) + input, warnings, err := toResponsesPrompt(prompt, "system", false) + + require.NoError(t, err) require.Empty(t, warnings) // With store=false: user, assistant text, follow-up user. diff --git a/providers/openai/responses_language_model.go b/providers/openai/responses_language_model.go index eb027109e..897b57216 100644 --- a/providers/openai/responses_language_model.go +++ b/providers/openai/responses_language_model.go @@ -175,8 +175,11 @@ func (o responsesLanguageModel) prepareParams(call fantasy.Call) (*responses.Res } storeEnabled := openaiOptions != nil && openaiOptions.Store != nil && *openaiOptions.Store - input, inputWarnings := toResponsesPrompt(call.Prompt, modelConfig.systemMessageMode, storeEnabled) + input, inputWarnings, err := toResponsesPrompt(call.Prompt, modelConfig.systemMessageMode, storeEnabled) warnings = append(warnings, inputWarnings...) + if err != nil { + return nil, warnings, err + } var include []IncludeType @@ -390,7 +393,7 @@ func responsesUsage(resp responses.Response) fantasy.Usage { return usage } -func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bool) (responses.ResponseInputParam, []fantasy.CallWarning) { +func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bool) (responses.ResponseInputParam, []fantasy.CallWarning, error) { var input responses.ResponseInputParam var warnings []fantasy.CallWarning @@ -537,16 +540,9 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo } if toolCallPart.ProviderExecuted { - if store { - // Round-trip provider-executed tools via - // item_reference, letting the API resolve - // the stored output item by ID. - input = append(input, responses.ResponseInputItemParamOfItemReference(toolCallPart.ToolCallID)) - } - // When store is disabled, server-side items are - // ephemeral and cannot be referenced. Skip the - // tool call; results are already omitted for - // provider-executed tools. + // Manual replay cannot safely reference stored + // provider-executed items without previous_response_id. + // Skip them, matching provider-executed tool results. continue } @@ -640,7 +636,56 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo } } - return input, warnings + if err := validateResponsesFunctionCallOutputs(input); err != nil { + return nil, warnings, err + } + + return input, warnings, nil +} + +func validateResponsesFunctionCallOutputs(input responses.ResponseInputParam) error { + functionCalls := make(map[string]int) + functionCallOutputs := make(map[string]int) + var functionCallIDs []string + var functionCallOutputIDs []string + + for _, item := range input { + if item.OfFunctionCall != nil { + callID := item.OfFunctionCall.CallID + if functionCalls[callID] == 0 { + functionCallIDs = append(functionCallIDs, callID) + } + functionCalls[callID]++ + } + + if item.OfFunctionCallOutput != nil { + callID := item.OfFunctionCallOutput.CallID + if functionCallOutputs[callID] == 0 { + functionCallOutputIDs = append(functionCallOutputIDs, callID) + } + functionCallOutputs[callID]++ + } + } + + for _, callID := range functionCallIDs { + if functionCalls[callID] > 1 { + return fmt.Errorf("openai responses prompt has duplicate function_call for call_id %q", callID) + } + if functionCallOutputs[callID] == 0 { + return fmt.Errorf("openai responses prompt has function_call without function_call_output for call_id %q", callID) + } + } + + for _, callID := range functionCallOutputIDs { + if functionCallOutputs[callID] > 1 { + return fmt.Errorf("openai responses prompt has duplicate function_call_output for call_id %q", callID) + } + if functionCalls[callID] == 0 { + return fmt.Errorf("openai responses prompt has function_call_output without function_call for call_id %q", callID) + } + } + + return nil } func hasVisibleResponsesUserContent(content responses.ResponseInputMessageContentListParam) bool { diff --git a/providers/openai/responses_params_test.go b/providers/openai/responses_params_test.go index 2687f1db5..8b92ab10b 100644 --- a/providers/openai/responses_params_test.go +++ b/providers/openai/responses_params_test.go @@ -277,6 +277,263 @@ func TestResponsesProviderMetadata_JSON(t *testing.T) { require.Equal(t, "resp_123", providerMetadata.ResponseID) } +func TestPrepareParams_SkipsProviderExecutedToolReferences(t *testing.T) { + t.Parallel() + + lm := testResponsesLM() + prompt := fantasy.Prompt{ + testTextMessage(fantasy.MessageRoleUser, "Search for the latest AI news"), + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ToolCallPart{ + ToolCallID: "ws_01", + ToolName: "web_search", + ProviderExecuted: true, + }, + fantasy.TextPart{Text: "Here is what I found."}, + }, + }, + } + + tests := []struct { + name string + opts *ResponsesProviderOptions + }{ + { + name: "store true", + opts: &ResponsesProviderOptions{Store: fantasy.Opt(true)}, + }, + { + name: "store false", + opts: &ResponsesProviderOptions{Store: fantasy.Opt(false)}, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + params, warnings, err := lm.prepareParams(testCall(prompt, tt.opts)) + require.NoError(t, err) + require.Empty(t, warnings) + + input := params.Input.OfInputItemList + require.Len(t, input, 2) + require.NotNil(t, input[1].OfMessage) + for _, item := range input { + require.Nil(t, item.OfItemReference) + require.Nil(t, item.OfWebSearchCall) + } + + encoded, err := json.Marshal(params) + require.NoError(t, err) + require.Contains(t, string(encoded), "Here is what I found.") + require.NotContains(t, string(encoded), "ws_01") + require.NotContains(t, string(encoded), "item_reference") + require.NotContains(t, string(encoded), "web_search_call") + + items := responseInputItemsFromJSON(t, encoded) + require.Len(t, items, 2) + for _, item := range items { + require.NotEqual(t, "item_reference", item["type"]) + require.NotEqual(t, "web_search_call", item["type"]) + require.NotEqual(t, "ws_01", item["id"]) + } + }) + } +} + +func TestPrepareParams_ValidatesFunctionCallOutputPairing(t *testing.T) { + t.Parallel() + + lm := testResponsesLM() + + t.Run("matching local call and output", func(t *testing.T) { + t.Parallel() + + params, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{ + testTextMessage(fantasy.MessageRoleUser, "weather"), + testResponsesToolCallMessage("call_local"), + testResponsesToolResultMessage("call_local", "sunny"), + }, nil)) + require.NoError(t, err) + require.Empty(t, warnings) + + var functionCalls int + var functionCallOutputs int + for _, item := range params.Input.OfInputItemList { + if item.OfFunctionCall != nil { + functionCalls++ + require.Equal(t, "call_local", item.OfFunctionCall.CallID) + } + if item.OfFunctionCallOutput != nil { + functionCallOutputs++ + require.Equal(t, "call_local", item.OfFunctionCallOutput.CallID) + } + } + require.Equal(t, 1, functionCalls) + require.Equal(t, 1, functionCallOutputs) + + encoded, err := json.Marshal(params) + require.NoError(t, err) + items := responseInputItemsFromJSON(t, encoded) + var jsonFunctionCalls int + var jsonFunctionCallOutputs int + for _, item := range items { + switch item["type"] { + case "function_call": + jsonFunctionCalls++ + require.Equal(t, "call_local", item["call_id"]) + case "function_call_output": + jsonFunctionCallOutputs++ + require.Equal(t, "call_local", item["call_id"]) + } + } + require.Equal(t, 1, jsonFunctionCalls) + require.Equal(t, 1, jsonFunctionCallOutputs) + }) + + t.Run("missing local output", func(t *testing.T) { + t.Parallel() + + _, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{ + testTextMessage(fantasy.MessageRoleUser, "weather"), + testResponsesToolCallMessage("call_missing"), + }, nil)) + require.EqualError(t, err, `openai responses prompt has function_call without function_call_output for call_id "call_missing"`) + require.Empty(t, warnings) + }) + + t.Run("duplicate local outputs", func(t *testing.T) { + t.Parallel() + + _, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{ + testResponsesToolCallMessage("call_duplicate"), + { + Role: fantasy.MessageRoleTool, + Content: []fantasy.MessagePart{ + fantasy.ToolResultPart{ + ToolCallID: "call_duplicate", + Output: fantasy.ToolResultOutputContentText{Text: "first"}, + }, + fantasy.ToolResultPart{ + ToolCallID: "call_duplicate", + Output: fantasy.ToolResultOutputContentText{Text: "second"}, + }, + }, + }, + }, nil)) + require.EqualError(t, err, `openai responses prompt has duplicate function_call_output for call_id "call_duplicate"`) + require.Empty(t, warnings) + }) + + t.Run("output without local call", func(t *testing.T) { + t.Parallel() + + _, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{ + testResponsesToolResultMessage("call_orphan", "done"), + }, nil)) + require.EqualError(t, err, `openai responses prompt has function_call_output without function_call for call_id "call_orphan"`) + require.Empty(t, warnings) + }) + + t.Run("duplicate local calls", func(t *testing.T) { + t.Parallel() + + _, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{ + testResponsesToolCallMessage("call_duplicate"), + testResponsesToolCallMessage("call_duplicate"), + testResponsesToolResultMessage("call_duplicate", "done"), + }, nil)) + require.EqualError(t, err, `openai responses prompt has duplicate function_call for call_id "call_duplicate"`) + require.Empty(t, warnings) + }) + + t.Run("provider executed output is skipped", func(t *testing.T) { + t.Parallel() + + input, warnings, err := toResponsesPrompt(fantasy.Prompt{ + testResponsesProviderToolResultMessage("ws_01"), + }, "system", false) + require.NoError(t, err) + require.Empty(t, warnings) + require.Empty(t, input) + }) + + t.Run("provider executed output does not satisfy local call", func(t *testing.T) { + t.Parallel() + + _, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{ + testResponsesToolCallMessage("call_provider_result"), + testResponsesProviderToolResultMessage("call_provider_result"), + }, nil)) + require.EqualError(t, err, `openai responses prompt has function_call without function_call_output for call_id "call_provider_result"`) + require.Empty(t, warnings) + }) +} + +func responseInputItemsFromJSON(t *testing.T, encoded []byte) []map[string]any { + t.Helper() + + var body map[string]any + require.NoError(t, json.Unmarshal(encoded, &body)) + + rawInput, ok := body["input"].([]any) + require.True(t, ok) + + items := make([]map[string]any, 0, len(rawInput)) + for _, rawItem := range rawInput { + item, ok := rawItem.(map[string]any) + require.True(t, ok) + items = append(items, item) + } + return items +} + +func testResponsesToolCallMessage(callID string) fantasy.Message { + return fantasy.Message{ + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ToolCallPart{ + ToolCallID: callID, + ToolName: "get_weather", + Input: "{\"location\":\"NYC\"}", + }, + }, + } +} + +func testResponsesToolResultMessage(callID string, text string) fantasy.Message { + return fantasy.Message{ + Role: fantasy.MessageRoleTool, + Content: []fantasy.MessagePart{ + fantasy.ToolResultPart{ + ToolCallID: callID, + Output: fantasy.ToolResultOutputContentText{ + Text: text, + }, + }, + }, + } +} + +func testResponsesProviderToolResultMessage(callID string) fantasy.Message { + return fantasy.Message{ + Role: fantasy.MessageRoleTool, + Content: []fantasy.MessagePart{ + fantasy.ToolResultPart{ + ToolCallID: callID, + ProviderExecuted: true, + Output: fantasy.ToolResultOutputContentText{ + Text: "provider result", + }, + }, + }, + } +} + func testCall(prompt fantasy.Prompt, opts *ResponsesProviderOptions) fantasy.Call { call := fantasy.Call{ Prompt: prompt, From f83367a4a2055b3cbc81d534282b8c907f612dfc Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Sun, 26 Apr 2026 02:08:13 +0000 Subject: [PATCH 2/2] fix(providers/openai): preserve Responses item continuity --- providers/openai/openai_test.go | 244 ++++++++++++++++++- providers/openai/responses_language_model.go | 139 ++++++++--- providers/openai/responses_params_test.go | 46 ++++ 3 files changed, 392 insertions(+), 37 deletions(-) diff --git a/providers/openai/openai_test.go b/providers/openai/openai_test.go index 7fc0ee19a..052d4f6ad 100644 --- a/providers/openai/openai_test.go +++ b/providers/openai/openai_test.go @@ -4064,7 +4064,7 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) { }, } - t.Run("store true skips reasoning", func(t *testing.T) { + t.Run("store true emits item_reference for reasoning", func(t *testing.T) { t.Parallel() input, warnings, err := toResponsesPrompt(prompt, "system", true) @@ -4072,14 +4072,63 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) { require.NoError(t, err) require.Empty(t, warnings) - // With store=true: user, assistant text (reasoning - // skipped), follow-up user. - require.Len(t, input, 3) + // With store=true the reasoning item is replayed as an + // item_reference so any following provider-executed item + // pairs correctly. Order: user, item_reference(rs_*), + // assistant text, user. + require.Len(t, input, 4) - // Verify no reasoning item leaked through. for _, item := range input { require.Nil(t, item.OfReasoning, - "reasoning items must not appear when store=true") + "reasoning items must not appear inline when store=true") + } + + require.NotNil(t, input[1].OfItemReference, + "expected reasoning replayed via item_reference") + require.Equal(t, reasoningItemID, input[1].OfItemReference.ID) + }) + + t.Run("store true skips reasoning when item id missing", func(t *testing.T) { + t.Parallel() + + noIDPrompt := fantasy.Prompt{ + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "What is 2+2?"}, + }, + }, + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ReasoningPart{ + Text: "thinking", + ProviderOptions: fantasy.ProviderOptions{ + Name: &ResponsesReasoningMetadata{Summary: []string{}}, + }, + }, + fantasy.TextPart{Text: "4"}, + }, + }, + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "And 3+3?"}, + }, + }, + } + + input, warnings, err := toResponsesPrompt(noIDPrompt, "system", true) + + require.NoError(t, err) + require.Empty(t, warnings) + + // Without an ItemID we cannot reference the reasoning + // item. Order: user, assistant text, user. + require.Len(t, input, 3) + for _, item := range input { + require.Nil(t, item.OfReasoning) + require.Nil(t, item.OfItemReference) } }) @@ -4097,10 +4146,193 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) { for _, item := range input { require.Nil(t, item.OfReasoning, "reasoning items must not appear when store=false") + require.Nil(t, item.OfItemReference, + "reasoning item_reference must not appear when store=false") } }) } +func TestResponsesToPrompt_ReasoningWithWebSearchCombined(t *testing.T) { + t.Parallel() + + reasoningItemID := "rs_002" + webSearchItemID := "ws_002" + + prompt := fantasy.Prompt{ + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "What is the weather in San Francisco?"}, + }, + }, + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ReasoningPart{ + Text: "I should look this up.", + ProviderOptions: fantasy.ProviderOptions{ + Name: &ResponsesReasoningMetadata{ + ItemID: reasoningItemID, + Summary: []string{}, + }, + }, + }, + fantasy.ToolCallPart{ + ToolCallID: webSearchItemID, + ToolName: "web_search", + ProviderExecuted: true, + }, + fantasy.ToolResultPart{ + ToolCallID: webSearchItemID, + ProviderExecuted: true, + }, + fantasy.TextPart{Text: "Sunny."}, + }, + }, + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "And Tokyo?"}, + }, + }, + } + + t.Run("store true pairs reasoning and web search", func(t *testing.T) { + t.Parallel() + + input, warnings, err := toResponsesPrompt(prompt, "system", true) + + require.NoError(t, err) + require.Empty(t, warnings) + + // Order: user, item_reference(rs_*), item_reference(ws_*), + // assistant text, user. + require.Len(t, input, 5) + + require.NotNil(t, input[1].OfItemReference) + require.Equal(t, reasoningItemID, input[1].OfItemReference.ID, + "reasoning item_reference must precede web_search item_reference") + + require.NotNil(t, input[2].OfItemReference) + require.Equal(t, webSearchItemID, input[2].OfItemReference.ID) + }) + + t.Run("store false skips both reasoning and provider tool call", func(t *testing.T) { + t.Parallel() + + input, warnings, err := toResponsesPrompt(prompt, "system", false) + + require.NoError(t, err) + require.Empty(t, warnings) + + // Both reasoning and the provider-executed web_search_call + // are skipped under store=false. user, assistant text, user. + require.Len(t, input, 3) + for _, item := range input { + require.Nil(t, item.OfItemReference) + require.Nil(t, item.OfReasoning) + } + }) +} + +func TestResponsesToPrompt_WebSearchRequiresReasoningReference(t *testing.T) { + t.Parallel() + + input, warnings, err := toResponsesPrompt(fantasy.Prompt{ + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ReasoningPart{ + Text: "I should search.", + ProviderOptions: fantasy.ProviderOptions{ + Name: &ResponsesReasoningMetadata{Summary: []string{}}, + }, + }, + fantasy.ToolCallPart{ + ToolCallID: "ws_missing_reasoning", + ToolName: "web_search", + ProviderExecuted: true, + }, + fantasy.TextPart{Text: "Search completed."}, + }, + }, + }, "system", true) + + require.NoError(t, err) + require.Empty(t, warnings) + require.Len(t, input, 1) + require.NotNil(t, input[0].OfMessage) +} + +func TestResponsesToPrompt_ReasoningWithFunctionCallCombined(t *testing.T) { + t.Parallel() + + reasoningItemID := "rs_003" + functionCallID := "call_003" + + prompt := fantasy.Prompt{ + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "compute 1+1"}, + }, + }, + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ReasoningPart{ + Text: "I'll call add.", + ProviderOptions: fantasy.ProviderOptions{ + Name: &ResponsesReasoningMetadata{ + ItemID: reasoningItemID, + Summary: []string{}, + }, + }, + }, + fantasy.ToolCallPart{ + ToolCallID: functionCallID, + ToolName: "add", + Input: `{"a":1,"b":1}`, + }, + }, + }, + { + Role: fantasy.MessageRoleTool, + Content: []fantasy.MessagePart{ + fantasy.ToolResultPart{ + ToolCallID: functionCallID, + Output: fantasy.ToolResultOutputContentText{Text: "2"}, + }, + }, + }, + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "thanks"}, + }, + }, + } + + input, warnings, err := toResponsesPrompt(prompt, "system", true) + + require.NoError(t, err) + require.Empty(t, warnings) + + // Order: user, item_reference(rs_003), function_call(call_003), + // function_call_output(call_003), user. + require.Len(t, input, 5) + + require.NotNil(t, input[1].OfItemReference) + require.Equal(t, reasoningItemID, input[1].OfItemReference.ID, + "reasoning item_reference must precede function_call") + + require.NotNil(t, input[2].OfFunctionCall) + require.Equal(t, functionCallID, input[2].OfFunctionCall.CallID) + + require.NotNil(t, input[3].OfFunctionCallOutput) + require.Equal(t, functionCallID, input[3].OfFunctionCallOutput.CallID) +} + func TestResponsesStream_WebSearchResponse(t *testing.T) { t.Parallel() diff --git a/providers/openai/responses_language_model.go b/providers/openai/responses_language_model.go index 897b57216..03786710e 100644 --- a/providers/openai/responses_language_model.go +++ b/providers/openai/responses_language_model.go @@ -516,6 +516,7 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo case fantasy.MessageRoleAssistant: startIdx := len(input) + lastEmittedReasoningReference := false for _, c := range msg.Content { switch c.GetType() { case fantasy.ContentTypeText: @@ -528,6 +529,7 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo continue } input = append(input, responses.ResponseInputItemParamOfMessage(textPart.Text, responses.EasyInputMessageRoleAssistant)) + lastEmittedReasoningReference = false case fantasy.ContentTypeToolCall: toolCallPart, ok := fantasy.AsContentType[fantasy.ToolCallPart](c) @@ -540,9 +542,12 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo } if toolCallPart.ProviderExecuted { - // Manual replay cannot safely reference stored - // provider-executed items without previous_response_id. - // Skip them, matching provider-executed tool results. + if store && lastEmittedReasoningReference && + isResponsesWebSearchToolCall(toolCallPart) && + toolCallPart.ToolCallID != "" { + input = append(input, responses.ResponseInputItemParamOfItemReference(toolCallPart.ToolCallID)) + } + lastEmittedReasoningReference = false continue } @@ -556,21 +561,35 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo } input = append(input, responses.ResponseInputItemParamOfFunctionCall(string(inputJSON), toolCallPart.ToolCallID, toolCallPart.ToolName)) + lastEmittedReasoningReference = false case fantasy.ContentTypeSource: // Source citations from web search are not a // recognised Responses API input type; skip. continue case fantasy.ContentTypeReasoning: - // Reasoning items are always skipped during replay. - // When store is enabled, the API already has them - // persisted server-side. When store is disabled, the - // item IDs are ephemeral and referencing them causes - // "Item not found" errors. In both cases, replaying - // reasoning inline is not supported by the API. + lastEmittedReasoningReference = false + if !store { + // When store is disabled, server-side reasoning + // items are ephemeral and cannot be referenced. + continue + } + reasoningPart, ok := fantasy.AsContentType[fantasy.ReasoningPart](c) + if !ok { + warnings = append(warnings, fantasy.CallWarning{ + Type: fantasy.CallWarningTypeOther, + Message: "assistant reasoning part does not have the right type", + }) + continue + } + meta := GetReasoningMetadata(reasoningPart.ProviderOptions) + if meta == nil || meta.ItemID == "" { + continue + } + input = append(input, responses.ResponseInputItemParamOfItemReference(meta.ItemID)) + lastEmittedReasoningReference = true continue } } - if !hasVisibleResponsesAssistantContent(input, startIdx) { warnings = append(warnings, fantasy.CallWarning{ Type: fantasy.CallWarningTypeOther, @@ -636,58 +655,116 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo } } - if err := validateResponsesFunctionCallOutputs(input); err != nil { + if err := validateResponsesInput(input); err != nil { return nil, warnings, err } return input, warnings, nil } +func isResponsesWebSearchToolCall(toolCallPart fantasy.ToolCallPart) bool { + return toolCallPart.ToolName == "web_search" || + toolCallPart.ToolName == "web_search_preview" +} + +func validateResponsesInput(input responses.ResponseInputParam) error { + if err := validateResponsesFunctionCallOutputs(input); err != nil { + return err + } + return validateResponsesItemReferences(input) +} + func validateResponsesFunctionCallOutputs(input responses.ResponseInputParam) error { - functionCalls := make(map[string]int) - functionCallOutputs := make(map[string]int) - var functionCallIDs []string - var functionCallOutputIDs []string + type callState struct { + calls int + outputs int + firstCall int + firstOutput int + } + states := make(map[string]*callState) + var callIDs []string + var outputIDs []string + + stateFor := func(callID string) *callState { + state, ok := states[callID] + if ok { + return state + } + state = &callState{firstCall: -1, firstOutput: -1} + states[callID] = state + return state + } - for _, item := range input { + for index, item := range input { if item.OfFunctionCall != nil { callID := item.OfFunctionCall.CallID - if functionCalls[callID] == 0 { - functionCallIDs = append(functionCallIDs, callID) + state := stateFor(callID) + if state.calls == 0 { + callIDs = append(callIDs, callID) + state.firstCall = index } - functionCalls[callID]++ + state.calls++ } if item.OfFunctionCallOutput != nil { callID := item.OfFunctionCallOutput.CallID - if functionCallOutputs[callID] == 0 { - functionCallOutputIDs = append(functionCallOutputIDs, callID) + state := stateFor(callID) + if state.outputs == 0 { + outputIDs = append(outputIDs, callID) + state.firstOutput = index } - functionCallOutputs[callID]++ + state.outputs++ } } - for _, callID := range functionCallIDs { - if functionCalls[callID] > 1 { + for _, callID := range callIDs { + state := states[callID] + if state.calls > 1 { return fmt.Errorf("openai responses prompt has duplicate function_call for call_id %q", callID) } - if functionCallOutputs[callID] == 0 { - return fmt.Errorf("openai responses prompt has function_call without function_call_output for call_id %q", callID) - } } - - for _, callID := range functionCallOutputIDs { - if functionCallOutputs[callID] > 1 { + for _, callID := range outputIDs { + state := states[callID] + if state.outputs > 1 { return fmt.Errorf("openai responses prompt has duplicate function_call_output for call_id %q", callID) } - if functionCalls[callID] == 0 { + } + for _, callID := range outputIDs { + state := states[callID] + if state.calls == 0 { return fmt.Errorf("openai responses prompt has function_call_output without function_call for call_id %q", callID) } + if state.firstOutput < state.firstCall { + return fmt.Errorf("openai responses prompt has function_call_output before function_call for call_id %q", callID) + } + } + for _, callID := range callIDs { + state := states[callID] + if state.outputs == 0 { + return fmt.Errorf("openai responses prompt has function_call without function_call_output for call_id %q", callID) + } } return nil } +func validateResponsesItemReferences(input responses.ResponseInputParam) error { + previousReferenceID := "" + for _, item := range input { + if item.OfItemReference == nil { + previousReferenceID = "" + continue + } + + itemID := item.OfItemReference.ID + if strings.HasPrefix(itemID, "ws_") && !strings.HasPrefix(previousReferenceID, "rs_") { + return fmt.Errorf("openai responses prompt has web_search_call item_reference without preceding reasoning item_reference for item_id %q", itemID) + } + previousReferenceID = itemID + } + return nil +} + func hasVisibleResponsesUserContent(content responses.ResponseInputMessageContentListParam) bool { return len(content) > 0 } diff --git a/providers/openai/responses_params_test.go b/providers/openai/responses_params_test.go index 8b92ab10b..ae0896f32 100644 --- a/providers/openai/responses_params_test.go +++ b/providers/openai/responses_params_test.go @@ -5,6 +5,7 @@ import ( "testing" "charm.land/fantasy" + "github.com/charmbracelet/openai-go/responses" "github.com/stretchr/testify/require" ) @@ -439,6 +440,17 @@ func TestPrepareParams_ValidatesFunctionCallOutputPairing(t *testing.T) { require.Empty(t, warnings) }) + t.Run("output before local call", func(t *testing.T) { + t.Parallel() + + _, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{ + testResponsesToolResultMessage("call_late", "done"), + testResponsesToolCallMessage("call_late"), + }, nil)) + require.EqualError(t, err, `openai responses prompt has function_call_output before function_call for call_id "call_late"`) + require.Empty(t, warnings) + }) + t.Run("duplicate local calls", func(t *testing.T) { t.Parallel() @@ -474,6 +486,40 @@ func TestPrepareParams_ValidatesFunctionCallOutputPairing(t *testing.T) { }) } +func TestValidateResponsesInput_WebSearchReferenceRequiresReasoning(t *testing.T) { + t.Parallel() + + t.Run("valid reasoning and web search references", func(t *testing.T) { + t.Parallel() + + err := validateResponsesInput(responses.ResponseInputParam{ + responses.ResponseInputItemParamOfItemReference("rs_valid"), + responses.ResponseInputItemParamOfItemReference("ws_valid"), + }) + require.NoError(t, err) + }) + + t.Run("web search reference without reasoning", func(t *testing.T) { + t.Parallel() + + err := validateResponsesInput(responses.ResponseInputParam{ + responses.ResponseInputItemParamOfItemReference("ws_orphan"), + }) + require.EqualError(t, err, `openai responses prompt has web_search_call item_reference without preceding reasoning item_reference for item_id "ws_orphan"`) + }) + + t.Run("web search reference after non-reference item", func(t *testing.T) { + t.Parallel() + + err := validateResponsesInput(responses.ResponseInputParam{ + responses.ResponseInputItemParamOfItemReference("rs_valid"), + responses.ResponseInputItemParamOfMessage("text", responses.EasyInputMessageRoleAssistant), + responses.ResponseInputItemParamOfItemReference("ws_orphan"), + }) + require.EqualError(t, err, `openai responses prompt has web_search_call item_reference without preceding reasoning item_reference for item_id "ws_orphan"`) + }) +} + func responseInputItemsFromJSON(t *testing.T, encoded []byte) []map[string]any { t.Helper()