From 5afd6320c3536a0f99f8f380a4d13de2bc44dee4 Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Sat, 25 Apr 2026 07:06:55 +0000
Subject: [PATCH 1/2] fix(providers/openai): fix Responses replay continuity

> Worked on by Mux on Mike's behalf.
---
 providers/openai/openai_test.go              |  94 +++++--
 providers/openai/responses_language_model.go |  71 ++++-
 providers/openai/responses_params_test.go    | 257 +++++++++++++++++++
 3 files changed, 388 insertions(+), 34 deletions(-)

diff --git a/providers/openai/openai_test.go b/providers/openai/openai_test.go
index 07bcdc981..7fc0ee19a 100644
--- a/providers/openai/openai_test.go
+++ b/providers/openai/openai_test.go
@@ -3183,7 +3183,9 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) {
 			},
 		}
 
-		input, warnings := toResponsesPrompt(prompt, "system", false)
+		input, warnings, err := toResponsesPrompt(prompt, "system", false)
+
+		require.NoError(t, err)
 
 		require.Len(t, input, 1, "should only have user message")
 		require.Len(t, warnings, 1)
@@ -3209,7 +3211,9 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) {
 			},
 		}
 
-		input, warnings := toResponsesPrompt(prompt, "system", false)
+		input, warnings, err := toResponsesPrompt(prompt, "system", false)
+
+		require.NoError(t, err)
 
 		require.Len(t, input, 2, "should have both user and assistant messages")
 		require.Empty(t, warnings)
@@ -3235,11 +3239,22 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) {
 					},
 				},
 			},
+			{
+				Role: fantasy.MessageRoleTool,
+				Content: []fantasy.MessagePart{
+					fantasy.ToolResultPart{
+						ToolCallID: "call_123",
+						Output:     fantasy.ToolResultOutputContentText{Text: "sunny"},
+					},
+				},
+			},
 		}
 
-		input, warnings := toResponsesPrompt(prompt, "system", false)
+		input, warnings, err := toResponsesPrompt(prompt, "system", false)
 
-		require.Len(t, input, 2, "should have both user and assistant messages")
+		require.NoError(t, err)
+
+		require.Len(t, input, 3, "should have user, assistant tool call, and tool result")
 		require.Empty(t, warnings)
 	})
 
@@ -3258,7 +3273,9 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) {
 			},
 		}
 
-		input, warnings := toResponsesPrompt(prompt, "system", false)
+		input, warnings, err := toResponsesPrompt(prompt, "system", false)
+
+		require.NoError(t, err)
 
 		require.Empty(t, input)
 		require.Len(t, warnings, 2) // One for unsupported type, one for empty message
@@ -3280,16 +3297,28 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) {
 			},
 		}
 
-		input, warnings := toResponsesPrompt(prompt, "system", false)
+		input, warnings, err := toResponsesPrompt(prompt, "system", false)
+
+		require.NoError(t, err)
 
 		require.Len(t, input, 1)
 		require.Empty(t, warnings)
 	})
 
-	t.Run("should keep user messages with tool results", func(t *testing.T) {
+	t.Run("should keep tool messages with matching tool results", func(t *testing.T) {
 		t.Parallel()
 
 		prompt := fantasy.Prompt{
+			{
+				Role: fantasy.MessageRoleAssistant,
+				Content: []fantasy.MessagePart{
+					fantasy.ToolCallPart{
+						ToolCallID: "call_123",
+						ToolName:   "get_weather",
+						Input:      "{\"location\":\"NYC\"}",
+					},
+				},
+			},
 			{
 				Role: fantasy.MessageRoleTool,
 				Content: []fantasy.MessagePart{
@@ -3301,16 +3330,28 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) {
 			},
 		}
 
-		input, warnings := toResponsesPrompt(prompt, "system", false)
+		input, warnings, err := toResponsesPrompt(prompt, "system", false)
 
-		require.Len(t, input, 1)
+		require.NoError(t, err)
+
+		require.Len(t, input, 2)
 		require.Empty(t, warnings)
 	})
 
-	t.Run("should keep user messages with tool error results", func(t *testing.T) {
+	t.Run("should keep tool messages with matching tool error results", func(t *testing.T) {
 		t.Parallel()
 
 		prompt := fantasy.Prompt{
+			{
+				Role: fantasy.MessageRoleAssistant,
+				Content: []fantasy.MessagePart{
+					fantasy.ToolCallPart{
+						ToolCallID: "call_456",
+						ToolName:   "get_weather",
+						Input:      "{\"location\":\"NYC\"}",
+					},
+				},
+			},
 			{
 				Role: fantasy.MessageRoleTool,
 				Content: []fantasy.MessagePart{
@@ -3322,11 +3363,14 @@ func TestResponsesToPrompt_DropsEmptyMessages(t *testing.T) {
 			},
 		}
 
-		input, warnings := toResponsesPrompt(prompt, "system", false)
+		input, warnings, err := toResponsesPrompt(prompt, "system", false)
 
-		require.Len(t, input, 1)
+		require.NoError(t, err)
+
+		require.Len(t, input, 2)
 		require.Empty(t, warnings)
 	})
+
 }
 
 func TestParseContextTooLargeError(t *testing.T) {
@@ -3955,7 +3999,9 @@ func TestResponsesToPrompt_WebSearchProviderExecutedToolResults(t *testing.T) {
 	t.Run("store false skips item reference", func(t *testing.T) {
 		t.Parallel()
 
-		input, warnings := toResponsesPrompt(prompt, "system instructions", false)
+		input, warnings, err := toResponsesPrompt(prompt, "system instructions", false)
+
+		require.NoError(t, err)
 
 		require.Empty(t, warnings)
 		require.Len(t, input, 2,
@@ -3964,16 +4010,18 @@ func TestResponsesToPrompt_WebSearchProviderExecutedToolResults(t *testing.T) {
 		require.Nil(t, input[1].OfItemReference)
 	})
 
-	t.Run("store true uses item reference", func(t *testing.T) {
+	t.Run("store true skips item reference", func(t *testing.T) {
 		t.Parallel()
 
-		input, warnings := toResponsesPrompt(prompt, "system instructions", true)
+		input, warnings, err := toResponsesPrompt(prompt, "system instructions", true)
+
+		require.NoError(t, err)
 
 		require.Empty(t, warnings)
-		require.Len(t, input, 3,
-			"expected user + item_reference + assistant text when store=true")
-		require.NotNil(t, input[1].OfItemReference)
-		require.Equal(t, "ws_01", input[1].OfItemReference.ID)
+		require.Len(t, input, 2,
+			"expected user + assistant text when store=true")
+		require.Nil(t, input[0].OfItemReference)
+		require.Nil(t, input[1].OfItemReference)
 	})
 }
 
@@ -4019,7 +4067,9 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) {
 	t.Run("store true skips reasoning", func(t *testing.T) {
 		t.Parallel()
 
-		input, warnings := toResponsesPrompt(prompt, "system", true)
+		input, warnings, err := toResponsesPrompt(prompt, "system", true)
+
+		require.NoError(t, err)
 		require.Empty(t, warnings)
 
 		// With store=true: user, assistant text (reasoning
@@ -4036,7 +4086,9 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) {
 	t.Run("store false skips reasoning", func(t *testing.T) {
 		t.Parallel()
 
-		input, warnings := toResponsesPrompt(prompt, "system", false)
+		input, warnings, err := toResponsesPrompt(prompt, "system", false)
+
+		require.NoError(t, err)
 		require.Empty(t, warnings)
 
 		// With store=false: user, assistant text, follow-up user.
diff --git a/providers/openai/responses_language_model.go b/providers/openai/responses_language_model.go
index eb027109e..897b57216 100644
--- a/providers/openai/responses_language_model.go
+++ b/providers/openai/responses_language_model.go
@@ -175,8 +175,11 @@ func (o responsesLanguageModel) prepareParams(call fantasy.Call) (*responses.Res
 	}
 
 	storeEnabled := openaiOptions != nil && openaiOptions.Store != nil && *openaiOptions.Store
-	input, inputWarnings := toResponsesPrompt(call.Prompt, modelConfig.systemMessageMode, storeEnabled)
+	input, inputWarnings, err := toResponsesPrompt(call.Prompt, modelConfig.systemMessageMode, storeEnabled)
 	warnings = append(warnings, inputWarnings...)
+	if err != nil {
+		return nil, warnings, err
+	}
 
 	var include []IncludeType
 
@@ -390,7 +393,7 @@ func responsesUsage(resp responses.Response) fantasy.Usage {
 	return usage
 }
 
-func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bool) (responses.ResponseInputParam, []fantasy.CallWarning) {
+func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bool) (responses.ResponseInputParam, []fantasy.CallWarning, error) {
 	var input responses.ResponseInputParam
 	var warnings []fantasy.CallWarning
 
@@ -537,16 +540,9 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo
 					}
 
 					if toolCallPart.ProviderExecuted {
-						if store {
-							// Round-trip provider-executed tools via
-							// item_reference, letting the API resolve
-							// the stored output item by ID.
-							input = append(input, responses.ResponseInputItemParamOfItemReference(toolCallPart.ToolCallID))
-						}
-						// When store is disabled, server-side items are
-						// ephemeral and cannot be referenced. Skip the
-						// tool call; results are already omitted for
-						// provider-executed tools.
+						// Manual replay cannot safely reference stored
+						// provider-executed items without previous_response_id.
+						// Skip them, matching provider-executed tool results.
 						continue
 					}
 
@@ -640,7 +636,56 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo
 		}
 	}
 
-	return input, warnings
+	if err := validateResponsesFunctionCallOutputs(input); err != nil {
+		return nil, warnings, err
+	}
+
+	return input, warnings, nil
+}
+
+func validateResponsesFunctionCallOutputs(input responses.ResponseInputParam) error {
+	functionCalls := make(map[string]int)
+	functionCallOutputs := make(map[string]int)
+	var functionCallIDs []string
+	var functionCallOutputIDs []string
+
+	for _, item := range input {
+		if item.OfFunctionCall != nil {
+			callID := item.OfFunctionCall.CallID
+			if functionCalls[callID] == 0 {
+				functionCallIDs = append(functionCallIDs, callID)
+			}
+			functionCalls[callID]++
+		}
+
+		if item.OfFunctionCallOutput != nil {
+			callID := item.OfFunctionCallOutput.CallID
+			if functionCallOutputs[callID] == 0 {
+				functionCallOutputIDs = append(functionCallOutputIDs, callID)
+			}
+			functionCallOutputs[callID]++
+		}
+	}
+
+	for _, callID := range functionCallIDs {
+		if functionCalls[callID] > 1 {
+			return fmt.Errorf("openai responses prompt has duplicate function_call for call_id %q", callID)
+		}
+		if functionCallOutputs[callID] == 0 {
+			return fmt.Errorf("openai responses prompt has function_call without function_call_output for call_id %q", callID)
+		}
+	}
+
+	for _, callID := range functionCallOutputIDs {
+		if functionCallOutputs[callID] > 1 {
+			return fmt.Errorf("openai responses prompt has duplicate function_call_output for call_id %q", callID)
+		}
+		if functionCalls[callID] == 0 {
+			return fmt.Errorf("openai responses prompt has function_call_output without function_call for call_id %q", callID)
+		}
+	}
+
+	return nil
 }
 
 func hasVisibleResponsesUserContent(content responses.ResponseInputMessageContentListParam) bool {
diff --git a/providers/openai/responses_params_test.go b/providers/openai/responses_params_test.go
index 2687f1db5..8b92ab10b 100644
--- a/providers/openai/responses_params_test.go
+++ b/providers/openai/responses_params_test.go
@@ -277,6 +277,263 @@ func TestResponsesProviderMetadata_JSON(t *testing.T) {
 	require.Equal(t, "resp_123", providerMetadata.ResponseID)
 }
 
+func TestPrepareParams_SkipsProviderExecutedToolReferences(t *testing.T) {
+	t.Parallel()
+
+	lm := testResponsesLM()
+	prompt := fantasy.Prompt{
+		testTextMessage(fantasy.MessageRoleUser, "Search for the latest AI news"),
+		{
+			Role: fantasy.MessageRoleAssistant,
+			Content: []fantasy.MessagePart{
+				fantasy.ToolCallPart{
+					ToolCallID:       "ws_01",
+					ToolName:         "web_search",
+					ProviderExecuted: true,
+				},
+				fantasy.TextPart{Text: "Here is what I found."},
+			},
+		},
+	}
+
+	tests := []struct {
+		name string
+		opts *ResponsesProviderOptions
+	}{
+		{
+			name: "store true",
+			opts: &ResponsesProviderOptions{Store: fantasy.Opt(true)},
+		},
+		{
+			name: "store false",
+			opts: &ResponsesProviderOptions{Store: fantasy.Opt(false)},
+		},
+	}
+
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			params, warnings, err := lm.prepareParams(testCall(prompt, tt.opts))
+			require.NoError(t, err)
+			require.Empty(t, warnings)
+
+			input := params.Input.OfInputItemList
+			require.Len(t, input, 2)
+			require.NotNil(t, input[1].OfMessage)
+			for _, item := range input {
+				require.Nil(t, item.OfItemReference)
+				require.Nil(t, item.OfWebSearchCall)
+			}
+
+			encoded, err := json.Marshal(params)
+			require.NoError(t, err)
+			require.Contains(t, string(encoded), "Here is what I found.")
+			require.NotContains(t, string(encoded), "ws_01")
+			require.NotContains(t, string(encoded), "item_reference")
+			require.NotContains(t, string(encoded), "web_search_call")
+
+			items := responseInputItemsFromJSON(t, encoded)
+			require.Len(t, items, 2)
+			for _, item := range items {
+				require.NotEqual(t, "item_reference", item["type"])
+				require.NotEqual(t, "web_search_call", item["type"])
+				require.NotEqual(t, "ws_01", item["id"])
+			}
+		})
+	}
+}
+
+func TestPrepareParams_ValidatesFunctionCallOutputPairing(t *testing.T) {
+	t.Parallel()
+
+	lm := testResponsesLM()
+
+	t.Run("matching local call and output", func(t *testing.T) {
+		t.Parallel()
+
+		params, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{
+			testTextMessage(fantasy.MessageRoleUser, "weather"),
+			testResponsesToolCallMessage("call_local"),
+			testResponsesToolResultMessage("call_local", "sunny"),
+		}, nil))
+		require.NoError(t, err)
+		require.Empty(t, warnings)
+
+		var functionCalls int
+		var functionCallOutputs int
+		for _, item := range params.Input.OfInputItemList {
+			if item.OfFunctionCall != nil {
+				functionCalls++
+				require.Equal(t, "call_local", item.OfFunctionCall.CallID)
+			}
+			if item.OfFunctionCallOutput != nil {
+				functionCallOutputs++
+				require.Equal(t, "call_local", item.OfFunctionCallOutput.CallID)
+			}
+		}
+		require.Equal(t, 1, functionCalls)
+		require.Equal(t, 1, functionCallOutputs)
+
+		encoded, err := json.Marshal(params)
+		require.NoError(t, err)
+		items := responseInputItemsFromJSON(t, encoded)
+		var jsonFunctionCalls int
+		var jsonFunctionCallOutputs int
+		for _, item := range items {
+			switch item["type"] {
+			case "function_call":
+				jsonFunctionCalls++
+				require.Equal(t, "call_local", item["call_id"])
+			case "function_call_output":
+				jsonFunctionCallOutputs++
+				require.Equal(t, "call_local", item["call_id"])
+			}
+		}
+		require.Equal(t, 1, jsonFunctionCalls)
+		require.Equal(t, 1, jsonFunctionCallOutputs)
+	})
+
+	t.Run("missing local output", func(t *testing.T) {
+		t.Parallel()
+
+		_, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{
+			testTextMessage(fantasy.MessageRoleUser, "weather"),
+			testResponsesToolCallMessage("call_missing"),
+		}, nil))
+		require.EqualError(t, err, `openai responses prompt has function_call without function_call_output for call_id "call_missing"`)
+		require.Empty(t, warnings)
+	})
+
+	t.Run("duplicate local outputs", func(t *testing.T) {
+		t.Parallel()
+
+		_, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{
+			testResponsesToolCallMessage("call_duplicate"),
+			{
+				Role: fantasy.MessageRoleTool,
+				Content: []fantasy.MessagePart{
+					fantasy.ToolResultPart{
+						ToolCallID: "call_duplicate",
+						Output:     fantasy.ToolResultOutputContentText{Text: "first"},
+					},
+					fantasy.ToolResultPart{
+						ToolCallID: "call_duplicate",
+						Output:     fantasy.ToolResultOutputContentText{Text: "second"},
+					},
+				},
+			},
+		}, nil))
+		require.EqualError(t, err, `openai responses prompt has duplicate function_call_output for call_id "call_duplicate"`)
+		require.Empty(t, warnings)
+	})
+
+	t.Run("output without local call", func(t *testing.T) {
+		t.Parallel()
+
+		_, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{
+			testResponsesToolResultMessage("call_orphan", "done"),
+		}, nil))
+		require.EqualError(t, err, `openai responses prompt has function_call_output without function_call for call_id "call_orphan"`)
+		require.Empty(t, warnings)
+	})
+
+	t.Run("duplicate local calls", func(t *testing.T) {
+		t.Parallel()
+
+		_, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{
+			testResponsesToolCallMessage("call_duplicate"),
+			testResponsesToolCallMessage("call_duplicate"),
+			testResponsesToolResultMessage("call_duplicate", "done"),
+		}, nil))
+		require.EqualError(t, err, `openai responses prompt has duplicate function_call for call_id "call_duplicate"`)
+		require.Empty(t, warnings)
+	})
+
+	t.Run("provider executed output is skipped", func(t *testing.T) {
+		t.Parallel()
+
+		input, warnings, err := toResponsesPrompt(fantasy.Prompt{
+			testResponsesProviderToolResultMessage("ws_01"),
+		}, "system", false)
+		require.NoError(t, err)
+		require.Empty(t, warnings)
+		require.Empty(t, input)
+	})
+
+	t.Run("provider executed output does not satisfy local call", func(t *testing.T) {
+		t.Parallel()
+
+		_, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{
+			testResponsesToolCallMessage("call_provider_result"),
+			testResponsesProviderToolResultMessage("call_provider_result"),
+		}, nil))
+		require.EqualError(t, err, `openai responses prompt has function_call without function_call_output for call_id "call_provider_result"`)
+		require.Empty(t, warnings)
+	})
+}
+
+func responseInputItemsFromJSON(t *testing.T, encoded []byte) []map[string]any {
+	t.Helper()
+
+	var body map[string]any
+	require.NoError(t, json.Unmarshal(encoded, &body))
+
+	rawInput, ok := body["input"].([]any)
+	require.True(t, ok)
+
+	items := make([]map[string]any, 0, len(rawInput))
+	for _, rawItem := range rawInput {
+		item, ok := rawItem.(map[string]any)
+		require.True(t, ok)
+		items = append(items, item)
+	}
+	return items
+}
+
+func testResponsesToolCallMessage(callID string) fantasy.Message {
+	return fantasy.Message{
+		Role: fantasy.MessageRoleAssistant,
+		Content: []fantasy.MessagePart{
+			fantasy.ToolCallPart{
+				ToolCallID: callID,
+				ToolName:   "get_weather",
+				Input:      "{\"location\":\"NYC\"}",
+			},
+		},
+	}
+}
+
+func testResponsesToolResultMessage(callID string, text string) fantasy.Message {
+	return fantasy.Message{
+		Role: fantasy.MessageRoleTool,
+		Content: []fantasy.MessagePart{
+			fantasy.ToolResultPart{
+				ToolCallID: callID,
+				Output: fantasy.ToolResultOutputContentText{
+					Text: text,
+				},
+			},
+		},
+	}
+}
+
+func testResponsesProviderToolResultMessage(callID string) fantasy.Message {
+	return fantasy.Message{
+		Role: fantasy.MessageRoleTool,
+		Content: []fantasy.MessagePart{
+			fantasy.ToolResultPart{
+				ToolCallID:       callID,
+				ProviderExecuted: true,
+				Output: fantasy.ToolResultOutputContentText{
+					Text: "provider result",
+				},
+			},
+		},
+	}
+}
+
 func testCall(prompt fantasy.Prompt, opts *ResponsesProviderOptions) fantasy.Call {
 	call := fantasy.Call{
 		Prompt: prompt,

From f83367a4a2055b3cbc81d534282b8c907f612dfc Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Sun, 26 Apr 2026 02:08:13 +0000
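Subject: [PATCH 2/2] fix(providers/openai): preserve Responses item continuity

When store is enabled, replay assistant reasoning parts that carry an
item ID as item_reference entries instead of dropping them. A
provider-executed web_search / web_search_preview call is replayed as
an item_reference only when it directly follows such a reasoning
reference; otherwise it is still skipped. With store disabled, both
reasoning and provider-executed tool calls remain skipped.

Input validation now goes through validateResponsesInput, which runs
the function call/output pairing check and then
validateResponsesItemReferences.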
---
 providers/openai/openai_test.go              | 244 ++++++++++++++++++-
 providers/openai/responses_language_model.go | 139 ++++++++---
 providers/openai/responses_params_test.go    |  46 ++++
 3 files changed, 392 insertions(+), 37 deletions(-)

diff --git a/providers/openai/openai_test.go b/providers/openai/openai_test.go
index 7fc0ee19a..052d4f6ad 100644
--- a/providers/openai/openai_test.go
+++ b/providers/openai/openai_test.go
@@ -4064,7 +4064,7 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) {
 		},
 	}
 
-	t.Run("store true skips reasoning", func(t *testing.T) {
+	t.Run("store true emits item_reference for reasoning", func(t *testing.T) {
 		t.Parallel()
 
 		input, warnings, err := toResponsesPrompt(prompt, "system", true)
@@ -4072,14 +4072,63 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) {
 		require.NoError(t, err)
 		require.Empty(t, warnings)
 
-		// With store=true: user, assistant text (reasoning
-		// skipped), follow-up user.
-		require.Len(t, input, 3)
+		// With store=true the reasoning item is replayed as an
+		// item_reference so any following provider-executed item
+		// pairs correctly. Order: user, item_reference(rs_*),
+		// assistant text, user.
+		require.Len(t, input, 4)
 
-		// Verify no reasoning item leaked through.
 		for _, item := range input {
 			require.Nil(t, item.OfReasoning,
-				"reasoning items must not appear when store=true")
+				"reasoning items must not appear inline when store=true")
+		}
+
+		require.NotNil(t, input[1].OfItemReference,
+			"expected reasoning replayed via item_reference")
+		require.Equal(t, reasoningItemID, input[1].OfItemReference.ID)
+	})
+
+	t.Run("store true skips reasoning when item id missing", func(t *testing.T) {
+		t.Parallel()
+
+		noIDPrompt := fantasy.Prompt{
+			{
+				Role: fantasy.MessageRoleUser,
+				Content: []fantasy.MessagePart{
+					fantasy.TextPart{Text: "What is 2+2?"},
+				},
+			},
+			{
+				Role: fantasy.MessageRoleAssistant,
+				Content: []fantasy.MessagePart{
+					fantasy.ReasoningPart{
+						Text: "thinking",
+						ProviderOptions: fantasy.ProviderOptions{
+							Name: &ResponsesReasoningMetadata{Summary: []string{}},
+						},
+					},
+					fantasy.TextPart{Text: "4"},
+				},
+			},
+			{
+				Role: fantasy.MessageRoleUser,
+				Content: []fantasy.MessagePart{
+					fantasy.TextPart{Text: "And 3+3?"},
+				},
+			},
+		}
+
+		input, warnings, err := toResponsesPrompt(noIDPrompt, "system", true)
+
+		require.NoError(t, err)
+		require.Empty(t, warnings)
+
+		// Without an ItemID we cannot reference the reasoning
+		// item. Order: user, assistant text, user.
+		require.Len(t, input, 3)
+		for _, item := range input {
+			require.Nil(t, item.OfReasoning)
+			require.Nil(t, item.OfItemReference)
 		}
 	})
 
@@ -4097,10 +4146,193 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) {
 		for _, item := range input {
 			require.Nil(t, item.OfReasoning,
 				"reasoning items must not appear when store=false")
+			require.Nil(t, item.OfItemReference,
+				"reasoning item_reference must not appear when store=false")
 		}
 	})
 }
 
+func TestResponsesToPrompt_ReasoningWithWebSearchCombined(t *testing.T) {
+	t.Parallel()
+
+	reasoningItemID := "rs_002"
+	webSearchItemID := "ws_002"
+
+	prompt := fantasy.Prompt{
+		{
+			Role: fantasy.MessageRoleUser,
+			Content: []fantasy.MessagePart{
+				fantasy.TextPart{Text: "What is the weather in San Francisco?"},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleAssistant,
+			Content: []fantasy.MessagePart{
+				fantasy.ReasoningPart{
+					Text: "I should look this up.",
+					ProviderOptions: fantasy.ProviderOptions{
+						Name: &ResponsesReasoningMetadata{
+							ItemID:  reasoningItemID,
+							Summary: []string{},
+						},
+					},
+				},
+				fantasy.ToolCallPart{
+					ToolCallID:       webSearchItemID,
+					ToolName:         "web_search",
+					ProviderExecuted: true,
+				},
+				fantasy.ToolResultPart{
+					ToolCallID:       webSearchItemID,
+					ProviderExecuted: true,
+				},
+				fantasy.TextPart{Text: "Sunny."},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleUser,
+			Content: []fantasy.MessagePart{
+				fantasy.TextPart{Text: "And Tokyo?"},
+			},
+		},
+	}
+
+	t.Run("store true pairs reasoning and web search", func(t *testing.T) {
+		t.Parallel()
+
+		input, warnings, err := toResponsesPrompt(prompt, "system", true)
+
+		require.NoError(t, err)
+		require.Empty(t, warnings)
+
+		// Order: user, item_reference(rs_*), item_reference(ws_*),
+		// assistant text, user.
+		require.Len(t, input, 5)
+
+		require.NotNil(t, input[1].OfItemReference)
+		require.Equal(t, reasoningItemID, input[1].OfItemReference.ID,
+			"reasoning item_reference must precede web_search item_reference")
+
+		require.NotNil(t, input[2].OfItemReference)
+		require.Equal(t, webSearchItemID, input[2].OfItemReference.ID)
+	})
+
+	t.Run("store false skips both reasoning and provider tool call", func(t *testing.T) {
+		t.Parallel()
+
+		input, warnings, err := toResponsesPrompt(prompt, "system", false)
+
+		require.NoError(t, err)
+		require.Empty(t, warnings)
+
+		// Both reasoning and the provider-executed web_search_call
+		// are skipped under store=false. user, assistant text, user.
+		require.Len(t, input, 3)
+		for _, item := range input {
+			require.Nil(t, item.OfItemReference)
+			require.Nil(t, item.OfReasoning)
+		}
+	})
+}
+
+func TestResponsesToPrompt_WebSearchRequiresReasoningReference(t *testing.T) {
+	t.Parallel()
+
+	input, warnings, err := toResponsesPrompt(fantasy.Prompt{
+		{
+			Role: fantasy.MessageRoleAssistant,
+			Content: []fantasy.MessagePart{
+				fantasy.ReasoningPart{
+					Text: "I should search.",
+					ProviderOptions: fantasy.ProviderOptions{
+						Name: &ResponsesReasoningMetadata{Summary: []string{}},
+					},
+				},
+				fantasy.ToolCallPart{
+					ToolCallID:       "ws_missing_reasoning",
+					ToolName:         "web_search",
+					ProviderExecuted: true,
+				},
+				fantasy.TextPart{Text: "Search completed."},
+			},
+		},
+	}, "system", true)
+
+	require.NoError(t, err)
+	require.Empty(t, warnings)
+	require.Len(t, input, 1)
+	require.NotNil(t, input[0].OfMessage)
+}
+
+func TestResponsesToPrompt_ReasoningWithFunctionCallCombined(t *testing.T) {
+	t.Parallel()
+
+	reasoningItemID := "rs_003"
+	functionCallID := "call_003"
+
+	prompt := fantasy.Prompt{
+		{
+			Role: fantasy.MessageRoleUser,
+			Content: []fantasy.MessagePart{
+				fantasy.TextPart{Text: "compute 1+1"},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleAssistant,
+			Content: []fantasy.MessagePart{
+				fantasy.ReasoningPart{
+					Text: "I'll call add.",
+					ProviderOptions: fantasy.ProviderOptions{
+						Name: &ResponsesReasoningMetadata{
+							ItemID:  reasoningItemID,
+							Summary: []string{},
+						},
+					},
+				},
+				fantasy.ToolCallPart{
+					ToolCallID: functionCallID,
+					ToolName:   "add",
+					Input:      `{"a":1,"b":1}`,
+				},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleTool,
+			Content: []fantasy.MessagePart{
+				fantasy.ToolResultPart{
+					ToolCallID: functionCallID,
+					Output:     fantasy.ToolResultOutputContentText{Text: "2"},
+				},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleUser,
+			Content: []fantasy.MessagePart{
+				fantasy.TextPart{Text: "thanks"},
+			},
+		},
+	}
+
+	input, warnings, err := toResponsesPrompt(prompt, "system", true)
+
+	require.NoError(t, err)
+	require.Empty(t, warnings)
+
+	// Order: user, item_reference(rs_003), function_call(call_003),
+	// function_call_output(call_003), user.
+	require.Len(t, input, 5)
+
+	require.NotNil(t, input[1].OfItemReference)
+	require.Equal(t, reasoningItemID, input[1].OfItemReference.ID,
+		"reasoning item_reference must precede function_call")
+
+	require.NotNil(t, input[2].OfFunctionCall)
+	require.Equal(t, functionCallID, input[2].OfFunctionCall.CallID)
+
+	require.NotNil(t, input[3].OfFunctionCallOutput)
+	require.Equal(t, functionCallID, input[3].OfFunctionCallOutput.CallID)
+}
+
 func TestResponsesStream_WebSearchResponse(t *testing.T) {
 	t.Parallel()
 
diff --git a/providers/openai/responses_language_model.go b/providers/openai/responses_language_model.go
index 897b57216..03786710e 100644
--- a/providers/openai/responses_language_model.go
+++ b/providers/openai/responses_language_model.go
@@ -516,6 +516,7 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo
 
 		case fantasy.MessageRoleAssistant:
 			startIdx := len(input)
+			lastEmittedReasoningReference := false
 			for _, c := range msg.Content {
 				switch c.GetType() {
 				case fantasy.ContentTypeText:
@@ -528,6 +529,7 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo
 						continue
 					}
 					input = append(input, responses.ResponseInputItemParamOfMessage(textPart.Text, responses.EasyInputMessageRoleAssistant))
+					lastEmittedReasoningReference = false
 
 				case fantasy.ContentTypeToolCall:
 					toolCallPart, ok := fantasy.AsContentType[fantasy.ToolCallPart](c)
@@ -540,9 +542,12 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo
 					}
 
 					if toolCallPart.ProviderExecuted {
-						// Manual replay cannot safely reference stored
-						// provider-executed items without previous_response_id.
-						// Skip them, matching provider-executed tool results.
+						if store && lastEmittedReasoningReference &&
+							isResponsesWebSearchToolCall(toolCallPart) &&
+							toolCallPart.ToolCallID != "" {
+							input = append(input, responses.ResponseInputItemParamOfItemReference(toolCallPart.ToolCallID))
+						}
+						lastEmittedReasoningReference = false
 						continue
 					}
 
@@ -556,21 +561,35 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo
 					}
 
 					input = append(input, responses.ResponseInputItemParamOfFunctionCall(string(inputJSON), toolCallPart.ToolCallID, toolCallPart.ToolName))
+					lastEmittedReasoningReference = false
 				case fantasy.ContentTypeSource:
 					// Source citations from web search are not a
 					// recognised Responses API input type; skip.
 					continue
 				case fantasy.ContentTypeReasoning:
-					// Reasoning items are always skipped during replay.
-					// When store is enabled, the API already has them
-					// persisted server-side. When store is disabled, the
-					// item IDs are ephemeral and referencing them causes
-					// "Item not found" errors. In both cases, replaying
-					// reasoning inline is not supported by the API.
+					lastEmittedReasoningReference = false
+					if !store {
+						// When store is disabled, server-side reasoning
+						// items are ephemeral and cannot be referenced.
+						continue
+					}
+					reasoningPart, ok := fantasy.AsContentType[fantasy.ReasoningPart](c)
+					if !ok {
+						warnings = append(warnings, fantasy.CallWarning{
+							Type:    fantasy.CallWarningTypeOther,
+							Message: "assistant reasoning part does not have the right type",
+						})
+						continue
+					}
+					meta := GetReasoningMetadata(reasoningPart.ProviderOptions)
+					if meta == nil || meta.ItemID == "" {
+						continue
+					}
+					input = append(input, responses.ResponseInputItemParamOfItemReference(meta.ItemID))
+					lastEmittedReasoningReference = true
 					continue
 				}
 			}
-
 			if !hasVisibleResponsesAssistantContent(input, startIdx) {
 				warnings = append(warnings, fantasy.CallWarning{
 					Type:    fantasy.CallWarningTypeOther,
@@ -636,58 +655,116 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo
 		}
 	}
 
-	if err := validateResponsesFunctionCallOutputs(input); err != nil {
+	if err := validateResponsesInput(input); err != nil {
 		return nil, warnings, err
 	}
 
 	return input, warnings, nil
 }
 
+// isResponsesWebSearchToolCall reports whether toolCallPart names a web search tool.
+func isResponsesWebSearchToolCall(toolCallPart fantasy.ToolCallPart) bool {
+	return toolCallPart.ToolName == "web_search" || toolCallPart.ToolName == "web_search_preview"
+}
+
+func validateResponsesInput(input responses.ResponseInputParam) error {
+	if err := validateResponsesFunctionCallOutputs(input); err != nil {
+		return err // pairing errors are reported before item_reference ordering errors
+	}
+	return validateResponsesItemReferences(input)
+}
+
 func validateResponsesFunctionCallOutputs(input responses.ResponseInputParam) error {
-	functionCalls := make(map[string]int)
-	functionCallOutputs := make(map[string]int)
-	var functionCallIDs []string
-	var functionCallOutputIDs []string
+	type callState struct {
+		calls       int // number of function_call items seen for this call_id
+		outputs     int // number of function_call_output items seen for this call_id
+		firstCall   int // input index of the first function_call, -1 if none seen
+		firstOutput int // input index of the first function_call_output, -1 if none seen
+	}
+	states := make(map[string]*callState)
+	var callIDs []string   // distinct call IDs, in order of first function_call
+	var outputIDs []string // distinct call IDs, in order of first function_call_output
+
+	stateFor := func(callID string) *callState {
+		state, ok := states[callID]
+		if ok {
+			return state
+		}
+		state = &callState{firstCall: -1, firstOutput: -1} // created on first use; -1 means "not seen yet"
+		states[callID] = state
+		return state
+	}
 
-	for _, item := range input {
+	for index, item := range input {
 		if item.OfFunctionCall != nil {
 			callID := item.OfFunctionCall.CallID
-			if functionCalls[callID] == 0 {
-				functionCallIDs = append(functionCallIDs, callID)
+			state := stateFor(callID)
+			if state.calls == 0 {
+				callIDs = append(callIDs, callID)
+				state.firstCall = index
 			}
-			functionCalls[callID]++
+			state.calls++
 		}
 
 		if item.OfFunctionCallOutput != nil {
 			callID := item.OfFunctionCallOutput.CallID
-			if functionCallOutputs[callID] == 0 {
-				functionCallOutputIDs = append(functionCallOutputIDs, callID)
+			state := stateFor(callID)
+			if state.outputs == 0 {
+				outputIDs = append(outputIDs, callID)
+				state.firstOutput = index
 			}
-			functionCallOutputs[callID]++
+			state.outputs++
 		}
 	}
 
-	for _, callID := range functionCallIDs {
-		if functionCalls[callID] > 1 {
+	for _, callID := range callIDs {
+		state := states[callID]
+		if state.calls > 1 {
 			return fmt.Errorf("openai responses prompt has duplicate function_call for call_id %q", callID)
 		}
-		if functionCallOutputs[callID] == 0 {
-			return fmt.Errorf("openai responses prompt has function_call without function_call_output for call_id %q", callID)
-		}
 	}
-
-	for _, callID := range functionCallOutputIDs {
-		if functionCallOutputs[callID] > 1 {
+	for _, callID := range outputIDs {
+		state := states[callID]
+		if state.outputs > 1 {
 			return fmt.Errorf("openai responses prompt has duplicate function_call_output for call_id %q", callID)
 		}
-		if functionCalls[callID] == 0 {
+	}
+	for _, callID := range outputIDs { // pairing and ordering are checked only after both duplicate checks pass
+		state := states[callID]
+		if state.calls == 0 {
 			return fmt.Errorf("openai responses prompt has function_call_output without function_call for call_id %q", callID)
 		}
+		if state.firstOutput < state.firstCall { // the output sits earlier in input than its call
+			return fmt.Errorf("openai responses prompt has function_call_output before function_call for call_id %q", callID)
+		}
+	}
+	for _, callID := range callIDs {
+		state := states[callID]
+		if state.outputs == 0 {
+			return fmt.Errorf("openai responses prompt has function_call without function_call_output for call_id %q", callID)
+		}
 	}
 
 	return nil
 }
 
+// validateResponsesItemReferences returns an error when a "ws_" item_reference
+// is not directly preceded in input by an "rs_" item_reference.
+func validateResponsesItemReferences(input responses.ResponseInputParam) error {
+	prevID := ""
+	for _, item := range input {
+		id := "" // stays empty for non-reference items, which also clears prevID below
+		if ref := item.OfItemReference; ref != nil {
+			id = ref.ID
+		}
+		if strings.HasPrefix(id, "ws_") && !strings.HasPrefix(prevID, "rs_") {
+			return fmt.Errorf("openai responses prompt has web_search_call item_reference without preceding reasoning item_reference for item_id %q", id)
+		}
+		prevID = id
+	}
+	return nil
+}
+
 func hasVisibleResponsesUserContent(content responses.ResponseInputMessageContentListParam) bool {
 	return len(content) > 0
 }
diff --git a/providers/openai/responses_params_test.go b/providers/openai/responses_params_test.go
index 8b92ab10b..ae0896f32 100644
--- a/providers/openai/responses_params_test.go
+++ b/providers/openai/responses_params_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 
 	"charm.land/fantasy"
+	"github.com/charmbracelet/openai-go/responses"
 	"github.com/stretchr/testify/require"
 )
 
@@ -439,6 +440,17 @@ func TestPrepareParams_ValidatesFunctionCallOutputPairing(t *testing.T) {
 		require.Empty(t, warnings)
 	})
 
+	t.Run("output before local call", func(t *testing.T) {
+		t.Parallel()
+
+		_, warnings, err := lm.prepareParams(testCall(fantasy.Prompt{
+			testResponsesToolResultMessage("call_late", "done"),
+			testResponsesToolCallMessage("call_late"),
+		}, nil))
+		require.EqualError(t, err, `openai responses prompt has function_call_output before function_call for call_id "call_late"`)
+		require.Empty(t, warnings)
+	})
+
 	t.Run("duplicate local calls", func(t *testing.T) {
 		t.Parallel()
 
@@ -474,6 +486,40 @@ func TestPrepareParams_ValidatesFunctionCallOutputPairing(t *testing.T) {
 	})
 }
 
+// TestValidateResponsesInput_WebSearchReferenceRequiresReasoning checks that a
+// "ws_" item_reference passes validation only directly after an "rs_" one.
+func TestValidateResponsesInput_WebSearchReferenceRequiresReasoning(t *testing.T) {
+	t.Parallel()
+
+	const orphanErr = `openai responses prompt has web_search_call item_reference without preceding reasoning item_reference for item_id "ws_orphan"`
+	// check validates input in a parallel subtest; an empty wantErr means no error is expected.
+	check := func(name, wantErr string, input responses.ResponseInputParam) {
+		t.Run(name, func(t *testing.T) {
+			t.Parallel()
+
+			err := validateResponsesInput(input)
+			if wantErr == "" {
+				require.NoError(t, err)
+				return
+			}
+			require.EqualError(t, err, wantErr)
+		})
+	}
+
+	check("valid reasoning and web search references", "", responses.ResponseInputParam{
+		responses.ResponseInputItemParamOfItemReference("rs_valid"),
+		responses.ResponseInputItemParamOfItemReference("ws_valid"),
+	})
+	check("web search reference without reasoning", orphanErr, responses.ResponseInputParam{
+		responses.ResponseInputItemParamOfItemReference("ws_orphan"),
+	})
+	check("web search reference after non-reference item", orphanErr, responses.ResponseInputParam{
+		responses.ResponseInputItemParamOfItemReference("rs_valid"),
+		responses.ResponseInputItemParamOfMessage("text", responses.EasyInputMessageRoleAssistant),
+		responses.ResponseInputItemParamOfItemReference("ws_orphan"),
+	})
+}
+
 func responseInputItemsFromJSON(t *testing.T, encoded []byte) []map[string]any {
 	t.Helper()