diff --git a/src/services/aiService.ts b/src/services/aiService.ts
index ea90a4ab2e..1073cd4bf5 100644
--- a/src/services/aiService.ts
+++ b/src/services/aiService.ts
@@ -19,7 +19,7 @@ import {
   validateAnthropicCompliance,
   addInterruptedSentinel,
   filterEmptyAssistantMessages,
-  stripReasoningForOpenAI,
+  clearProviderMetadataForOpenAI,
 } from "@/utils/messages/modelMessageTransform";
 import { applyCacheControl } from "@/utils/ai/cacheStrategy";
 import type { HistoryService } from "./historyService";
@@ -283,31 +283,31 @@ export class AIService extends EventEmitter {
     const [providerName] = modelString.split(":");
 
     // Filter out assistant messages with only reasoning (no text/tools)
-    let filteredMessages = filterEmptyAssistantMessages(messages);
+    const filteredMessages = filterEmptyAssistantMessages(messages);
     log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`);
     log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages);
 
-    // OpenAI-specific: Strip reasoning parts from history
-    // OpenAI manages reasoning via previousResponseId; sending Anthropic-style reasoning
-    // parts creates orphaned reasoning items that cause API errors
-    if (providerName === "openai") {
-      filteredMessages = stripReasoningForOpenAI(filteredMessages);
-      log.debug("Stripped reasoning parts for OpenAI");
-      log.debug_obj(`${workspaceId}/1b_openai_stripped.json`, filteredMessages);
-    }
-
     // Add [INTERRUPTED] sentinel to partial messages (for model context)
     const messagesWithSentinel = addInterruptedSentinel(filteredMessages);
 
     // Convert CmuxMessage to ModelMessage format using Vercel AI SDK utility
     // Type assertion needed because CmuxMessage has custom tool parts for interrupted tools
     // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-argument
-    const modelMessages = convertToModelMessages(messagesWithSentinel as any);
+    let modelMessages = convertToModelMessages(messagesWithSentinel as any);
     log.debug_obj(`${workspaceId}/2_model_messages.json`, modelMessages);
 
+    // OpenAI-specific: Clear provider metadata to prevent reasoning/tool errors
+    // OpenAI manages reasoning via previousResponseId; sending stale provider metadata
+    // from history causes "reasoning without following item" and tool call errors
+    if (providerName === "openai") {
+      modelMessages = clearProviderMetadataForOpenAI(modelMessages);
+      log.debug("Cleared provider metadata for OpenAI");
+      log.debug_obj(`${workspaceId}/2a_openai_cleaned.json`, modelMessages);
+    }
+
     // Apply ModelMessage transforms based on provider requirements
-    const transformedMessages = transformModelMessages(modelMessages, providerName);
+    const transformedMessages = transformModelMessages(modelMessages);
 
     // Apply cache control for Anthropic models AFTER transformation
     const finalMessages = applyCacheControl(transformedMessages, modelString);
@@ -387,8 +387,7 @@ export class AIService extends EventEmitter {
           timestamp: Date.now(),
         },
         providerOptions,
-        maxOutputTokens,
-        toolPolicy
+        maxOutputTokens
       );
 
       if (!streamResult.success) {
diff --git a/src/utils/messages/modelMessageTransform.test.ts b/src/utils/messages/modelMessageTransform.test.ts
index 0db2f0574d..c6af6d14b9 100644
--- a/src/utils/messages/modelMessageTransform.test.ts
+++ b/src/utils/messages/modelMessageTransform.test.ts
@@ -23,7 +23,7 @@ describe("modelMessageTransform", () => {
       assistantMsg,
     ];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
 
     expect(result).toEqual(messages);
   });
@@ -38,7 +38,7 @@ describe("modelMessageTransform", () => {
     };
     const messages: ModelMessage[] = [assistantMsg1, assistantMsg2];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
 
     expect(result).toEqual(messages);
   });
@@ -52,7 +52,7 @@ describe("modelMessageTransform", () => {
     };
     const messages: ModelMessage[] = [assistantMsg];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
 
     // Should only keep text, strip interrupted tool calls
     expect(result).toHaveLength(1);
@@ -71,7 +71,7 @@ describe("modelMessageTransform", () => {
    };
     const messages: ModelMessage[] = [assistantMsg];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
 
     // Should filter out the entire message since it only has orphaned tool calls
     expect(result).toHaveLength(0);
@@ -108,7 +108,7 @@ describe("modelMessageTransform", () => {
     };
     const messages: ModelMessage[] = [assistantMsg, toolMsg];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
 
     // Should have: text message, tool calls (only call1 & call2), tool results
     expect(result).toHaveLength(3);
@@ -198,7 +198,7 @@ describe("modelMessageTransform", () => {
     };
     const messages: ModelMessage[] = [assistantMsg, toolMsg];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
 
     // Should split into multiple messages with tool results properly placed
     expect(result.length).toBeGreaterThan(2);
@@ -323,7 +323,7 @@ describe("modelMessageTransform", () => {
       },
     ];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
     expect(result).toHaveLength(1);
     expect(result[0].role).toBe("user");
     expect((result[0].content as Array<{ type: string; text: string }>)[0].text).toBe("Hello");
@@ -341,7 +341,7 @@ describe("modelMessageTransform", () => {
       },
     ];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
     expect(result).toHaveLength(1);
     expect(result[0].role).toBe("user");
     expect((result[0].content as Array<{ type: string; text: string }>)[0].text).toBe(
@@ -365,7 +365,7 @@ describe("modelMessageTransform", () => {
       },
     ];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
     expect(result).toHaveLength(1);
     expect(result[0].role).toBe("user");
     expect((result[0].content as Array<{ type: string; text: string }>)[0].text).toBe(
@@ -389,7 +389,7 @@ describe("modelMessageTransform", () => {
       },
     ];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages(messages);
     expect(result).toHaveLength(3);
     expect(result[0].role).toBe("user");
     expect((result[0].content as Array<{ type: string; text: string }>)[0].text).toBe("Hello");
@@ -517,8 +517,8 @@ describe("modelMessageTransform", () => {
     });
   });
 
-  describe("reasoning part stripping for OpenAI", () => {
-    it("should strip reasoning parts for OpenAI provider", () => {
+  describe("reasoning part handling", () => {
+    it("should preserve reasoning parts for both OpenAI and Anthropic", () => {
       const messages: ModelMessage[] = [
         {
           role: "user",
@@ -533,46 +533,28 @@ describe("modelMessageTransform", () => {
         },
       ];
 
-      const result = transformModelMessages(messages, "openai");
+      // Both providers should preserve reasoning parts
+      // OpenAI-specific metadata clearing is done in aiService.ts, not in transformModelMessages
+      const resultOpenAI = transformModelMessages(messages);
+      const resultAnthropic = transformModelMessages(messages);
 
-      // Should have 2 messages, assistant message should only have text
-      expect(result).toHaveLength(2);
-      expect(result[1].role).toBe("assistant");
-      expect((result[1] as AssistantModelMessage).content).toEqual([
-        { type: "text", text: "Here's the solution" },
-      ]);
-    });
-
-    it("should preserve reasoning parts for Anthropic provider", () => {
-      const messages: ModelMessage[] = [
-        {
-          role: "user",
-          content: [{ type: "text", text: "Solve this problem" }],
-        },
-        {
-          role: "assistant",
-          content: [
-            { type: "reasoning", text: "Let me think about this..." },
-            { type: "text", text: "Here's the solution" },
-          ],
-        },
-      ];
+      // Both should have 2 messages with reasoning and text preserved
+      expect(resultOpenAI).toHaveLength(2);
+      expect(resultAnthropic).toHaveLength(2);
 
-      const result = transformModelMessages(messages, "anthropic");
-
-      // Should have 2 messages, assistant message should have both reasoning and text
-      expect(result).toHaveLength(2);
-      expect(result[1].role).toBe("assistant");
-      const content = (result[1] as AssistantModelMessage).content;
-      expect(Array.isArray(content)).toBe(true);
-      if (Array.isArray(content)) {
-        expect(content).toHaveLength(2);
-        expect(content[0]).toEqual({ type: "reasoning", text: "Let me think about this..." });
-        expect(content[1]).toEqual({ type: "text", text: "Here's the solution" });
+      for (const result of [resultOpenAI, resultAnthropic]) {
+        expect(result[1].role).toBe("assistant");
+        const content = (result[1] as AssistantModelMessage).content;
+        expect(Array.isArray(content)).toBe(true);
+        if (Array.isArray(content)) {
+          expect(content).toHaveLength(2);
+          expect(content[0]).toEqual({ type: "reasoning", text: "Let me think about this..." });
+          expect(content[1]).toEqual({ type: "text", text: "Here's the solution" });
+        }
       }
     });
 
-    it("should filter out reasoning-only messages for OpenAI", () => {
+    it("should filter out reasoning-only messages for all providers", () => {
       const messages: ModelMessage[] = [
         {
           role: "user",
@@ -584,14 +566,18 @@ describe("modelMessageTransform", () => {
         },
       ];
 
-      const result = transformModelMessages(messages, "openai");
+      // Both providers should filter reasoning-only messages
+      const resultOpenAI = transformModelMessages(messages);
+      const resultAnthropic = transformModelMessages(messages);
 
-      // Should only have user message, reasoning-only assistant message should be filtered out
-      expect(result).toHaveLength(1);
-      expect(result[0].role).toBe("user");
+      // Should only have user message for both providers
+      expect(resultOpenAI).toHaveLength(1);
+      expect(resultOpenAI[0].role).toBe("user");
+      expect(resultAnthropic).toHaveLength(1);
+      expect(resultAnthropic[0].role).toBe("user");
     });
 
-    it("should preserve tool calls when stripping reasoning for OpenAI", () => {
+    it("should preserve reasoning and tool calls in messages", () => {
       const messages: ModelMessage[] = [
         {
           role: "user",
@@ -618,9 +604,9 @@ describe("modelMessageTransform", () => {
         },
       ];
 
-      const result = transformModelMessages(messages, "openai");
+      const result = transformModelMessages(messages);
 
-      // Should have user, text, tool-call, tool-result (no reasoning)
+      // Should split into text message and tool-call/tool-result messages
      expect(result.length).toBeGreaterThan(2);
 
       // Find the assistant message with text
@@ -633,8 +619,8 @@ describe("modelMessageTransform", () => {
       if (textMessage) {
         const content = (textMessage as AssistantModelMessage).content;
         if (Array.isArray(content)) {
-          // Should not have reasoning parts
-          expect(content.some((c) => c.type === "reasoning")).toBe(false);
+          // Should have reasoning parts preserved
+          expect(content.some((c) => c.type === "reasoning")).toBe(true);
           // Should have text
           expect(content.some((c) => c.type === "text")).toBe(true);
         }
@@ -649,7 +635,7 @@ describe("modelMessageTransform", () => {
       expect(toolCallMessage).toBeDefined();
     });
 
-    it("should handle multiple reasoning parts for OpenAI", () => {
+    it("should coalesce multiple consecutive reasoning parts", () => {
       const messages: ModelMessage[] = [
         {
           role: "user",
@@ -665,14 +651,22 @@ describe("modelMessageTransform", () => {
         },
       ];
 
-      const result = transformModelMessages(messages, "openai");
+      const result = transformModelMessages(messages);
 
-      // Should have 2 messages, assistant should only have text
+      // Should have 2 messages, assistant should have coalesced reasoning and text
       expect(result).toHaveLength(2);
       expect(result[1].role).toBe("assistant");
-      expect((result[1] as AssistantModelMessage).content).toEqual([
-        { type: "text", text: "Final answer" },
-      ]);
+      const content = (result[1] as AssistantModelMessage).content;
+      expect(Array.isArray(content)).toBe(true);
+      if (Array.isArray(content)) {
+        // Should coalesce the two reasoning parts into one
+        expect(content).toHaveLength(2);
+        expect(content[0]).toEqual({
+          type: "reasoning",
+          text: "First, I'll consider...Then, I'll analyze...",
+        });
+        expect(content[1]).toEqual({ type: "text", text: "Final answer" });
+      }
     });
   });
 });
diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts
index 6fdab7509d..9ec42553d8 100644
--- a/src/utils/messages/modelMessageTransform.ts
+++ b/src/utils/messages/modelMessageTransform.ts
@@ -32,32 +32,92 @@ export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessa
 }
 
 /**
- * Strip reasoning parts from messages for OpenAI.
+ * Clear provider metadata from ModelMessages for OpenAI to prevent reasoning/tool errors.
  *
- * OpenAI's Responses API uses encrypted reasoning items (with IDs like rs_*) that are
- * managed automatically via previous_response_id. When reasoning parts from history
- * (which are Anthropic-style text-based reasoning) are sent to OpenAI, they create
- * orphaned reasoning items that cause "reasoning without following item" errors.
+ * OpenAI's Responses API uses encrypted reasoning items (IDs like rs_*) that are
+ * managed automatically via previous_response_id. When these provider metadata
+ * references are sent back to OpenAI from stored history, they can cause errors:
+ * - "Item 'rs_*' of type 'reasoning' was provided without its required following item"
+ * - "referenced reasoning on a function_call was not provided"
  *
- * Anthropic's reasoning (text-based) is different and SHOULD be sent back via sendReasoning.
+ * The solution is to blank out providerMetadata on text, reasoning, and tool-call
+ * parts (and on tool-result parts of tool messages). This lets OpenAI manage
+ * conversation state via previousResponseId without conflicting with stale metadata.
  *
- * @param messages - Messages that may contain reasoning parts
- * @returns Messages with reasoning parts stripped (for OpenAI only)
+ * Reference: https://github.com/vercel/ai/issues/7099
+ * User solution: https://github.com/gvkhna/vibescraper
+ *
+ * @param messages - ModelMessages after convertToModelMessages()
+ * @returns Messages with provider metadata cleared (for OpenAI only)
  */
-export function stripReasoningForOpenAI(messages: CmuxMessage[]): CmuxMessage[] {
+export function clearProviderMetadataForOpenAI(messages: ModelMessage[]): ModelMessage[] {
   return messages.map((msg) => {
-    // Only process assistant messages
-    if (msg.role !== "assistant") {
-      return msg;
+    // Process assistant messages (which may have reasoning/text/tool-call parts)
+    if (msg.role === "assistant") {
+      const assistantMsg = msg;
+
+      // Handle string content (no parts to process)
+      if (typeof assistantMsg.content === "string") {
+        return msg;
+      }
+
+      // Process content array and clear provider metadata
+      const cleanedContent = assistantMsg.content.map((part) => {
+        // Clear providerMetadata for text and reasoning parts
+        if ((part.type === "text" || part.type === "reasoning") && "providerMetadata" in part) {
+          return {
+            ...part,
+            providerMetadata: {},
+          };
+        }
+
+        // Clear providerMetadata for tool-call parts
+        if (part.type === "tool-call" && "providerMetadata" in part) {
+          return {
+            ...part,
+            providerMetadata: {},
+          };
+        }
+
+        return part;
+      });
+
+      return {
+        ...assistantMsg,
+        content: cleanedContent,
+      };
     }
 
-    // Strip reasoning parts - OpenAI manages reasoning via previousResponseId
-    const filteredParts = msg.parts.filter((part) => part.type !== "reasoning");
+    // Process tool messages (which may have tool-result parts with stale metadata)
+    if (msg.role === "tool") {
+      const toolMsg = msg;
+
+      // Handle string content (no parts to process)
+      if (typeof toolMsg.content === "string") {
+        return msg;
+      }
 
-    return {
-      ...msg,
-      parts: filteredParts,
-    };
+      // Process content array and clear provider metadata
+      const cleanedContent = toolMsg.content.map((part) => {
+        // Clear providerMetadata for tool-result parts
+        if (part.type === "tool-result" && "providerMetadata" in part) {
+          return {
+            ...part,
+            providerMetadata: {},
+          };
+        }
+
+        return part;
+      });
+
+      return {
+        ...toolMsg,
+        content: cleanedContent,
+      };
+    }
+
+    // Other message types (user, system) pass through unchanged
+    return msg;
   });
 }
@@ -122,7 +182,10 @@ function splitMixedContentMessages(messages: ModelMessage[]): ModelMessage[] {
     }
 
     // Check if this assistant message has both text and tool calls
-    const textParts = assistantMsg.content.filter((c) => c.type === "text" && c.text.trim());
+    // Note: Reasoning parts are treated like text parts (they stay together)
+    const textParts = assistantMsg.content.filter(
+      (c) => (c.type === "text" && c.text.trim()) || c.type === "reasoning"
+    );
     const toolCallParts = assistantMsg.content.filter((c) => c.type === "tool-call");
 
     // Check if the next message is a tool result message
@@ -180,7 +243,9 @@ function splitMixedContentMessages(messages: ModelMessage[]): ModelMessage[] {
     let currentGroup: { type: "text" | "tool-call"; parts: ContentArray } | null = null;
 
     for (const item of contentWithPositions) {
-      const partType = item.content.type === "text" ? "text" : "tool-call";
+      // Reasoning parts are treated as text (they go together with text)
+      const partType =
+        item.content.type === "text" || item.content.type === "reasoning" ? "text" : "tool-call";
 
       if (!currentGroup || currentGroup.type !== partType) {
         if (currentGroup) groups.push(currentGroup);
@@ -305,37 +370,6 @@ function filterReasoningOnlyMessages(messages: ModelMessage[]): ModelMessage[] {
   });
 }
 
-/**
- * Strip reasoning parts from assistant messages.
- * OpenAI's Responses API has its own reasoning format (encrypted reasoning items with IDs).
- * Anthropic's text-based reasoning parts are incompatible and must be removed.
- * This function removes reasoning parts while preserving text and tool-call parts.
- */
-function stripReasoningParts(messages: ModelMessage[]): ModelMessage[] {
-  return messages.map((msg) => {
-    // Only process assistant messages with array content
-    if (msg.role !== "assistant") {
-      return msg;
-    }
-
-    const assistantMsg = msg;
-
-    // Skip string content (no reasoning parts to strip)
-    if (typeof assistantMsg.content === "string") {
-      return msg;
-    }
-
-    // Filter out reasoning parts, keep everything else
-    const filteredContent = assistantMsg.content.filter((part) => part.type !== "reasoning");
-
-    // If all content was filtered out, this message will be caught by filterReasoningOnlyMessages
-    return {
-      ...assistantMsg,
-      content: filteredContent,
-    };
-  });
-}
-
 /**
  * Coalesce consecutive parts of the same type within each message.
  * Streaming creates many individual text/reasoning parts; merge them for easier debugging.
@@ -429,44 +463,30 @@ function mergeConsecutiveUserMessages(messages: ModelMessage[]): ModelMessage[]
 
 /**
  * Transform messages to ensure provider API compliance.
- * Applies multiple transformation passes based on provider requirements:
+ * Applies multiple transformation passes:
  * 0. Coalesce consecutive parts (text/reasoning) - all providers, reduces JSON overhead
 * 1. Split mixed content messages (text + tool calls) - all providers
- * 2. Strip/filter reasoning parts:
- *    - OpenAI: Strip all Anthropic reasoning parts (incompatible format)
- *    - Anthropic: Filter out reasoning-only messages (API rejects them)
+ * 2. Filter out reasoning-only messages - all providers
 * 3. Merge consecutive user messages - all providers
 *
- * Note: encryptedContent stripping happens earlier in streamManager when tool results
- * are first stored, not during message transformation.
+ * Note: Provider-specific handling happens in aiService.ts: OpenAI metadata is
+ * cleared before this transformation, and Anthropic cache control is applied after it.
 *
 * @param messages The messages to transform
- * @param provider The provider name (e.g., "anthropic", "openai")
 */
-export function transformModelMessages(messages: ModelMessage[], provider: string): ModelMessage[] {
+export function transformModelMessages(messages: ModelMessage[]): ModelMessage[] {
   // Pass 0: Coalesce consecutive parts to reduce JSON overhead from streaming (applies to all providers)
   const coalesced = coalesceConsecutiveParts(messages);
 
   // Pass 1: Split mixed content messages (applies to all providers)
   const split = splitMixedContentMessages(coalesced);
 
-  // Pass 2: Provider-specific reasoning handling
-  let reasoningHandled: ModelMessage[];
-  if (provider === "openai") {
-    // OpenAI: Strip all reasoning parts (Anthropic's text-based reasoning is incompatible with OpenAI's format)
-    reasoningHandled = stripReasoningParts(split);
-    // Then filter out any messages that became empty after stripping
-    reasoningHandled = filterReasoningOnlyMessages(reasoningHandled);
-  } else if (provider === "anthropic") {
-    // Anthropic: Filter out reasoning-only messages (API rejects messages with only reasoning)
-    reasoningHandled = filterReasoningOnlyMessages(split);
-  } else {
-    // Unknown provider: no reasoning handling
-    reasoningHandled = split;
-  }
+  // Pass 2: Filter out reasoning-only messages (applies to all providers)
+  // Both Anthropic and OpenAI reject messages that have only reasoning parts
+  const reasoningFiltered = filterReasoningOnlyMessages(split);
 
   // Pass 3: Merge consecutive user messages (applies to all providers)
-  const merged = mergeConsecutiveUserMessages(reasoningHandled);
+  const merged = mergeConsecutiveUserMessages(reasoningFiltered);
 
   return merged;
 }
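
For reference, a minimal sketch of the call-site pipeline this diff produces. The prepareMessages helper and the CmuxMessage import path are illustrative, not part of the diff; the key ordering it demonstrates is that clearProviderMetadataForOpenAI must run on ModelMessages, i.e. after convertToModelMessages() and before transformModelMessages():

import { convertToModelMessages, type ModelMessage } from "ai";
import {
  addInterruptedSentinel,
  clearProviderMetadataForOpenAI,
  filterEmptyAssistantMessages,
  transformModelMessages,
} from "@/utils/messages/modelMessageTransform";
import { applyCacheControl } from "@/utils/ai/cacheStrategy";
import type { CmuxMessage } from "@/types/message"; // assumed path, for illustration only

// Illustrative helper mirroring the order of operations in aiService.ts.
function prepareMessages(messages: CmuxMessage[], modelString: string): ModelMessage[] {
  const [providerName] = modelString.split(":");

  // 1. Drop assistant messages that contain only reasoning (no text/tools)
  const filtered = filterEmptyAssistantMessages(messages);

  // 2. Mark partial messages with the [INTERRUPTED] sentinel
  const withSentinel = addInterruptedSentinel(filtered);

  // 3. Convert to ModelMessages (cast needed for custom interrupted-tool parts)
  // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-argument
  let modelMessages = convertToModelMessages(withSentinel as any);

  // 4. OpenAI only: blank out stale provider metadata so previousResponseId
  //    owns reasoning state (must happen on ModelMessages, after step 3)
  if (providerName === "openai") {
    modelMessages = clearProviderMetadataForOpenAI(modelMessages);
  }

  // 5. Provider-agnostic transforms: coalesce, split, filter reasoning-only, merge
  const transformed = transformModelMessages(modelMessages);

  // 6. Anthropic cache control is applied AFTER transformation
  return applyCacheControl(transformed, modelString);
}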