diff --git a/src/services/aiService.ts b/src/services/aiService.ts
index d638aaad84..5fc5f845b6 100644
--- a/src/services/aiService.ts
+++ b/src/services/aiService.ts
@@ -599,7 +599,11 @@ export class AIService extends EventEmitter {
       // Convert MuxMessage to ModelMessage format using Vercel AI SDK utility
       // Type assertion needed because MuxMessage has custom tool parts for interrupted tools
       // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-argument
-      const modelMessages = convertToModelMessages(sanitizedMessages as any);
+      const modelMessages = convertToModelMessages(sanitizedMessages as any, {
+        // Drop unfinished tool calls (input-streaming/input-available) so downstream
+        // transforms only see tool calls that actually produced outputs.
+        ignoreIncompleteToolCalls: true,
+      });
       log.debug_obj(`${workspaceId}/2_model_messages.json`, modelMessages);
 
       // Apply ModelMessage transforms based on provider requirements
diff --git a/src/utils/messages/modelMessageTransform.test.ts b/src/utils/messages/modelMessageTransform.test.ts
index b711e1653c..5b0b2e66a0 100644
--- a/src/utils/messages/modelMessageTransform.test.ts
+++ b/src/utils/messages/modelMessageTransform.test.ts
@@ -29,65 +29,13 @@ describe("modelMessageTransform", () => {
     expect(result).toEqual(messages);
   });
 
-  it("should keep text-only messages unchanged", () => {
-    const assistantMsg1: AssistantModelMessage = {
-      role: "assistant",
-      content: [{ type: "text", text: "Let me help you with that." }],
-    };
-    const assistantMsg2: AssistantModelMessage = {
-      role: "assistant",
-      content: [{ type: "text", text: "Here's the result." }],
-    };
-    const messages: ModelMessage[] = [assistantMsg1, assistantMsg2];
-
-    const result = transformModelMessages(messages, "anthropic");
-    expect(result).toEqual(messages);
-  });
-
-  it("should strip tool calls without results (interrupted mixed content)", () => {
+  it("should split mixed text and tool-call content into ordered segments", () => {
     const assistantMsg: AssistantModelMessage = {
       role: "assistant",
       content: [
-        { type: "text", text: "Let me check that for you." },
-        { type: "tool-call", toolCallId: "call1", toolName: "bash", input: { script: "ls" } },
-      ],
-    };
-    const messages: ModelMessage[] = [assistantMsg];
-
-    const result = transformModelMessages(messages, "anthropic");
-
-    // Should only keep text, strip interrupted tool calls
-    expect(result).toHaveLength(1);
-    expect(result[0].role).toBe("assistant");
-    expect((result[0] as AssistantModelMessage).content).toEqual([
-      { type: "text", text: "Let me check that for you." },
-    ]);
-  });
-
-  it("should strip tool-only messages without results (orphaned tool calls)", () => {
-    const assistantMsg: AssistantModelMessage = {
-      role: "assistant",
-      content: [
-        { type: "tool-call", toolCallId: "call1", toolName: "bash", input: { script: "ls" } },
-      ],
-    };
-    const messages: ModelMessage[] = [assistantMsg];
-
-    const result = transformModelMessages(messages, "anthropic");
-
-    // Should filter out the entire message since it only has orphaned tool calls
-    expect(result).toHaveLength(0);
-  });
-
-  it("should handle partial results (some tool calls interrupted)", () => {
-    // Assistant makes 3 tool calls, but only 2 have results (3rd was interrupted)
-    const assistantMsg: AssistantModelMessage = {
-      role: "assistant",
-      content: [
-        { type: "text", text: "Let me check a few things." },
+        { type: "text", text: "Before" },
         { type: "tool-call", toolCallId: "call1", toolName: "bash", input: { script: "pwd" } },
-        { type: "tool-call", toolCallId: "call2", toolName: "bash", input: { script: "ls" } },
-        { type: "tool-call", toolCallId: "call3", toolName: "bash", input: { script: "date" } },
+        { type: "text", text: "After" },
       ],
     };
     const toolMsg: ToolModelMessage = {
@@ -99,80 +47,41 @@ describe("modelMessageTransform", () => {
           toolName: "bash",
           output: { type: "json", value: { stdout: "/home/user" } },
         },
-        {
-          type: "tool-result",
-          toolCallId: "call2",
-          toolName: "bash",
-          output: { type: "json", value: { stdout: "file1 file2" } },
-        },
-        // call3 has no result (interrupted)
       ],
     };
-    const messages: ModelMessage[] = [assistantMsg, toolMsg];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages([assistantMsg, toolMsg], "anthropic");
 
-    // Should have: text message, tool calls (only call1 & call2), tool results
-    expect(result).toHaveLength(3);
-
-    // First: text
+    expect(result).toHaveLength(4);
     expect(result[0].role).toBe("assistant");
     expect((result[0] as AssistantModelMessage).content).toEqual([
-      { type: "text", text: "Let me check a few things." },
+      { type: "text", text: "Before" },
     ]);
-
-    // Second: only tool calls with results (call1, call2), NOT call3
     expect(result[1].role).toBe("assistant");
-    const toolCallContent = (result[1] as AssistantModelMessage).content;
-    expect(Array.isArray(toolCallContent)).toBe(true);
-    if (Array.isArray(toolCallContent)) {
-      expect(toolCallContent).toHaveLength(2);
-      expect(toolCallContent[0]).toEqual({
-        type: "tool-call",
-        toolCallId: "call1",
-        toolName: "bash",
-        input: { script: "pwd" },
-      });
-      expect(toolCallContent[1]).toEqual({
-        type: "tool-call",
-        toolCallId: "call2",
-        toolName: "bash",
-        input: { script: "ls" },
-      });
-    }
-
-    // Third: tool results (only for call1 & call2)
+    expect((result[1] as AssistantModelMessage).content).toEqual([
+      { type: "tool-call", toolCallId: "call1", toolName: "bash", input: { script: "pwd" } },
+    ]);
     expect(result[2].role).toBe("tool");
-    const toolResultContent = (result[2] as ToolModelMessage).content;
-    expect(toolResultContent).toHaveLength(2);
-    expect(toolResultContent[0]).toEqual({
+    expect((result[2] as ToolModelMessage).content[0]).toEqual({
       type: "tool-result",
       toolCallId: "call1",
       toolName: "bash",
       output: { type: "json", value: { stdout: "/home/user" } },
     });
-    expect(toolResultContent[1]).toEqual({
-      type: "tool-result",
-      toolCallId: "call2",
-      toolName: "bash",
-      output: { type: "json", value: { stdout: "file1 file2" } },
-    });
+    expect(result[3].role).toBe("assistant");
+    expect((result[3] as AssistantModelMessage).content).toEqual([
+      { type: "text", text: "After" },
+    ]);
   });
 
-  it("should handle mixed content with tool results properly", () => {
+  it("should interleave multiple tool-call groups with their results", () => {
     const assistantMsg: AssistantModelMessage = {
       role: "assistant",
       content: [
-        { type: "text", text: "First, let me check something." },
+        { type: "text", text: "Step 1" },
         { type: "tool-call", toolCallId: "call1", toolName: "bash", input: { script: "pwd" } },
+        { type: "text", text: "Step 2" },
         { type: "tool-call", toolCallId: "call2", toolName: "bash", input: { script: "ls" } },
-        { type: "text", text: "Now let me check another thing." },
-        {
-          type: "tool-call",
-          toolCallId: "call3",
-          toolName: "file_read",
-          input: { path: "test.txt" },
-        },
       ],
     };
     const toolMsg: ToolModelMessage = {
@@ -182,45 +91,60 @@ describe("modelMessageTransform", () => {
           type: "tool-result",
           toolCallId: "call1",
           toolName: "bash",
-          output: { type: "json", value: { stdout: "/home/user" } },
+          output: { type: "json", value: { stdout: "/workspace" } },
         },
         {
           type: "tool-result",
           toolCallId: "call2",
           toolName: "bash",
-          output: { type: "json", value: { stdout: "file1 file2" } },
-        },
-        {
-          type: "tool-result",
-          toolCallId: "call3",
-          toolName: "file_read",
-          output: { type: "json", value: { content: "test content" } },
+          output: { type: "json", value: { stdout: "file.txt" } },
         },
       ],
     };
-    const messages: ModelMessage[] = [assistantMsg, toolMsg];
 
-    const result = transformModelMessages(messages, "anthropic");
+    const result = transformModelMessages([assistantMsg, toolMsg], "anthropic");
 
-    // Should split into multiple messages with tool results properly placed
-    expect(result.length).toBeGreaterThan(2);
-
-    // First should be text
+    expect(result).toHaveLength(6);
     expect(result[0].role).toBe("assistant");
     expect((result[0] as AssistantModelMessage).content).toEqual([
-      { type: "text", text: "First, let me check something." },
+      { type: "text", text: "Step 1" },
    ]);
-
-    // Then tool calls with their results
     expect(result[1].role).toBe("assistant");
-    const secondContent = (result[1] as AssistantModelMessage).content;
-    expect(Array.isArray(secondContent)).toBe(true);
-    if (Array.isArray(secondContent)) {
-      expect(secondContent.some((c) => c.type === "tool-call")).toBe(true);
-    }
-
-    // Tool results should follow tool calls
+    expect((result[1] as AssistantModelMessage).content[0]).toEqual({
+      type: "tool-call",
+      toolCallId: "call1",
+      toolName: "bash",
+      input: { script: "pwd" },
+    });
     expect(result[2].role).toBe("tool");
+    expect((result[2] as ToolModelMessage).content[0]).toMatchObject({ toolCallId: "call1" });
+    expect(result[3].role).toBe("assistant");
+    expect((result[3] as AssistantModelMessage).content).toEqual([
+      { type: "text", text: "Step 2" },
+    ]);
+    expect(result[4].role).toBe("assistant");
+    expect((result[4] as AssistantModelMessage).content[0]).toEqual({
+      type: "tool-call",
+      toolCallId: "call2",
+      toolName: "bash",
+      input: { script: "ls" },
+    });
+    expect(result[5].role).toBe("tool");
+    expect((result[5] as ToolModelMessage).content[0]).toMatchObject({ toolCallId: "call2" });
+  });
 
+  it("should keep text-only messages unchanged", () => {
+    const assistantMsg1: AssistantModelMessage = {
+      role: "assistant",
+      content: [{ type: "text", text: "Let me help you with that." }],
+    };
+    const assistantMsg2: AssistantModelMessage = {
+      role: "assistant",
+      content: [{ type: "text", text: "Here's the result." }],
+    };
+    const messages: ModelMessage[] = [assistantMsg1, assistantMsg2];
+
+    const result = transformModelMessages(messages, "anthropic");
+    expect(result).toEqual(messages);
   });
 });
 
@@ -659,10 +583,10 @@ describe("modelMessageTransform", () => {
     const result = transformModelMessages(messages, "openai");
 
-    // Should have user, text, tool-call, tool-result (no reasoning)
+    // Should still contain user, assistant, and tool messages after filtering
     expect(result.length).toBeGreaterThan(2);
 
-    // Find the assistant message with text
+    // Find the assistant message with text (reasoning should remain alongside text)
     const textMessage = result.find((msg) => {
       if (msg.role !== "assistant") return false;
       const content = msg.content;
@@ -672,9 +596,7 @@ describe("modelMessageTransform", () => {
     if (textMessage) {
       const content = (textMessage as AssistantModelMessage).content;
       if (Array.isArray(content)) {
-        // Should not have reasoning parts
-        expect(content.some((c) => c.type === "reasoning")).toBe(false);
-        // Should have text
+        expect(content.some((c) => c.type === "reasoning")).toBe(true);
         expect(content.some((c) => c.type === "text")).toBe(true);
       }
     }
diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts
index 954266c644..5251fb8208 100644
--- a/src/utils/messages/modelMessageTransform.ts
+++ b/src/utils/messages/modelMessageTransform.ts
@@ -198,6 +198,11 @@ export function injectModeTransition(
   return result;
 }
 
+/**
+ * Filter out assistant messages that only contain reasoning parts (no text or tool parts).
+ * Anthropic API rejects messages that have reasoning but no actual content.
+ * This happens when a message is interrupted during thinking before producing any text.
+ */
 /**
  * Split assistant messages with mixed text and tool calls into separate messages
  * to comply with Anthropic's requirement that tool_use blocks must be immediately
@@ -209,7 +214,6 @@ function splitMixedContentMessages(messages: ModelMessage[]): ModelMessage[] {
   for (let i = 0; i < messages.length; i++) {
     const msg = messages[i];
 
-    // Only process assistant messages
     if (msg.role !== "assistant") {
       result.push(msg);
       continue;
@@ -217,180 +221,118 @@
     const assistantMsg = msg;
 
-    // AssistantContent can be string or array, handle both cases
     if (typeof assistantMsg.content === "string") {
-      // If content is just a string, no tool calls to worry about
       result.push(msg);
       continue;
     }
 
-    // Check if this assistant message has both text and tool calls
-    const textParts = assistantMsg.content.filter((c) => c.type === "text" && c.text.trim());
     const toolCallParts = assistantMsg.content.filter((c) => c.type === "tool-call");
 
-    // Check if the next message is a tool result message
+    if (toolCallParts.length === 0) {
+      result.push(msg);
+      continue;
+    }
+
     const nextMsg = messages[i + 1];
     const hasToolResults = nextMsg?.role === "tool";
 
-    // If no tool calls, keep as-is
-    if (toolCallParts.length === 0) {
+    if (!hasToolResults) {
       result.push(msg);
       continue;
     }
 
-    // If we have tool calls but no text
-    if (textParts.length === 0) {
-      if (hasToolResults) {
-        // Filter tool calls to only include those with results
-        const toolMsg = nextMsg;
-        const resultIds = new Set(
-          toolMsg.content
-            .filter((r) => r.type === "tool-result")
-            .map((r) => (r.type === "tool-result" ? r.toolCallId : ""))
-        );
+    const toolMsg = nextMsg;
 
-        const validToolCalls = toolCallParts.filter(
-          (p) => p.type === "tool-call" && resultIds.has(p.toolCallId)
-        );
+    type ContentArray = Exclude<AssistantModelMessage["content"], string>;
+    const groups: Array<{ type: "text" | "tool-call"; parts: ContentArray }> = [];
+    let currentGroup: { type: "text" | "tool-call"; parts: ContentArray } | null = null;
 
-        if (validToolCalls.length > 0) {
-          // Only include tool calls that have results
-          result.push({
-            role: "assistant",
-            content: validToolCalls,
-          });
-        }
-        // Skip if no valid tool calls remain
+    for (const part of assistantMsg.content) {
+      const partType = part.type === "tool-call" ? "tool-call" : "text";
+
+      // eslint-disable-next-line @typescript-eslint/prefer-optional-chain
+      if (!currentGroup || currentGroup.type !== partType) {
+        if (currentGroup) groups.push(currentGroup);
+        currentGroup = { type: partType, parts: [] };
       }
-      // Skip orphaned tool calls - they violate API requirements
+
+      currentGroup.parts.push(part);
+    }
+
+    if (currentGroup) {
+      groups.push(currentGroup);
+    }
+
+    if (groups.length <= 1) {
+      result.push(msg);
       continue;
     }
 
-    // If we have tool calls that will be followed by results,
-    // we need to ensure no text appears between them
-    if (hasToolResults) {
-      const toolMsg = nextMsg;
-
-      // Find positions of text and tool calls in content array
-      const contentWithPositions = assistantMsg.content.map((c, idx) => ({
-        content: c,
-        index: idx,
-      }));
-
-      // Group consecutive parts by type
-      type ContentArray = Exclude<AssistantModelMessage["content"], string>;
-      const groups: Array<{ type: "text" | "tool-call"; parts: ContentArray }> = [];
-      let currentGroup: { type: "text" | "tool-call"; parts: ContentArray } | null = null;
-
-      for (const item of contentWithPositions) {
-        const partType = item.content.type === "text" ? "text" : "tool-call";
-
-        // eslint-disable-next-line @typescript-eslint/prefer-optional-chain
-        if (!currentGroup || currentGroup.type !== partType) {
-          if (currentGroup) groups.push(currentGroup);
-          currentGroup = { type: partType, parts: [] };
+    const toolResultsById = new Map<string, ToolModelMessage["content"]>();
+    for (const content of toolMsg.content) {
+      if (content.type === "tool-result") {
+        const existing = toolResultsById.get(content.toolCallId);
+        if (existing) {
+          existing.push(content);
+        } else {
+          toolResultsById.set(content.toolCallId, [content]);
         }
+      }
     }
-        currentGroup.parts.push(item.content);
+    for (const group of groups) {
+      if (group.parts.length === 0) {
+        continue;
       }
-      if (currentGroup) groups.push(currentGroup);
-
-      // If we have alternating text and tool calls, we need to split them
-      if (groups.length > 1) {
-        // Process each group
-        for (const group of groups) {
-          if (group.parts.length > 0) {
-            let partsToInclude = group.parts;
-
-            // If this is a tool-call group, filter to only include tool calls that have results
-            if (group.type === "tool-call" && hasToolResults) {
-              // Get the IDs of tool calls that have results
-              const resultIds = new Set(
-                toolMsg.content
-                  .filter((r) => r.type === "tool-result")
-                  .map((r) => (r.type === "tool-result" ? r.toolCallId : ""))
-              );
-
-              // Only include tool calls that have corresponding results
-              partsToInclude = group.parts.filter(
-                (p) => p.type === "tool-call" && resultIds.has(p.toolCallId)
-              );
-            }
-
-            // Only create assistant message if there are parts to include
-            if (partsToInclude.length > 0) {
-              const newAssistantMsg: AssistantModelMessage = {
-                role: "assistant",
-                content: partsToInclude,
-              };
-              result.push(newAssistantMsg);
-
-              // If this group has tool calls that need results,
-              // add the tool results right after
-              if (group.type === "tool-call" && hasToolResults) {
-                // Get the tool call IDs from filtered parts
-                const toolCallIds = new Set(
-                  partsToInclude
-                    .filter((p) => p.type === "tool-call")
-                    .map((p) => (p.type === "tool-call" ? p.toolCallId : ""))
-                    .filter(Boolean)
-                );
-
-                // Filter the tool results to only include those for these tool calls
-                const relevantResults = toolMsg.content.filter(
-                  (r) => r.type === "tool-result" && toolCallIds.has(r.toolCallId)
-                );
-
-                if (relevantResults.length > 0) {
-                  const newToolMsg: ToolModelMessage = {
-                    role: "tool",
-                    content: relevantResults,
-                  };
-                  result.push(newToolMsg);
-                }
-              }
-            }
+      if (group.type === "tool-call") {
+        const partsToInclude = group.parts.filter(
+          (p) => p.type === "tool-call" && toolResultsById.has(p.toolCallId)
+        );
+
+        if (partsToInclude.length === 0) {
+          continue;
+        }
+
+        const newAssistantMsg: AssistantModelMessage = {
+          role: "assistant",
+          content: partsToInclude,
+        };
+        result.push(newAssistantMsg);
+
+        const relevantResults: ToolModelMessage["content"] = [];
+        for (const part of partsToInclude) {
+          if (part.type !== "tool-call") {
+            continue;
+          }
+          const results = toolResultsById.get(part.toolCallId);
+          if (results) {
+            relevantResults.push(...results);
+            toolResultsById.delete(part.toolCallId);
          }
        }
-        // Skip the original tool result message since we've redistributed its contents
-        if (hasToolResults) {
-          i++; // Skip next message
+        if (relevantResults.length > 0) {
+          const newToolMsg: ToolModelMessage = {
+            role: "tool",
+            content: relevantResults,
+          };
+          result.push(newToolMsg);
        }
      } else {
-        // No splitting needed, keep as-is
-        result.push(msg);
-      }
-    } else {
-      // No tool results follow, which means these tool calls were interrupted
-      // Both Anthropic and OpenAI APIs require EVERY tool_use to have a tool_result,
-      // so we must strip out interrupted tool calls entirely. The text content with
-      // [INTERRUPTED] sentinel gives the model enough context.
-
-      // Only include text parts (strip out interrupted tool calls)
-      if (textParts.length > 0) {
-        const textMsg: AssistantModelMessage = {
+        const newAssistantMsg: AssistantModelMessage = {
          role: "assistant",
-          content: textParts,
+          content: group.parts,
        };
-        result.push(textMsg);
+        result.push(newAssistantMsg);
      }
-
-      // DO NOT include tool calls without results - they violate API requirements
-      // The interrupted tool calls are preserved in chat.jsonl for UI display, but
-      // excluded from API calls since they have no results
    }
+
+    i++;
  }
 
  return result;
 }
-
-/**
- * Filter out assistant messages that only contain reasoning parts (no text or tool parts).
- * Anthropic API rejects messages that have reasoning but no actual content.
- * This happens when a message is interrupted during thinking before producing any text.
- */
 function filterReasoningOnlyMessages(messages: ModelMessage[]): ModelMessage[] {
   return messages.filter((msg) => {
     if (msg.role !== "assistant") {
@@ -557,8 +499,7 @@ export function transformModelMessages(messages: ModelMessage[], provider: strin
 
 /**
  * Validate that the transformed messages follow Anthropic's requirements:
- * - Every tool-call must be immediately followed by its tool-result
- * - No text can appear between tool-call and tool-result
+ * - Every tool-call must be immediately followed by its tool-result message
  */
 export function validateAnthropicCompliance(messages: ModelMessage[]): {
   valid: boolean;
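Note (not part of the patch): a minimal sketch of the behavior the rewritten splitMixedContentMessages is expected to produce, mirroring the updated tests above. It assumes transformModelMessages is the export of src/utils/messages/modelMessageTransform.ts shown in this diff and that the AssistantModelMessage/ToolModelMessage types come from the Vercel AI SDK ("ai" package), as the tests suggest; treat names and the import path as illustrative.

// Hypothetical usage sketch; message shapes are copied from the updated tests.
import type { AssistantModelMessage, ToolModelMessage } from "ai";
import { transformModelMessages } from "./modelMessageTransform";

const assistant: AssistantModelMessage = {
  role: "assistant",
  content: [
    { type: "text", text: "Step 1" },
    { type: "tool-call", toolCallId: "call1", toolName: "bash", input: { script: "pwd" } },
  ],
};

const tool: ToolModelMessage = {
  role: "tool",
  content: [
    {
      type: "tool-result",
      toolCallId: "call1",
      toolName: "bash",
      output: { type: "json", value: { stdout: "/workspace" } },
    },
  ],
};

// The mixed assistant message is split so each tool call sits in its own assistant
// message, immediately followed by its tool result (Anthropic's requirement):
// roles become ["assistant", "assistant", "tool"].
const transformed = transformModelMessages([assistant, tool], "anthropic");
console.log(transformed.map((m) => m.role));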