diff --git a/src/services/aiService.ts b/src/services/aiService.ts
index 21a4a0e62..27202e059 100644
--- a/src/services/aiService.ts
+++ b/src/services/aiService.ts
@@ -429,7 +429,8 @@ export class AIService extends EventEmitter {
       const [providerName] = modelString.split(":");
 
       // Filter out assistant messages with only reasoning (no text/tools)
-      const filteredMessages = filterEmptyAssistantMessages(messages);
+      // Pass provider and thinking level for context-aware filtering
+      const filteredMessages = filterEmptyAssistantMessages(messages, providerName, thinkingLevel);
       log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`);
       log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages);
 
diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts
index e0542e868..3b4ffa356 100644
--- a/src/utils/messages/modelMessageTransform.ts
+++ b/src/utils/messages/modelMessageTransform.ts
@@ -8,14 +8,28 @@ import type { CmuxMessage } from "@/types/message";
 
 /**
  * Filter out assistant messages that only contain reasoning parts (no text or tool parts).
- * These messages are invalid for the API and provide no value to the model.
+ * These messages are invalid for most API calls and provide no value to the model.
  * This happens when a message is interrupted during thinking before producing any text.
  *
+ * EXCEPTION: For Anthropic, when resuming a partial message with thinking enabled,
+ * we preserve reasoning-only messages. This is because Anthropic's extended thinking
+ * API expects reasoning content to be present in the history.
+ * Partial messages without any reasoning part (e.g. only empty text) are still dropped.
+ *
  * Note: This function filters out reasoning-only messages but does NOT strip reasoning
  * parts from messages that have other content. Reasoning parts are handled differently
  * per provider (see stripReasoningForOpenAI).
+ *
+ * @param messages - Messages to filter
+ * @param provider - AI provider (optional, for context-aware filtering)
+ * @param thinkingLevel - Thinking level setting (optional, for context-aware filtering)
+ * @returns The messages to keep, in their original order
  */
-export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessage[] {
+export function filterEmptyAssistantMessages(
+  messages: CmuxMessage[],
+  provider?: string,
+  thinkingLevel?: string
+): CmuxMessage[] {
   return messages.filter((msg) => {
     // Keep all non-assistant messages
     if (msg.role !== "assistant") {
@@ -27,7 +41,26 @@ export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessa
       (part) => (part.type === "text" && part.text) || part.type === "dynamic-tool"
     );
 
-    return hasContent;
+    if (hasContent) {
+      return true;
+    }
+
+    // For Anthropic with thinking enabled, preserve reasoning-only partial messages
+    // This prevents "Expected `thinking` but found `text`" errors on resume
+    // Require an actual reasoning part so content-less partials are still dropped
+    const hasReasoning = msg.parts.some((part) => part.type === "reasoning");
+    if (
+      provider === "anthropic" &&
+      thinkingLevel &&
+      thinkingLevel !== "off" &&
+      msg.metadata?.partial &&
+      hasReasoning
+    ) {
+      return true; // Keep reasoning-only messages for Anthropic thinking
+    }
+
+    // Otherwise filter out reasoning-only messages
+    return false;
   });
 }
 
diff --git a/tests/ipcMain/resumeStream.test.ts b/tests/ipcMain/resumeStream.test.ts
index 4158e00fa..9df10aec8 100644
--- a/tests/ipcMain/resumeStream.test.ts
+++ b/tests/ipcMain/resumeStream.test.ts
@@ -142,4 +142,141 @@ describeIntegration("IpcMain resumeStream integration tests", () => {
     },
     45000 // 45 second timeout for this test
   );
+
+  // Define tricky message histories that could cause issues
+  const trickyHistories = [
+    {
+      name: "reasoning-only",
+      description: "Assistant message with only reasoning, no text",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [{ type: "reasoning" as const, text: "Let me think about this..." }],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+    {
+      name: "empty-text",
+      description: "Assistant message with empty text content",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [{ type: "text" as const, text: "" }],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+    {
+      name: "reasoning-then-empty-text",
+      description: "Assistant message with reasoning followed by empty text",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [
+          { type: "reasoning" as const, text: "Thinking deeply..." },
+          { type: "text" as const, text: "" },
+        ],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+    {
+      name: "multiple-reasoning-blocks",
+      description: "Assistant message with multiple reasoning blocks, no text",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [
+          { type: "reasoning" as const, text: "First thought..." },
+          { type: "reasoning" as const, text: "Second thought..." },
+        ],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+    {
+      name: "whitespace-only-text",
+      description: "Assistant message with whitespace-only text content",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [{ type: "text" as const, text: " \n\t " }],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+  ];
+
+  test.concurrent.each([
+    { provider: "anthropic" as const, model: "claude-sonnet-4-5" },
+    { provider: "openai" as const, model: "gpt-4o" },
+  ])(
+    "should handle resume with tricky message histories ($provider)",
+    async ({ provider, model }) => {
+      const { HistoryService } = await import("../../src/services/historyService");
+      const { createCmuxMessage } = await import("../../src/types/message");
+
+      for (const history of trickyHistories) {
+        const { env, workspaceId, cleanup } = await setupWorkspace(provider);
+        try {
+          // Create history service to directly manipulate messages
+          const historyService = new HistoryService(env.config);
+
+          // Create a user message first
+          const userMessage = createCmuxMessage(
+            `user-${Date.now()}`,
+            "user",
+            "Please help me with this task.",
+            { historySequence: 1 }
+          );
+
+          const userAppendResult = await historyService.appendToHistory(workspaceId, userMessage);
+          expect(userAppendResult.success).toBe(true);
+
+          // Create the tricky assistant message
+          const trickyMessage = history.createMessage(`assistant-${Date.now()}`);
+
+          // Append the tricky message to history
+          const appendResult = await historyService.appendToHistory(workspaceId, trickyMessage);
+          expect(appendResult.success).toBe(true);
+
+          // Clear events before resume
+          env.sentEvents.length = 0;
+
+          // Resume the stream with thinking enabled
+          // This exercises the context-aware filtering logic
+          const resumeResult = (await env.mockIpcRenderer.invoke(
+            IPC_CHANNELS.WORKSPACE_RESUME_STREAM,
+            workspaceId,
+            { model: `${provider}:${model}`, thinkingLevel: "high" }
+          )) as Result;
+
+          // Should succeed for all tricky histories with the fix
+          if (!resumeResult.success) {
+            console.error(`[${provider}/${history.name}] Failed to resume:`, resumeResult.error);
+          }
+          expect(resumeResult.success).toBe(true);
+
+          // Verify the stream completes successfully
+          const collector = createEventCollector(env.sentEvents, workspaceId);
+          const streamEnd = await collector.waitForEvent("stream-end", 30000);
+          expect(streamEnd).toBeDefined();
+
+          // Verify no errors occurred during streaming
+          collector.collect();
+          const streamErrors = collector
+            .getEvents()
+            .filter((e) => "type" in e && e.type === "stream-error");
+
+          if (streamErrors.length > 0) {
+            console.error(`[${provider}/${history.name}] Stream errors:`, streamErrors);
+          }
+          expect(streamErrors.length).toBe(0);
+
+          // Verify we received some content
+          const deltas = collector.getDeltas();
+          expect(deltas.length).toBeGreaterThan(0);
+        } finally {
+          await cleanup();
+        }
+      }
+    },
+    90000 // 90 second timeout - testing multiple scenarios per provider
+  );
 });