From 22a0386dddf58b9bf062a8f63f712c0cb61dddfe Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 10 Oct 2025 15:06:56 -0500 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=A4=96=20Add=20robust=20integration?= =?UTF-8?q?=20test=20for=20tricky=20message=20histories?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests various edge cases that could cause issues on resume: - Reasoning-only messages - Empty text content - Reasoning followed by empty text - Multiple reasoning blocks - Whitespace-only text Uses direct HistoryService manipulation instead of timing-sensitive interrupts. Tests both Anthropic and OpenAI providers. --- tests/ipcMain/resumeStream.test.ts | 149 +++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/tests/ipcMain/resumeStream.test.ts b/tests/ipcMain/resumeStream.test.ts index 4158e00fa..32dfae551 100644 --- a/tests/ipcMain/resumeStream.test.ts +++ b/tests/ipcMain/resumeStream.test.ts @@ -142,4 +142,153 @@ describeIntegration("IpcMain resumeStream integration tests", () => { }, 45000 // 45 second timeout for this test ); + + // Define tricky message histories that could cause issues + const trickyHistories = [ + { + name: "reasoning-only", + description: "Assistant message with only reasoning, no text", + createMessage: (id: string) => ({ + id, + role: "assistant" as const, + parts: [{ type: "reasoning" as const, text: "Let me think about this..." }], + metadata: { historySequence: 2, partial: true }, + }), + }, + { + name: "empty-text", + description: "Assistant message with empty text content", + createMessage: (id: string) => ({ + id, + role: "assistant" as const, + parts: [{ type: "text" as const, text: "" }], + metadata: { historySequence: 2, partial: true }, + }), + }, + { + name: "reasoning-then-empty-text", + description: "Assistant message with reasoning followed by empty text", + createMessage: (id: string) => ({ + id, + role: "assistant" as const, + parts: [ + { type: "reasoning" as const, text: "Thinking deeply..." }, + { type: "text" as const, text: "" }, + ], + metadata: { historySequence: 2, partial: true }, + }), + }, + { + name: "multiple-reasoning-blocks", + description: "Assistant message with multiple reasoning blocks, no text", + createMessage: (id: string) => ({ + id, + role: "assistant" as const, + parts: [ + { type: "reasoning" as const, text: "First thought..." }, + { type: "reasoning" as const, text: "Second thought..." }, + ], + metadata: { historySequence: 2, partial: true }, + }), + }, + { + name: "whitespace-only-text", + description: "Assistant message with whitespace-only text content", + createMessage: (id: string) => ({ + id, + role: "assistant" as const, + parts: [{ type: "text" as const, text: " \n\t " }], + metadata: { historySequence: 2, partial: true }, + }), + }, + ]; + + test.concurrent.each([ + { provider: "anthropic" as const, model: "claude-sonnet-4-5" }, + { provider: "openai" as const, model: "gpt-4o" }, + ])( + "should handle resume with tricky message histories ($provider)", + async ({ provider, model }) => { + const { HistoryService } = await import("../../src/services/historyService"); + const { createCmuxMessage } = await import("../../src/types/message"); + + for (const history of trickyHistories) { + const { env, workspaceId, cleanup } = await setupWorkspace(provider); + try { + // Create history service to directly manipulate messages + const historyService = new HistoryService(env.config); + + // Create a user message first + const userMessage = createCmuxMessage( + `user-${Date.now()}`, + "user", + "Please help me with this task.", + { historySequence: 1 } + ); + + const userAppendResult = await historyService.appendToHistory( + workspaceId, + userMessage + ); + expect(userAppendResult.success).toBe(true); + + // Create the tricky assistant message + const trickyMessage = history.createMessage(`assistant-${Date.now()}`); + + // Append the tricky message to history + const appendResult = await historyService.appendToHistory( + workspaceId, + trickyMessage + ); + expect(appendResult.success).toBe(true); + + // Clear events before resume + env.sentEvents.length = 0; + + // Resume the stream with thinking enabled + // This exercises the context-aware filtering logic + const resumeResult = (await env.mockIpcRenderer.invoke( + IPC_CHANNELS.WORKSPACE_RESUME_STREAM, + workspaceId, + { model: `${provider}:${model}`, thinkingLevel: "high" } + )) as Result; + + // Should succeed for all tricky histories with the fix + if (!resumeResult.success) { + console.error( + `[${provider}/${history.name}] Failed to resume:`, + resumeResult.error + ); + } + expect(resumeResult.success).toBe(true); + + // Verify the stream completes successfully + const collector = createEventCollector(env.sentEvents, workspaceId); + const streamEnd = await collector.waitForEvent("stream-end", 30000); + expect(streamEnd).toBeDefined(); + + // Verify no errors occurred during streaming + collector.collect(); + const streamErrors = collector + .getEvents() + .filter((e) => "type" in e && e.type === "stream-error"); + + if (streamErrors.length > 0) { + console.error( + `[${provider}/${history.name}] Stream errors:`, + streamErrors + ); + } + expect(streamErrors.length).toBe(0); + + // Verify we received some content + const deltas = collector.getDeltas(); + expect(deltas.length).toBeGreaterThan(0); + } finally { + await cleanup(); + } + } + }, + 90000 // 90 second timeout - testing multiple scenarios per provider + ); }); From 8dcff4046b1e26a44dbd0fa3745bc5e4e36240bb Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 10 Oct 2025 15:08:41 -0500 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=A4=96=20Implement=20context-aware=20?= =?UTF-8?q?filtering=20to=20fix=20Anthropic=20thinking=20error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add provider and thinking level context to filterEmptyAssistantMessages() to handle edge case where Anthropic expects reasoning content when resuming with thinking enabled. The fix: - Preserve reasoning-only messages for Anthropic when: * Provider is Anthropic * Thinking level is enabled (not 'off') * Message is partial (being resumed) - Filter them out in all other cases (preserves existing behavior) This prevents the error: "Expected `thinking` but found `text`" When resuming an interrupted thinking stream with Anthropic. --- src/services/aiService.ts | 7 ++++- src/utils/messages/modelMessageTransform.ts | 33 +++++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/services/aiService.ts b/src/services/aiService.ts index 21a4a0e62..7ff2e4a41 100644 --- a/src/services/aiService.ts +++ b/src/services/aiService.ts @@ -429,7 +429,12 @@ export class AIService extends EventEmitter { const [providerName] = modelString.split(":"); // Filter out assistant messages with only reasoning (no text/tools) - const filteredMessages = filterEmptyAssistantMessages(messages); + // Pass provider and thinking level for context-aware filtering + const filteredMessages = filterEmptyAssistantMessages( + messages, + providerName, + thinkingLevel + ); log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`); log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages); diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts index e0542e868..eb90cf415 100644 --- a/src/utils/messages/modelMessageTransform.ts +++ b/src/utils/messages/modelMessageTransform.ts @@ -8,14 +8,26 @@ import type { CmuxMessage } from "@/types/message"; /** * Filter out assistant messages that only contain reasoning parts (no text or tool parts). - * These messages are invalid for the API and provide no value to the model. + * These messages are invalid for most API calls and provide no value to the model. * This happens when a message is interrupted during thinking before producing any text. * + * EXCEPTION: For Anthropic, when resuming a partial message with thinking enabled, + * we preserve reasoning-only messages. This is because Anthropic's extended thinking + * API expects reasoning content to be present in the history. + * * Note: This function filters out reasoning-only messages but does NOT strip reasoning * parts from messages that have other content. Reasoning parts are handled differently * per provider (see stripReasoningForOpenAI). + * + * @param messages - Messages to filter + * @param provider - AI provider (optional, for context-aware filtering) + * @param thinkingLevel - Thinking level setting (optional, for context-aware filtering) */ -export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessage[] { +export function filterEmptyAssistantMessages( + messages: CmuxMessage[], + provider?: string, + thinkingLevel?: string +): CmuxMessage[] { return messages.filter((msg) => { // Keep all non-assistant messages if (msg.role !== "assistant") { @@ -27,7 +39,22 @@ export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessa (part) => (part.type === "text" && part.text) || part.type === "dynamic-tool" ); - return hasContent; + if (hasContent) { + return true; + } + + // For Anthropic with thinking enabled, preserve reasoning-only partial messages + // This prevents "Expected `thinking` but found `text`" errors on resume + if ( + provider === "anthropic" && + thinkingLevel && thinkingLevel !== "off" && + msg.metadata?.partial + ) { + return true; // Keep reasoning-only messages for Anthropic thinking + } + + // Otherwise filter out reasoning-only messages + return false; }); } From 3e9007b96eb3192ca119d90a9cd2c7cfc9c68fa9 Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 10 Oct 2025 15:10:59 -0500 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=A4=96=20Apply=20prettier=20formattin?= =?UTF-8?q?g?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/services/aiService.ts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/services/aiService.ts b/src/services/aiService.ts index 7ff2e4a41..27202e059 100644 --- a/src/services/aiService.ts +++ b/src/services/aiService.ts @@ -430,11 +430,7 @@ export class AIService extends EventEmitter { // Filter out assistant messages with only reasoning (no text/tools) // Pass provider and thinking level for context-aware filtering - const filteredMessages = filterEmptyAssistantMessages( - messages, - providerName, - thinkingLevel - ); + const filteredMessages = filterEmptyAssistantMessages(messages, providerName, thinkingLevel); log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`); log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages); From a17581bf0e16c0f0b34838d8b7f57bebefebd9c8 Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 10 Oct 2025 15:13:14 -0500 Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=A4=96=20Apply=20prettier=20formattin?= =?UTF-8?q?g=20(CI=20fix)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/messages/modelMessageTransform.ts | 3 ++- tests/ipcMain/resumeStream.test.ts | 20 ++++---------------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts index eb90cf415..3b4ffa356 100644 --- a/src/utils/messages/modelMessageTransform.ts +++ b/src/utils/messages/modelMessageTransform.ts @@ -47,7 +47,8 @@ export function filterEmptyAssistantMessages( // This prevents "Expected `thinking` but found `text`" errors on resume if ( provider === "anthropic" && - thinkingLevel && thinkingLevel !== "off" && + thinkingLevel && + thinkingLevel !== "off" && msg.metadata?.partial ) { return true; // Keep reasoning-only messages for Anthropic thinking diff --git a/tests/ipcMain/resumeStream.test.ts b/tests/ipcMain/resumeStream.test.ts index 32dfae551..9df10aec8 100644 --- a/tests/ipcMain/resumeStream.test.ts +++ b/tests/ipcMain/resumeStream.test.ts @@ -226,20 +226,14 @@ describeIntegration("IpcMain resumeStream integration tests", () => { { historySequence: 1 } ); - const userAppendResult = await historyService.appendToHistory( - workspaceId, - userMessage - ); + const userAppendResult = await historyService.appendToHistory(workspaceId, userMessage); expect(userAppendResult.success).toBe(true); // Create the tricky assistant message const trickyMessage = history.createMessage(`assistant-${Date.now()}`); // Append the tricky message to history - const appendResult = await historyService.appendToHistory( - workspaceId, - trickyMessage - ); + const appendResult = await historyService.appendToHistory(workspaceId, trickyMessage); expect(appendResult.success).toBe(true); // Clear events before resume @@ -255,10 +249,7 @@ describeIntegration("IpcMain resumeStream integration tests", () => { // Should succeed for all tricky histories with the fix if (!resumeResult.success) { - console.error( - `[${provider}/${history.name}] Failed to resume:`, - resumeResult.error - ); + console.error(`[${provider}/${history.name}] Failed to resume:`, resumeResult.error); } expect(resumeResult.success).toBe(true); @@ -274,10 +265,7 @@ describeIntegration("IpcMain resumeStream integration tests", () => { .filter((e) => "type" in e && e.type === "stream-error"); if (streamErrors.length > 0) { - console.error( - `[${provider}/${history.name}] Stream errors:`, - streamErrors - ); + console.error(`[${provider}/${history.name}] Stream errors:`, streamErrors); } expect(streamErrors.length).toBe(0);