From 22a0386dddf58b9bf062a8f63f712c0cb61dddfe Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Fri, 10 Oct 2025 15:06:56 -0500
Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=A4=96=20Add=20robust=20integration?=
 =?UTF-8?q?=20test=20for=20tricky=20message=20histories?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tests various edge cases that could cause issues on resume:
- Reasoning-only messages
- Empty text content
- Reasoning followed by empty text
- Multiple reasoning blocks
- Whitespace-only text

Uses direct HistoryService manipulation instead of timing-sensitive
interrupts. Tests both Anthropic and OpenAI providers.
---
 tests/ipcMain/resumeStream.test.ts | 149 +++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/tests/ipcMain/resumeStream.test.ts b/tests/ipcMain/resumeStream.test.ts
index 4158e00fa..32dfae551 100644
--- a/tests/ipcMain/resumeStream.test.ts
+++ b/tests/ipcMain/resumeStream.test.ts
@@ -142,4 +142,153 @@ describeIntegration("IpcMain resumeStream integration tests", () => {
     },
     45000 // 45 second timeout for this test
   );
+
+  // Define tricky message histories that could cause issues
+  const trickyHistories = [
+    {
+      name: "reasoning-only",
+      description: "Assistant message with only reasoning, no text",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [{ type: "reasoning" as const, text: "Let me think about this..." }],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+    {
+      name: "empty-text",
+      description: "Assistant message with empty text content",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [{ type: "text" as const, text: "" }],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+    {
+      name: "reasoning-then-empty-text",
+      description: "Assistant message with reasoning followed by empty text",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [
+          { type: "reasoning" as const, text: "Thinking deeply..." },
+          { type: "text" as const, text: "" },
+        ],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+    {
+      name: "multiple-reasoning-blocks",
+      description: "Assistant message with multiple reasoning blocks, no text",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [
+          { type: "reasoning" as const, text: "First thought..." },
+          { type: "reasoning" as const, text: "Second thought..." },
+        ],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+    {
+      name: "whitespace-only-text",
+      description: "Assistant message with whitespace-only text content",
+      createMessage: (id: string) => ({
+        id,
+        role: "assistant" as const,
+        parts: [{ type: "text" as const, text: "   \n\t  " }],
+        metadata: { historySequence: 2, partial: true },
+      }),
+    },
+  ];
+
+  test.concurrent.each([
+    { provider: "anthropic" as const, model: "claude-sonnet-4-5" },
+    { provider: "openai" as const, model: "gpt-4o" },
+  ])(
+    "should handle resume with tricky message histories ($provider)",
+    async ({ provider, model }) => {
+      const { HistoryService } = await import("../../src/services/historyService");
+      const { createCmuxMessage } = await import("../../src/types/message");
+
+      for (const history of trickyHistories) {
+        const { env, workspaceId, cleanup } = await setupWorkspace(provider);
+        try {
+          // Create history service to directly manipulate messages
+          const historyService = new HistoryService(env.config);
+
+          // Create a user message first
+          const userMessage = createCmuxMessage(
+            `user-${Date.now()}`,
+            "user",
+            "Please help me with this task.",
+            { historySequence: 1 }
+          );
+
+          const userAppendResult = await historyService.appendToHistory(
+            workspaceId,
+            userMessage
+          );
+          expect(userAppendResult.success).toBe(true);
+
+          // Create the tricky assistant message
+          const trickyMessage = history.createMessage(`assistant-${Date.now()}`);
+
+          // Append the tricky message to history
+          const appendResult = await historyService.appendToHistory(
+            workspaceId,
+            trickyMessage
+          );
+          expect(appendResult.success).toBe(true);
+
+          // Clear events before resume
+          env.sentEvents.length = 0;
+
+          // Resume the stream with thinking enabled
+          // This exercises the context-aware filtering logic
+          const resumeResult = (await env.mockIpcRenderer.invoke(
+            IPC_CHANNELS.WORKSPACE_RESUME_STREAM,
+            workspaceId,
+            { model: `${provider}:${model}`, thinkingLevel: "high" }
+          )) as Result<void, SendMessageError>;
+
+          // Should succeed for all tricky histories with the fix
+          if (!resumeResult.success) {
+            console.error(
+              `[${provider}/${history.name}] Failed to resume:`,
+              resumeResult.error
+            );
+          }
+          expect(resumeResult.success).toBe(true);
+
+          // Verify the stream completes successfully
+          const collector = createEventCollector(env.sentEvents, workspaceId);
+          const streamEnd = await collector.waitForEvent("stream-end", 30000);
+          expect(streamEnd).toBeDefined();
+
+          // Verify no errors occurred during streaming
+          collector.collect();
+          const streamErrors = collector
+            .getEvents()
+            .filter((e) => "type" in e && e.type === "stream-error");
+
+          if (streamErrors.length > 0) {
+            console.error(
+              `[${provider}/${history.name}] Stream errors:`,
+              streamErrors
+            );
+          }
+          expect(streamErrors.length).toBe(0);
+
+          // Verify we received some content
+          const deltas = collector.getDeltas();
+          expect(deltas.length).toBeGreaterThan(0);
+        } finally {
+          await cleanup();
+        }
+      }
+    },
+    90000 // 90 second timeout - testing multiple scenarios per provider
+  );
 });

From 8dcff4046b1e26a44dbd0fa3745bc5e4e36240bb Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Fri, 10 Oct 2025 15:08:41 -0500
Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=A4=96=20Implement=20context-aware=20?=
 =?UTF-8?q?filtering=20to=20fix=20Anthropic=20thinking=20error?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add provider and thinking level context to filterEmptyAssistantMessages()
to handle edge case where Anthropic expects reasoning content when resuming
with thinking enabled.

The fix:
- Preserve reasoning-only messages for Anthropic when:
  * Provider is Anthropic
  * Thinking level is enabled (not 'off')
  * Message is partial (being resumed)
- Filter them out in all other cases (preserves existing behavior)

This prevents the error:
  "Expected `thinking` but found `text`"

When resuming an interrupted thinking stream with Anthropic.
---
 src/services/aiService.ts                   |  7 ++++-
 src/utils/messages/modelMessageTransform.ts | 33 +++++++++++++++++++--
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/services/aiService.ts b/src/services/aiService.ts
index 21a4a0e62..7ff2e4a41 100644
--- a/src/services/aiService.ts
+++ b/src/services/aiService.ts
@@ -429,7 +429,12 @@ export class AIService extends EventEmitter {
       const [providerName] = modelString.split(":");
 
       // Filter out assistant messages with only reasoning (no text/tools)
-      const filteredMessages = filterEmptyAssistantMessages(messages);
+      // Pass provider and thinking level for context-aware filtering
+      const filteredMessages = filterEmptyAssistantMessages(
+        messages,
+        providerName,
+        thinkingLevel
+      );
       log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`);
       log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages);
 
diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts
index e0542e868..eb90cf415 100644
--- a/src/utils/messages/modelMessageTransform.ts
+++ b/src/utils/messages/modelMessageTransform.ts
@@ -8,14 +8,26 @@ import type { CmuxMessage } from "@/types/message";
 
 /**
  * Filter out assistant messages that only contain reasoning parts (no text or tool parts).
- * These messages are invalid for the API and provide no value to the model.
+ * These messages are invalid for most API calls and provide no value to the model.
  * This happens when a message is interrupted during thinking before producing any text.
  *
+ * EXCEPTION: For Anthropic, when resuming a partial message with thinking enabled,
+ * we preserve reasoning-only messages. This is because Anthropic's extended thinking
+ * API expects reasoning content to be present in the history.
+ *
  * Note: This function filters out reasoning-only messages but does NOT strip reasoning
  * parts from messages that have other content. Reasoning parts are handled differently
  * per provider (see stripReasoningForOpenAI).
+ *
+ * @param messages - Messages to filter
+ * @param provider - AI provider (optional, for context-aware filtering)
+ * @param thinkingLevel - Thinking level setting (optional, for context-aware filtering)
  */
-export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessage[] {
+export function filterEmptyAssistantMessages(
+  messages: CmuxMessage[],
+  provider?: string,
+  thinkingLevel?: string
+): CmuxMessage[] {
   return messages.filter((msg) => {
     // Keep all non-assistant messages
     if (msg.role !== "assistant") {
@@ -27,7 +39,22 @@ export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessa
       (part) => (part.type === "text" && part.text) || part.type === "dynamic-tool"
     );
 
-    return hasContent;
+    if (hasContent) {
+      return true;
+    }
+
+    // For Anthropic with thinking enabled, preserve reasoning-only partial messages
+    // This prevents "Expected `thinking` but found `text`" errors on resume
+    if (
+      provider === "anthropic" &&
+      thinkingLevel && thinkingLevel !== "off" &&
+      msg.metadata?.partial
+    ) {
+      return true; // Keep reasoning-only messages for Anthropic thinking
+    }
+
+    // Otherwise filter out reasoning-only messages
+    return false;
   });
 }
 

From 3e9007b96eb3192ca119d90a9cd2c7cfc9c68fa9 Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Fri, 10 Oct 2025 15:10:59 -0500
Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=A4=96=20Apply=20prettier=20formattin?=
 =?UTF-8?q?g?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/services/aiService.ts | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/services/aiService.ts b/src/services/aiService.ts
index 7ff2e4a41..27202e059 100644
--- a/src/services/aiService.ts
+++ b/src/services/aiService.ts
@@ -430,11 +430,7 @@ export class AIService extends EventEmitter {
 
       // Filter out assistant messages with only reasoning (no text/tools)
       // Pass provider and thinking level for context-aware filtering
-      const filteredMessages = filterEmptyAssistantMessages(
-        messages,
-        providerName,
-        thinkingLevel
-      );
+      const filteredMessages = filterEmptyAssistantMessages(messages, providerName, thinkingLevel);
       log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`);
       log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages);
 

From a17581bf0e16c0f0b34838d8b7f57bebefebd9c8 Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Fri, 10 Oct 2025 15:13:14 -0500
Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=A4=96=20Apply=20prettier=20formattin?=
 =?UTF-8?q?g=20(CI=20fix)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/utils/messages/modelMessageTransform.ts |  3 ++-
 tests/ipcMain/resumeStream.test.ts          | 20 ++++----------------
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts
index eb90cf415..3b4ffa356 100644
--- a/src/utils/messages/modelMessageTransform.ts
+++ b/src/utils/messages/modelMessageTransform.ts
@@ -47,7 +47,8 @@ export function filterEmptyAssistantMessages(
     // This prevents "Expected `thinking` but found `text`" errors on resume
     if (
       provider === "anthropic" &&
-      thinkingLevel && thinkingLevel !== "off" &&
+      thinkingLevel &&
+      thinkingLevel !== "off" &&
       msg.metadata?.partial
     ) {
       return true; // Keep reasoning-only messages for Anthropic thinking
diff --git a/tests/ipcMain/resumeStream.test.ts b/tests/ipcMain/resumeStream.test.ts
index 32dfae551..9df10aec8 100644
--- a/tests/ipcMain/resumeStream.test.ts
+++ b/tests/ipcMain/resumeStream.test.ts
@@ -226,20 +226,14 @@ describeIntegration("IpcMain resumeStream integration tests", () => {
             { historySequence: 1 }
           );
 
-          const userAppendResult = await historyService.appendToHistory(
-            workspaceId,
-            userMessage
-          );
+          const userAppendResult = await historyService.appendToHistory(workspaceId, userMessage);
           expect(userAppendResult.success).toBe(true);
 
           // Create the tricky assistant message
           const trickyMessage = history.createMessage(`assistant-${Date.now()}`);
 
           // Append the tricky message to history
-          const appendResult = await historyService.appendToHistory(
-            workspaceId,
-            trickyMessage
-          );
+          const appendResult = await historyService.appendToHistory(workspaceId, trickyMessage);
           expect(appendResult.success).toBe(true);
 
           // Clear events before resume
@@ -255,10 +249,7 @@ describeIntegration("IpcMain resumeStream integration tests", () => {
 
           // Should succeed for all tricky histories with the fix
           if (!resumeResult.success) {
-            console.error(
-              `[${provider}/${history.name}] Failed to resume:`,
-              resumeResult.error
-            );
+            console.error(`[${provider}/${history.name}] Failed to resume:`, resumeResult.error);
           }
           expect(resumeResult.success).toBe(true);
 
@@ -274,10 +265,7 @@ describeIntegration("IpcMain resumeStream integration tests", () => {
             .filter((e) => "type" in e && e.type === "stream-error");
 
           if (streamErrors.length > 0) {
-            console.error(
-              `[${provider}/${history.name}] Stream errors:`,
-              streamErrors
-            );
+            console.error(`[${provider}/${history.name}] Stream errors:`, streamErrors);
           }
           expect(streamErrors.length).toBe(0);