🤖 Fix: web_search_call reasoning error - always strip item IDs (#84)

ammario · web-flow · commit 56b251c81ee2 · 2025-10-07T15:44:37.000-05:00
This PR fixes a critical bug in the OpenAI reasoning middleware where item IDs were only being stripped when a message contained reasoning parts.

## Problem

The middleware had a condition `if (hasReasoning && ...)` that prevented stripping `providerOptions.openai` from parts in messages that didn't contain reasoning. This caused the error:

```
Item 'ws_*' of type 'web_search_call' was provided without its required 'reasoning' item: 'rs_*'
```

## Root Cause

Multi-step execution scenario:

1. **Message 1**: Has `reasoning + web_search_call`
   - Middleware strips reasoning → `hasReasoning = true`
   - Strips itemId from web_search_call ✅
2. **Message 2**: Has ONLY `web_search_call` (no reasoning in this message)
   - `hasReasoning = false`
   - Does NOT strip itemId ❌
3. OpenAI rejects message 2 because the itemId references a reasoning item that doesn't exist

## Solution

Changed the condition from:

```typescript
if (hasReasoning && typeof part === 'object' && part !== null) {
```

To:

```typescript
if (typeof part === 'object' && part !== null) {
```

Now the middleware **always** strips `providerOptions.openai` from any part that has it, regardless of whether the message contains reasoning. This is correct because OpenAI manages all response context via the `previousResponseId` parameter, not via message content.

## Testing

- ✅ Manually verified the fix logic
- ✅ Type checking passes
- ✅ Affects: web_search_call, tool-call, and any other OpenAI-specific part types

## Related

- Fixes the web_search_call error reported after PR #77 was merged
- Completes the reasoning error fix from PR #77

_Generated with `cmux`_
diff --git a/src/debug/replay-history.ts b/src/debug/replay-history.ts
@@ -0,0 +1,290 @@
+#!/usr/bin/env bun
+
+/**
+ * Debug script to replay a chat history and send a new message.
+ * Useful for reproducing errors with specific conversation contexts.
+ *
+ * Usage:
+ *   bun src/debug/replay-history.ts <history-file.json> <message> [--model <model>] [--thinking <level>]
+ *
+ * Example:
+ *   bun src/debug/replay-history.ts /tmp/chat-broken.json "test message" --model openai:gpt-5-codex
+ *
+ * Exits non-zero when the stream fails to start, reports an error, or times out.
+ * The temporary workspace is removed on every path that reaches the cleanup step.
+ */
+
+import * as fs from "fs";
+import * as path from "path";
+import { parseArgs } from "util";
+import { defaultConfig } from "@/config";
+import type { CmuxMessage } from "@/types/message";
+import { createCmuxMessage } from "@/types/message";
+import { AIService } from "@/services/aiService";
+import { HistoryService } from "@/services/historyService";
+import { PartialService } from "@/services/partialService";
+
+/** Model used when --model is not supplied. */
+const DEFAULT_MODEL = "openai:gpt-5-codex";
+
+/** Upper bound on how long to wait for the stream to finish. */
+const STREAM_TIMEOUT_MS = 120000;
+
+/** Interval between checks of the stream's state while waiting. */
+const STREAM_POLL_MS = 100;
+
+/** Subset of the "stream-event" payload that this script reads. */
+interface StreamEvent {
+  workspaceId: string;
+  type: string;
+  toolName?: string;
+  error?: string;
+}
+
+const { positionals, values } = parseArgs({
+  args: process.argv.slice(2),
+  options: {
+    model: { type: "string", short: "m" },
+    thinking: { type: "string", short: "t" },
+  },
+  allowPositionals: true,
+});
+
+const historyFile = positionals[0];
+const messageText = positionals[1];
+
+if (!historyFile || !messageText) {
+  console.error(
+    "Usage: bun src/debug/replay-history.ts <history-file.json> <message> [--model <model>] [--thinking <level>]"
+  );
+  console.error(
+    "Example: bun src/debug/replay-history.ts /tmp/chat-broken.json 'test' --model openai:gpt-5-codex"
+  );
+  process.exit(1);
+}
+
+if (!fs.existsSync(historyFile)) {
+  console.error(`❌ History file not found: ${historyFile}`);
+  process.exit(1);
+}
+
+/**
+ * Loads the messages to replay from disk.
+ *
+ * The file may be a JSON document (an array of messages, or a single message
+ * that is wrapped into an array) or JSONL with one message per non-blank line.
+ * The parsed values are cast to CmuxMessage without validation.
+ *
+ * @param file - Path of the history file.
+ * @returns The messages in file order.
+ * @throws If the file cannot be read, or if it is neither valid JSON nor valid JSONL.
+ */
+function loadHistory(file: string): CmuxMessage[] {
+  const content = fs.readFileSync(file, "utf-8");
+
+  try {
+    const parsed = JSON.parse(content) as CmuxMessage | CmuxMessage[];
+    return Array.isArray(parsed) ? parsed : [parsed];
+  } catch {
+    // Not a single JSON document, so treat it as JSONL.
+    return content
+      .split("\n")
+      .filter((line) => line.trim())
+      .map((line) => JSON.parse(line) as CmuxMessage);
+  }
+}
+
+/**
+ * Waits until the workspace's stream is no longer tracked, or has reached the
+ * "completed" or "error" state, polling every STREAM_POLL_MS milliseconds.
+ *
+ * Both timers are cleared as soon as either one settles the wait, so the
+ * pending timeout does not keep the process alive after the stream ends.
+ *
+ * @param aiService - Service whose stream manager is inspected.
+ * @param workspaceId - Workspace whose stream is awaited.
+ * @param timeoutMs - Maximum time to wait, in milliseconds.
+ * @returns true when the stream finished, false when the timeout elapsed first.
+ */
+function waitForStream(
+  aiService: AIService,
+  workspaceId: string,
+  timeoutMs: number
+): Promise<boolean> {
+  return new Promise<boolean>((resolve) => {
+    const settle = (finished: boolean): void => {
+      clearInterval(poll);
+      clearTimeout(deadline);
+      resolve(finished);
+    };
+
+    const poll = setInterval(() => {
+      // The cast reaches into AIService for its stream manager, which the
+      // declared type evidently does not expose.
+      const streamManager = (
+        aiService as unknown as {
+          streamManager: { workspaceStreams: Map<string, { state: string }> };
+        }
+      ).streamManager;
+      const stream = streamManager.workspaceStreams.get(workspaceId);
+      if (!stream || stream.state === "completed" || stream.state === "error") {
+        settle(true);
+      }
+    }, STREAM_POLL_MS);
+
+    const deadline = setTimeout(() => settle(false), timeoutMs);
+  });
+}
+
+/**
+ * Replays the history into a temporary workspace, sends the new message, and
+ * reports how the stream ended.
+ *
+ * @param historyPath - Path of the history file to replay.
+ * @param message - Text of the user message appended to the history.
+ * @returns The process exit code: 0 on success, 1 on any failure.
+ */
+async function main(historyPath: string, message: string): Promise<number> {
+  console.log(`\n=== Replay History Debug Tool ===\n`);
+  console.log(`History file: ${historyPath}`);
+  console.log(`Message: ${message}`);
+  console.log(`Model: ${values.model ?? `default (${DEFAULT_MODEL})`}\n`);
+
+  const messages = loadHistory(historyPath);
+  console.log(`📝 Loaded ${messages.length} messages from history\n`);
+
+  // Display summary
+  for (const msg of messages) {
+    const preview =
+      msg.role === "user"
+        ? (msg.parts.find((p) => p.type === "text")?.text?.substring(0, 60) ?? "")
+        : `[${msg.parts.length} parts: ${msg.parts.map((p) => p.type).join(", ")}]`;
+    const model = msg.metadata?.model ?? "unknown";
+    console.log(`  ${msg.role.padEnd(9)} (${model}): ${preview}`);
+  }
+
+  const workspaceId = `debug-replay-${Date.now()}`;
+  const sessionDir = defaultConfig.getSessionDir(workspaceId);
+
+  // Return exit codes instead of calling process.exit() inside the try block:
+  // process.exit() terminates immediately and would skip the finally cleanup.
+  try {
+    // Create a temporary workspace
+    fs.mkdirSync(sessionDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(sessionDir, "metadata.json"),
+      JSON.stringify({
+        id: workspaceId,
+        projectName: "debug",
+        workspacePath: `/tmp/${workspaceId}`,
+      })
+    );
+
+    // Write history to temp workspace
+    const historyLines = messages.map((m) => JSON.stringify({ ...m, workspaceId })).join("\n");
+    fs.writeFileSync(path.join(sessionDir, "chat.jsonl"), historyLines + "\n");
+
+    console.log(`\n✓ Created temporary workspace: ${workspaceId}`);
+
+    // Add new user message to the history
+    const userMessage = createCmuxMessage(
+      `user-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
+      "user",
+      message,
+      { timestamp: Date.now(), historySequence: messages.length }
+    );
+    messages.push(userMessage);
+
+    console.log(`\n📤 Sending message: "${message}"\n`);
+
+    // Initialize services - AIService creates its own StreamManager
+    const config = defaultConfig;
+    const historyService = new HistoryService(config);
+    const partialService = new PartialService(config, historyService);
+    const aiService = new AIService(config, historyService, partialService);
+
+    const modelString = values.model ?? DEFAULT_MODEL;
+    // NOTE: the value of --thinking is passed through without validation.
+    const thinkingLevel = (values.thinking ?? "high") as "low" | "medium" | "high";
+
+    let hasError = false;
+    let errorMessage = "";
+
+    // Subscribe before starting the stream so that events emitted while
+    // streamMessage() is still pending are not missed.
+    aiService.on("stream-event", (event: StreamEvent) => {
+      if (event.workspaceId !== workspaceId) return;
+
+      if (event.type === "stream-start") {
+        console.log(`[${event.type}] Started`);
+      } else if (event.type === "reasoning-delta" || event.type === "text-delta") {
+        // Don't log every delta, too verbose
+      } else if (event.type === "reasoning-end") {
+        console.log(`[${event.type}] Reasoning complete`);
+      } else if (event.type === "tool-call-start") {
+        console.log(`[${event.type}] Tool: ${event.toolName ?? "unknown"}`);
+      } else if (event.type === "tool-call-end") {
+        console.log(`[${event.type}] Tool complete`);
+      } else if (event.type === "stream-end") {
+        console.log(`[${event.type}] Stream complete`);
+      } else if (event.type === "stream-error") {
+        console.error(`\n❌ [${event.type}] ${event.error ?? "unknown error"}`);
+        errorMessage = event.error ?? "";
+        hasError = true;
+      } else {
+        console.log(`[${event.type}]`);
+      }
+    });
+
+    // Stream the message - pass all messages including the new one
+    const result = await aiService.streamMessage(messages, workspaceId, modelString, thinkingLevel);
+
+    if (!result.success) {
+      console.error(`\n❌ Error:`, JSON.stringify(result.error, null, 2));
+      return 1;
+    }
+
+    console.log(`✓ Stream started`);
+    console.log(`\n⏳ Waiting for stream to complete...\n`);
+
+    const finished = await waitForStream(aiService, workspaceId, STREAM_TIMEOUT_MS);
+
+    if (hasError) {
+      console.log(`\n❌ Stream encountered an error:`);
+      console.log(errorMessage);
+
+      // Check if it's the web_search_call error
+      if (errorMessage.includes("web_search_call") && errorMessage.includes("reasoning")) {
+        console.log(`\n🎯 Reproduced the web_search_call + reasoning error!`);
+      }
+
+      return 1;
+    }
+
+    if (!finished) {
+      // A timeout is a failure: the stream never reached a terminal state.
+      console.error(`\n❌ Timed out after ${STREAM_TIMEOUT_MS / 1000}s waiting for the stream`);
+      return 1;
+    }
+
+    console.log(`\n✅ Stream completed successfully!`);
+    return 0;
+  } catch (error) {
+    console.error(`\n❌ Exception:`, error);
+    return 1;
+  } finally {
+    // Cleanup
+    console.log(`\n🧹 Cleaning up temporary workspace...`);
+    fs.rmSync(sessionDir, { recursive: true, force: true });
+  }
+}
+
+main(historyFile, messageText)
+  .then((exitCode) => {
+    // A non-zero code is applied only now, after main()'s cleanup has run.
+    if (exitCode !== 0) process.exit(exitCode);
+  })
+  .catch((error: unknown) => {
+    console.error("Fatal error:", error);
+    process.exit(1);
+  });
diff --git a/src/utils/ai/openaiReasoningMiddleware.ts b/src/utils/ai/openaiReasoningMiddleware.ts
@@ -48,7 +48,7 @@ export const openaiReasoningFixMiddleware: LanguageModelV2Middleware = {
         // Filter out reasoning content from assistant messages
         if (Array.isArray(message.content)) {
           // Check if this message contains reasoning
-          const hasReasoning = message.content.some(
+          const _hasReasoning = message.content.some(
             (part) =>
               typeof part === "object" &&
               part !== null &&
@@ -65,9 +65,9 @@ export const openaiReasoningFixMiddleware: LanguageModelV2Middleware = {
               return true;
             })
             .map((part) => {
-              // If we filtered out reasoning from this message, also strip OpenAI item IDs
-              // from remaining parts to avoid dangling references
-              if (hasReasoning && typeof part === "object" && part !== null) {
+              // Always strip OpenAI item IDs from parts that have them
+              // OpenAI manages these via previousResponseId, not via message content
+              if (typeof part === "object" && part !== null) {
                 // Check if part has providerOptions.openai.itemId
                 const partObj = part as unknown as Record<string, unknown>;
                 if (