diff --git a/src/services/aiService.ts b/src/services/aiService.ts
index 5ddb67b5c2..669b745bc5 100644
--- a/src/services/aiService.ts
+++ b/src/services/aiService.ts
@@ -19,6 +19,7 @@ import {
   validateAnthropicCompliance,
   addInterruptedSentinel,
   filterEmptyAssistantMessages,
+  stripReasoningForOpenAI,
 } from "@/utils/messages/modelMessageTransform";
 import { applyCacheControl } from "@/utils/ai/cacheStrategy";
 import type { HistoryService } from "./historyService";
@@ -278,11 +279,23 @@ export class AIService extends EventEmitter {
     // Dump original messages for debugging
     log.debug_obj(`${workspaceId}/1_original_messages.json`, messages);
 
+    // Extract provider name from modelString (e.g., "anthropic:claude-opus-4-1" -> "anthropic")
+    const [providerName] = modelString.split(":");
+
     // Filter out assistant messages with only reasoning (no text/tools)
-    const filteredMessages = filterEmptyAssistantMessages(messages);
+    let filteredMessages = filterEmptyAssistantMessages(messages);
     log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`);
     log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages);
 
+    // OpenAI-specific: Strip reasoning parts from history
+    // OpenAI manages reasoning via previousResponseId; sending Anthropic-style reasoning
+    // parts creates orphaned reasoning items that cause API errors
+    if (providerName === "openai") {
+      filteredMessages = stripReasoningForOpenAI(filteredMessages);
+      log.debug("Stripped reasoning parts for OpenAI");
+      log.debug_obj(`${workspaceId}/1b_openai_stripped.json`, filteredMessages);
+    }
+
     // Add [INTERRUPTED] sentinel to partial messages (for model context)
     const messagesWithSentinel = addInterruptedSentinel(filteredMessages);
 
@@ -293,9 +306,6 @@ export class AIService extends EventEmitter {
 
     log.debug_obj(`${workspaceId}/2_model_messages.json`, modelMessages);
 
-    // Extract provider name from modelString (e.g., "anthropic:claude-opus-4-1" -> "anthropic")
-    const [providerName] = modelString.split(":");
-
     // Apply ModelMessage transforms based on provider requirements
     const transformedMessages = transformModelMessages(modelMessages, providerName);
 
diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts
index cd993f22e4..6fdab7509d 100644
--- a/src/utils/messages/modelMessageTransform.ts
+++ b/src/utils/messages/modelMessageTransform.ts
@@ -10,6 +10,10 @@ import type { CmuxMessage } from "@/types/message";
  * Filter out assistant messages that only contain reasoning parts (no text or tool parts).
  * These messages are invalid for the API and provide no value to the model.
  * This happens when a message is interrupted during thinking before producing any text.
+ *
+ * Note: This function filters out reasoning-only messages but does NOT strip reasoning
+ * parts from messages that have other content. Reasoning parts are handled differently
+ * per provider (see stripReasoningForOpenAI).
  */
 export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessage[] {
   return messages.filter((msg) => {
@@ -27,6 +31,36 @@ export function filterEmptyAssistantMessa
   });
 }
 
+/**
+ * Strip reasoning parts from messages for OpenAI.
+ *
+ * OpenAI's Responses API uses encrypted reasoning items (with IDs like rs_*) that are
+ * managed automatically via previous_response_id. When reasoning parts from history
+ * (which are Anthropic-style text-based reasoning) are sent to OpenAI, they create
+ * orphaned reasoning items that cause "reasoning without following item" errors.
+ *
+ * Anthropic's reasoning (text-based) is different and SHOULD be sent back via sendReasoning.
+ *
+ * @param messages - Messages that may contain reasoning parts
+ * @returns Messages with reasoning parts stripped (for OpenAI only)
+ */
+export function stripReasoningForOpenAI(messages: CmuxMessage[]): CmuxMessage[] {
+  return messages.map((msg) => {
+    // Only process assistant messages
+    if (msg.role !== "assistant") {
+      return msg;
+    }
+
+    // Strip reasoning parts - OpenAI manages reasoning via previousResponseId
+    const filteredParts = msg.parts.filter((part) => part.type !== "reasoning");
+
+    return {
+      ...msg,
+      parts: filteredParts,
+    };
+  });
+}
+
 /**
  * Add [INTERRUPTED] sentinel to partial messages by inserting a user message.
  * This helps the model understand that a message was interrupted and incomplete.