18 changes: 14 additions & 4 deletions src/services/aiService.ts
@@ -19,6 +19,7 @@ import {
   validateAnthropicCompliance,
   addInterruptedSentinel,
   filterEmptyAssistantMessages,
+  stripReasoningForOpenAI,
 } from "@/utils/messages/modelMessageTransform";
 import { applyCacheControl } from "@/utils/ai/cacheStrategy";
 import type { HistoryService } from "./historyService";
@@ -278,11 +279,23 @@ export class AIService extends EventEmitter {
     // Dump original messages for debugging
     log.debug_obj(`${workspaceId}/1_original_messages.json`, messages);
 
+    // Extract provider name from modelString (e.g., "anthropic:claude-opus-4-1" -> "anthropic")
+    const [providerName] = modelString.split(":");
+
     // Filter out assistant messages with only reasoning (no text/tools)
-    const filteredMessages = filterEmptyAssistantMessages(messages);
+    let filteredMessages = filterEmptyAssistantMessages(messages);
     log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`);
     log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages);
 
+    // OpenAI-specific: Strip reasoning parts from history
+    // OpenAI manages reasoning via previousResponseId; sending Anthropic-style reasoning
+    // parts creates orphaned reasoning items that cause API errors
+    if (providerName === "openai") {
+      filteredMessages = stripReasoningForOpenAI(filteredMessages);
+      log.debug("Stripped reasoning parts for OpenAI");
+      log.debug_obj(`${workspaceId}/1b_openai_stripped.json`, filteredMessages);
+    }
+
     // Add [INTERRUPTED] sentinel to partial messages (for model context)
     const messagesWithSentinel = addInterruptedSentinel(filteredMessages);
 
@@ -293,9 +306,6 @@

     log.debug_obj(`${workspaceId}/2_model_messages.json`, modelMessages);
 
-    // Extract provider name from modelString (e.g., "anthropic:claude-opus-4-1" -> "anthropic")
-    const [providerName] = modelString.split(":");
-
     // Apply ModelMessage transforms based on provider requirements
     const transformedMessages = transformModelMessages(modelMessages, providerName);
 
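For orientation, the reordered flow in aiService.ts can be sketched as a small standalone helper. This is an illustrative simplification rather than code from the PR: the prepareHistory wrapper and the "openai:gpt-4o" example are assumptions; only the two helpers, the CmuxMessage type, and the provider-prefix convention come from the diff above.

import {
  filterEmptyAssistantMessages,
  stripReasoningForOpenAI,
} from "@/utils/messages/modelMessageTransform";
import type { CmuxMessage } from "@/types/message";

// Hypothetical wrapper mirroring the order used in AIService: filter first,
// then strip reasoning parts only when the provider prefix is "openai".
function prepareHistory(messages: CmuxMessage[], modelString: string): CmuxMessage[] {
  // e.g. "anthropic:claude-opus-4-1" -> "anthropic", "openai:gpt-4o" -> "openai"
  const [providerName] = modelString.split(":");

  let prepared = filterEmptyAssistantMessages(messages);

  if (providerName === "openai") {
    // OpenAI tracks reasoning via previousResponseId, so Anthropic-style
    // reasoning parts from history must not be replayed into the request.
    prepared = stripReasoningForOpenAI(prepared);
  }
  return prepared;
}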
34 changes: 34 additions & 0 deletions src/utils/messages/modelMessageTransform.ts
@@ -10,6 +10,10 @@ import type { CmuxMessage } from "@/types/message";
  * Filter out assistant messages that only contain reasoning parts (no text or tool parts).
  * These messages are invalid for the API and provide no value to the model.
  * This happens when a message is interrupted during thinking before producing any text.
+ *
+ * Note: This function filters out reasoning-only messages but does NOT strip reasoning
+ * parts from messages that have other content. Reasoning parts are handled differently
+ * per provider (see stripReasoningForOpenAI).
  */
 export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessage[] {
   return messages.filter((msg) => {
@@ -27,6 +31,36 @@ export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessage[] {
   });
 }
 
+/**
+ * Strip reasoning parts from messages for OpenAI.
+ *
+ * OpenAI's Responses API uses encrypted reasoning items (with IDs like rs_*) that are
+ * managed automatically via previous_response_id. When reasoning parts from history
+ * (which are Anthropic-style text-based reasoning) are sent to OpenAI, they create
+ * orphaned reasoning items that cause "reasoning without following item" errors.
+ *
+ * Anthropic's reasoning (text-based) is different and SHOULD be sent back via sendReasoning.
+ *
+ * @param messages - Messages that may contain reasoning parts
+ * @returns Messages with reasoning parts stripped (for OpenAI only)
+ */
+export function stripReasoningForOpenAI(messages: CmuxMessage[]): CmuxMessage[] {
+  return messages.map((msg) => {
+    // Only process assistant messages
+    if (msg.role !== "assistant") {
+      return msg;
+    }
+
+    // Strip reasoning parts - OpenAI manages reasoning via previousResponseId
+    const filteredParts = msg.parts.filter((part) => part.type !== "reasoning");
+
+    return {
+      ...msg,
+      parts: filteredParts,
+    };
+  });
+}
+
 /**
  * Add [INTERRUPTED] sentinel to partial messages by inserting a user message.
  * This helps the model understand that a message was interrupted and incomplete.
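As a usage sketch of the new helper (illustrative only: the exact part shapes and ids below are assumed, not taken from the PR, and the cast papers over any extra fields the real CmuxMessage type may require):

import { stripReasoningForOpenAI } from "@/utils/messages/modelMessageTransform";
import type { CmuxMessage } from "@/types/message";

const history = [
  { id: "u1", role: "user", parts: [{ type: "text", text: "Summarize the diff." }] },
  {
    id: "a1",
    role: "assistant",
    parts: [
      { type: "reasoning", text: "Considering the request..." }, // dropped for OpenAI
      { type: "text", text: "Reasoning parts are stripped for OpenAI." }, // kept
    ],
  },
] as unknown as CmuxMessage[];

const openAiSafe = stripReasoningForOpenAI(history);
// openAiSafe[1].parts now contains only the text part; non-assistant messages
// pass through unchanged.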