diff --git a/src/utils/messages/modelMessageTransform.test.ts b/src/utils/messages/modelMessageTransform.test.ts index 4bf51e2a4..9020960a2 100644 --- a/src/utils/messages/modelMessageTransform.test.ts +++ b/src/utils/messages/modelMessageTransform.test.ts @@ -508,12 +508,48 @@ describe("modelMessageTransform", () => { const result = addInterruptedSentinel(messages); - // Should have 6 messages (4 original + 2 sentinels) - expect(result).toHaveLength(6); - expect(result[2].id).toBe("interrupted-assistant-1"); - expect(result[2].role).toBe("user"); - expect(result[5].id).toBe("interrupted-assistant-2"); - expect(result[5].role).toBe("user"); + // Should have 5 messages: + // - user-1, assistant-1 (partial), user-2 (NO SENTINEL - user follows), assistant-2 (partial), SENTINEL (last message) + expect(result).toHaveLength(5); + expect(result[0].id).toBe("user-1"); + expect(result[1].id).toBe("assistant-1"); + expect(result[2].id).toBe("user-2"); // No sentinel between assistant-1 and user-2 + expect(result[3].id).toBe("assistant-2"); + expect(result[4].id).toBe("interrupted-assistant-2"); // Sentinel after last partial + expect(result[4].role).toBe("user"); + }); + + it("should skip sentinel when user message follows partial", () => { + const messages: CmuxMessage[] = [ + { + id: "user-1", + role: "user", + parts: [{ type: "text", text: "Question" }], + metadata: { timestamp: 1000 }, + }, + { + id: "assistant-1", + role: "assistant", + parts: [{ type: "text", text: "Starting response..." }], + metadata: { timestamp: 2000, partial: true }, + }, + { + id: "user-2", + role: "user", + parts: [{ type: "text", text: "Follow-up question" }], + metadata: { timestamp: 3000 }, + }, + ]; + + const result = addInterruptedSentinel(messages); + + // Should have 3 messages (no sentinel added because user-2 follows partial) + expect(result).toHaveLength(3); + expect(result[0].id).toBe("user-1"); + expect(result[1].id).toBe("assistant-1"); + expect(result[2].id).toBe("user-2"); + // No synthetic sentinel should exist + expect(result.every((msg) => !msg.metadata?.synthetic)).toBe(true); }); }); diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts index e0542e868..2dcaa2144 100644 --- a/src/utils/messages/modelMessageTransform.ts +++ b/src/utils/messages/modelMessageTransform.ts @@ -66,6 +66,10 @@ export function stripReasoningForOpenAI(messages: CmuxMessage[]): CmuxMessage[] * This helps the model understand that a message was interrupted and to continue. * The sentinel is ONLY for model context, not shown in UI. * + * OPTIMIZATION: If a user message already follows the partial assistant message, + * we skip the sentinel - the user message itself provides the continuation signal. + * This saves tokens and creates more natural conversation flow. + * * We insert a separate user message instead of modifying the assistant message * because if the assistant message only has reasoning (no text), it will be * filtered out, and we'd lose the interruption context. A user message always @@ -74,21 +78,28 @@ export function stripReasoningForOpenAI(messages: CmuxMessage[]): CmuxMessage[] export function addInterruptedSentinel(messages: CmuxMessage[]): CmuxMessage[] { const result: CmuxMessage[] = []; - for (const msg of messages) { + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; result.push(msg); - // If this is a partial assistant message, insert [CONTINUE] user message after it + // If this is a partial assistant message, conditionally insert [CONTINUE] sentinel if (msg.role === "assistant" && msg.metadata?.partial) { - result.push({ - id: `interrupted-${msg.id}`, - role: "user", - parts: [{ type: "text", text: "[CONTINUE]" }], - metadata: { - timestamp: msg.metadata.timestamp, - // Mark as synthetic so it can be identified if needed - synthetic: true, - }, - }); + const nextMsg = messages[i + 1]; + + // Only add sentinel if there's NO user message following + // If user message follows, it provides the continuation context itself + if (!nextMsg || nextMsg.role !== "user") { + result.push({ + id: `interrupted-${msg.id}`, + role: "user", + parts: [{ type: "text", text: "[CONTINUE]" }], + metadata: { + timestamp: msg.metadata.timestamp, + // Mark as synthetic so it can be identified if needed + synthetic: true, + }, + }); + } } }