From 2da9ed11fc817f56d860a245d63e4a76732f90f9 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Wed, 10 Dec 2025 11:43:46 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20perf:=20context-efficient=20plan?= =?UTF-8?q?=20mode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove planContent from propose_plan tool result to avoid redundant context (plan is already visible via file_edit_* diffs in history) - Include full plan content in mode transition message when switching from plan → exec mode, with soft framing asking model to evaluate relevance - Update ProposePlanToolCall UI to handle missing planContent with backwards compatibility for old chat history - Add tests for plan content in mode transitions Change-Id: I3d8ecdbc01805c229a73c1bf0afa9bc9fb93a5f9 Signed-off-by: Thomas Kosiewski --- .../components/tools/ProposePlanToolCall.tsx | 28 +++- .../messages/modelMessageTransform.test.ts | 149 ++++++++++++++++++ .../utils/messages/modelMessageTransform.ts | 18 ++- src/common/types/tools.ts | 5 +- src/node/services/aiService.ts | 128 +++++++++------ src/node/services/tools/propose_plan.ts | 7 +- 6 files changed, 272 insertions(+), 63 deletions(-) diff --git a/src/browser/components/tools/ProposePlanToolCall.tsx b/src/browser/components/tools/ProposePlanToolCall.tsx index bcf2051be4..d916a41e6d 100644 --- a/src/browser/components/tools/ProposePlanToolCall.tsx +++ b/src/browser/components/tools/ProposePlanToolCall.tsx @@ -27,7 +27,8 @@ import { usePopoverError } from "@/browser/hooks/usePopoverError"; import { PopoverError } from "../PopoverError"; /** - * Check if the result is a successful file-based propose_plan result + * Check if the result is a successful file-based propose_plan result. + * Note: planContent may be absent in newer results (context optimization). */ function isProposePlanResult(result: unknown): result is ProposePlanToolResult { return ( @@ -35,11 +36,17 @@ function isProposePlanResult(result: unknown): result is ProposePlanToolResult { typeof result === "object" && "success" in result && result.success === true && - "planContent" in result && "planPath" in result ); } +/** + * Result type that may have planContent (for backwards compatibility with old chat history) + */ +interface ProposePlanResultWithContent extends ProposePlanToolResult { + planContent?: string; +} + /** * Check if the result is an error from propose_plan tool */ @@ -173,11 +180,20 @@ export const ProposePlanToolCall: React.FC = (props) = const titleMatch = /^#\s+(.+)$/m.exec(freshContent); planTitle = titleMatch ? titleMatch[1] : (planPath?.split("/").pop() ?? "Plan"); } else if (isProposePlanResult(result)) { - planContent = result.planContent; + // New format: planContent may be absent (context optimization) + // For backwards compatibility, check if planContent exists in old chat history + const resultWithContent = result as ProposePlanResultWithContent; planPath = result.planPath; - // Extract title from first markdown heading or use filename - const titleMatch = /^#\s+(.+)$/m.exec(result.planContent); - planTitle = titleMatch ? titleMatch[1] : (planPath.split("/").pop() ?? "Plan"); + if (resultWithContent.planContent) { + // Old result with embedded content (backwards compatibility) + planContent = resultWithContent.planContent; + const titleMatch = /^#\s+(.+)$/m.exec(resultWithContent.planContent); + planTitle = titleMatch ? titleMatch[1] : (planPath.split("/").pop() ?? "Plan"); + } else { + // New result without content - show path info, content is fetched for latest + planContent = `*Plan saved to ${planPath}*`; + planTitle = planPath.split("/").pop() ?? "Plan"; + } } else if (isLegacyProposePlanResult(result)) { // Legacy format: title + plan passed directly (no file) planContent = result.plan; diff --git a/src/browser/utils/messages/modelMessageTransform.test.ts b/src/browser/utils/messages/modelMessageTransform.test.ts index e2a57c9e82..450a92953a 100644 --- a/src/browser/utils/messages/modelMessageTransform.test.ts +++ b/src/browser/utils/messages/modelMessageTransform.test.ts @@ -874,6 +874,155 @@ describe("injectModeTransition", () => { text: "[Mode switched from plan to exec. Follow exec mode instructions.]", }); }); + + it("should include plan content when transitioning from plan to exec", () => { + const messages: MuxMessage[] = [ + { + id: "user-1", + role: "user", + parts: [{ type: "text", text: "Let's plan a feature" }], + metadata: { timestamp: 1000 }, + }, + { + id: "assistant-1", + role: "assistant", + parts: [{ type: "text", text: "Here's the plan..." }], + metadata: { timestamp: 2000, mode: "plan" }, + }, + { + id: "user-2", + role: "user", + parts: [{ type: "text", text: "Now execute it" }], + metadata: { timestamp: 3000 }, + }, + ]; + + const planContent = "# My Plan\n\n## Step 1\nDo something\n\n## Step 2\nDo more"; + const result = injectModeTransition(messages, "exec", undefined, planContent); + + expect(result.length).toBe(4); + const transitionMessage = result[2]; + expect(transitionMessage.role).toBe("user"); + expect(transitionMessage.metadata?.synthetic).toBe(true); + + const textPart = transitionMessage.parts[0]; + expect(textPart.type).toBe("text"); + if (textPart.type === "text") { + expect(textPart.text).toContain( + "[Mode switched from plan to exec. Follow exec mode instructions.]" + ); + expect(textPart.text).toContain("The following plan was developed in plan mode"); + expect(textPart.text).toContain(""); + expect(textPart.text).toContain(planContent); + expect(textPart.text).toContain(""); + } + }); + + it("should NOT include plan content when transitioning from exec to plan", () => { + const messages: MuxMessage[] = [ + { + id: "user-1", + role: "user", + parts: [{ type: "text", text: "Done with feature" }], + metadata: { timestamp: 1000 }, + }, + { + id: "assistant-1", + role: "assistant", + parts: [{ type: "text", text: "Feature complete" }], + metadata: { timestamp: 2000, mode: "exec" }, + }, + { + id: "user-2", + role: "user", + parts: [{ type: "text", text: "Let's plan the next one" }], + metadata: { timestamp: 3000 }, + }, + ]; + + const planContent = "# Old Plan\n\nSome content"; + const result = injectModeTransition(messages, "plan", undefined, planContent); + + expect(result.length).toBe(4); + const transitionMessage = result[2]; + const textPart = transitionMessage.parts[0]; + if (textPart.type === "text") { + expect(textPart.text).toBe( + "[Mode switched from exec to plan. Follow plan mode instructions.]" + ); + expect(textPart.text).not.toContain(""); + } + }); + + it("should NOT include plan content when no plan content provided", () => { + const messages: MuxMessage[] = [ + { + id: "user-1", + role: "user", + parts: [{ type: "text", text: "Let's plan" }], + metadata: { timestamp: 1000 }, + }, + { + id: "assistant-1", + role: "assistant", + parts: [{ type: "text", text: "Planning..." }], + metadata: { timestamp: 2000, mode: "plan" }, + }, + { + id: "user-2", + role: "user", + parts: [{ type: "text", text: "Execute" }], + metadata: { timestamp: 3000 }, + }, + ]; + + const result = injectModeTransition(messages, "exec", undefined, undefined); + + expect(result.length).toBe(4); + const transitionMessage = result[2]; + const textPart = transitionMessage.parts[0]; + if (textPart.type === "text") { + expect(textPart.text).toBe( + "[Mode switched from plan to exec. Follow exec mode instructions.]" + ); + expect(textPart.text).not.toContain(""); + } + }); + + it("should include both tools and plan content in transition message", () => { + const messages: MuxMessage[] = [ + { + id: "user-1", + role: "user", + parts: [{ type: "text", text: "Plan done" }], + metadata: { timestamp: 1000 }, + }, + { + id: "assistant-1", + role: "assistant", + parts: [{ type: "text", text: "Plan ready" }], + metadata: { timestamp: 2000, mode: "plan" }, + }, + { + id: "user-2", + role: "user", + parts: [{ type: "text", text: "Go" }], + metadata: { timestamp: 3000 }, + }, + ]; + + const toolNames = ["file_read", "bash"]; + const planContent = "# Plan\n\nDo stuff"; + const result = injectModeTransition(messages, "exec", toolNames, planContent); + + expect(result.length).toBe(4); + const textPart = result[2].parts[0]; + if (textPart.type === "text") { + expect(textPart.text).toContain("Available tools: file_read, bash.]"); + expect(textPart.text).toContain(""); + expect(textPart.text).toContain(planContent); + } + }); }); describe("filterEmptyAssistantMessages", () => { diff --git a/src/browser/utils/messages/modelMessageTransform.ts b/src/browser/utils/messages/modelMessageTransform.ts index ce549be3aa..f340661889 100644 --- a/src/browser/utils/messages/modelMessageTransform.ts +++ b/src/browser/utils/messages/modelMessageTransform.ts @@ -112,15 +112,20 @@ export function addInterruptedSentinel(messages: MuxMessage[]): MuxMessage[] { * Inserts a synthetic user message before the final user message to signal the mode switch. * This provides temporal context that helps models understand they should follow new mode instructions. * + * When transitioning from plan → exec mode with plan content, includes the plan so the model + * can evaluate its relevance to the current request. + * * @param messages The conversation history * @param currentMode The mode for the upcoming assistant response (e.g., "plan", "exec") * @param toolNames Optional list of available tool names to include in transition message + * @param planContent Optional plan content to include when transitioning plan → exec * @returns Messages with mode transition context injected if needed */ export function injectModeTransition( messages: MuxMessage[], currentMode?: string, - toolNames?: string[] + toolNames?: string[], + planContent?: string ): MuxMessage[] { // No mode specified, nothing to do if (!currentMode) { @@ -175,6 +180,17 @@ export function injectModeTransition( transitionText += "]"; } + // When transitioning plan → exec with plan content, include the plan for context + if (lastMode === "plan" && currentMode === "exec" && planContent) { + transitionText += ` + +The following plan was developed in plan mode. Based on the user's message, determine if they have accepted the plan. If accepted and relevant, use it to guide your implementation: + + +${planContent} +`; + } + const transitionMessage: MuxMessage = { id: `mode-transition-${Date.now()}`, role: "user", diff --git a/src/common/types/tools.ts b/src/common/types/tools.ts index 17154b2775..18f57b3bd4 100644 --- a/src/common/types/tools.ts +++ b/src/common/types/tools.ts @@ -170,11 +170,12 @@ export type FileEditToolArgs = // Args derived from schema export type ProposePlanToolArgs = z.infer; -// Result type for new file-based propose_plan tool +// Result type for file-based propose_plan tool +// Note: planContent is NOT included to save context - plan is visible via file_edit_* diffs +// and will be included in mode transition message when switching to exec mode export interface ProposePlanToolResult { success: true; planPath: string; - planContent: string; message: string; } diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 3689d24dfc..f0cba36ce9 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -57,6 +57,7 @@ import { EnvHttpProxyAgent, type Dispatcher } from "undici"; import { getPlanFilePath } from "@/common/utils/planStorage"; import { getPlanModeInstruction } from "@/common/utils/ui/modeUtils"; import type { UIMode } from "@/common/types/mode"; +import { readFileString } from "@/node/utils/runtime/helpers"; // Export a standalone version of getToolsForModel for use in backend @@ -954,58 +955,8 @@ export class AIService extends EventEmitter { // Add [CONTINUE] sentinel to partial messages (for model context) const messagesWithSentinel = addInterruptedSentinel(filteredMessages); - // Inject mode transition context if mode changed from last assistant message - // Include tool names so model knows what tools are available in the new mode - const messagesWithModeContext = injectModeTransition( - messagesWithSentinel, - mode, - toolNamesForSentinel - ); - - // Inject file change notifications as user messages (preserves system message cache) - const messagesWithFileChanges = injectFileChangeNotifications( - messagesWithModeContext, - changedFileAttachments - ); - - // Apply centralized tool-output redaction BEFORE converting to provider ModelMessages - // This keeps the persisted/UI history intact while trimming heavy fields for the request - const redactedForProvider = applyToolOutputRedaction(messagesWithFileChanges); - log.debug_obj(`${workspaceId}/2a_redacted_messages.json`, redactedForProvider); - - // Sanitize tool inputs to ensure they are valid objects (not strings or arrays) - // This fixes cases where corrupted data in history has malformed tool inputs - // that would cause API errors like "Input should be a valid dictionary" - const sanitizedMessages = sanitizeToolInputs(redactedForProvider); - log.debug_obj(`${workspaceId}/2b_sanitized_messages.json`, sanitizedMessages); - - // Convert MuxMessage to ModelMessage format using Vercel AI SDK utility - // Type assertion needed because MuxMessage has custom tool parts for interrupted tools - // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-argument - const modelMessages = convertToModelMessages(sanitizedMessages as any, { - // Drop unfinished tool calls (input-streaming/input-available) so downstream - // transforms only see tool calls that actually produced outputs. - ignoreIncompleteToolCalls: true, - }); - log.debug_obj(`${workspaceId}/2_model_messages.json`, modelMessages); - - // Apply ModelMessage transforms based on provider requirements - const transformedMessages = transformModelMessages(modelMessages, providerName); - - // Apply cache control for Anthropic models AFTER transformation - const finalMessages = applyCacheControl(transformedMessages, modelString); - log.debug_obj(`${workspaceId}/3_final_messages.json`, finalMessages); - - // Validate the messages meet Anthropic requirements (Anthropic only) - if (providerName === "anthropic") { - const validation = validateAnthropicCompliance(finalMessages); - if (!validation.valid) { - log.error( - `Anthropic compliance validation failed: ${validation.error ?? "unknown error"}` - ); - // Continue anyway, as the API might be more lenient - } - } + // Note: Further message processing (mode transition, file changes, etc.) happens + // after runtime is created below, as we need runtime to read the plan file // Get workspace metadata to retrieve workspace path const metadataResult = await this.getWorkspaceMetadata(workspaceId); @@ -1057,6 +1008,79 @@ export class AIService extends EventEmitter { : planModeInstruction; } + // Read plan content for mode transition (plan → exec) + // Only read if switching to exec mode and last assistant was in plan mode + let planContentForTransition: string | undefined; + if (mode === "exec") { + const lastAssistantMessage = [...filteredMessages] + .reverse() + .find((m) => m.role === "assistant"); + if (lastAssistantMessage?.metadata?.mode === "plan") { + const planFilePath = getPlanFilePath(workspaceId); + try { + const content = await readFileString(runtime, planFilePath); + if (content.trim()) { + planContentForTransition = content; + } + } catch { + // No plan file exists, proceed without plan content + } + } + } + + // Now inject mode transition context with plan content (runtime is now available) + const messagesWithModeContext = injectModeTransition( + messagesWithSentinel, + mode, + toolNamesForSentinel, + planContentForTransition + ); + + // Inject file change notifications as user messages (preserves system message cache) + const messagesWithFileChanges = injectFileChangeNotifications( + messagesWithModeContext, + changedFileAttachments + ); + + // Apply centralized tool-output redaction BEFORE converting to provider ModelMessages + // This keeps the persisted/UI history intact while trimming heavy fields for the request + const redactedForProvider = applyToolOutputRedaction(messagesWithFileChanges); + log.debug_obj(`${workspaceId}/2a_redacted_messages.json`, redactedForProvider); + + // Sanitize tool inputs to ensure they are valid objects (not strings or arrays) + // This fixes cases where corrupted data in history has malformed tool inputs + // that would cause API errors like "Input should be a valid dictionary" + const sanitizedMessages = sanitizeToolInputs(redactedForProvider); + log.debug_obj(`${workspaceId}/2b_sanitized_messages.json`, sanitizedMessages); + + // Convert MuxMessage to ModelMessage format using Vercel AI SDK utility + // Type assertion needed because MuxMessage has custom tool parts for interrupted tools + // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-argument + const modelMessages = convertToModelMessages(sanitizedMessages as any, { + // Drop unfinished tool calls (input-streaming/input-available) so downstream + // transforms only see tool calls that actually produced outputs. + ignoreIncompleteToolCalls: true, + }); + log.debug_obj(`${workspaceId}/2_model_messages.json`, modelMessages); + + // Apply ModelMessage transforms based on provider requirements + const transformedMessages = transformModelMessages(modelMessages, providerName); + + // Apply cache control for Anthropic models AFTER transformation + const finalMessages = applyCacheControl(transformedMessages, modelString); + log.debug_obj(`${workspaceId}/3_final_messages.json`, finalMessages); + + // Validate the messages meet Anthropic requirements (Anthropic only) + if (providerName === "anthropic") { + const validation = validateAnthropicCompliance(finalMessages); + if (!validation.valid) { + log.error( + `Anthropic compliance validation failed: ${validation.error ?? "unknown error"}` + ); + // Continue anyway, as the API might be more lenient + } + } + // Build system message from workspace metadata const systemMessage = await buildSystemMessage( metadata, diff --git a/src/node/services/tools/propose_plan.ts b/src/node/services/tools/propose_plan.ts index 05054d0df0..467b7d9c07 100644 --- a/src/node/services/tools/propose_plan.ts +++ b/src/node/services/tools/propose_plan.ts @@ -11,9 +11,13 @@ const proposePlanSchema = z.object({}); /** * Propose plan tool factory for AI assistant. - * The tool reads the plan from the plan file the agent wrote to. + * The tool validates the plan file exists and is non-empty. * If the plan file doesn't exist, it returns an error instructing * the agent to write the plan first. + * + * Note: Plan content is NOT included in the tool result to save context. + * The plan is already visible via file_edit_* diffs in history, and will be + * included in the mode transition message when switching to exec mode. */ export const createProposePlanTool: ToolFactory = (config) => { return tool({ @@ -66,7 +70,6 @@ export const createProposePlanTool: ToolFactory = (config) => { return { success: true as const, planPath, - planContent, message: "Plan proposed. Waiting for user approval.", }; },