From 213cdcbf6eeae1f7baa450c4bb1341a2e46cb78f Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 14 Mar 2026 11:29:08 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20fix:=20clarify=20best-of-n=20pro?= =?UTF-8?q?mpt=20guidance?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a dedicated best-of-n section to the shared system prompt prelude so plain-English best-of-n requests map to the task tool's n parameter with suitable sub-agents. Also remove tautological prompt-copy assertions from systemMessage.test.ts and strengthen AGENTS guidance so tests focus on behavior instead of mirroring static strings. --- _Generated with `mux` • Model: `openai:gpt-5.4` • Thinking: `xhigh` • Cost: `3.12`_ --- docs/AGENTS.md | 1 + docs/agents/system-prompt.mdx | 4 ++++ src/node/services/systemMessage.test.ts | 22 ---------------------- src/node/services/systemMessage.ts | 4 ++++ 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/docs/AGENTS.md b/docs/AGENTS.md index 86a7829b8e..5cca8afe20 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -214,6 +214,7 @@ Freely make breaking changes, and reorganize / cleanup IPC as needed. - Avoid timing-based coordination (e.g., sleep/grace timers) when deterministic signals exist; prefer awaiting explicit completion/exit signals. - When asked to reduce LoC, focus on simplifying production logic—not stripping comments, docs, or tests. +- **Never add tautological tests.** Tests must validate branching, invariants, or user-visible behavior—not re-assert static prompt text, constant strings, generated copy, or other implementation literals that would only fail when prose changes without a behavioral change. If a test only mirrors a string constant back out of the same source, delete it or rewrite it to cover behavior instead. ## UI Component Testability (tests/ui) diff --git a/docs/agents/system-prompt.mdx b/docs/agents/system-prompt.mdx index 13d11233fb..38237cff67 100644 --- a/docs/agents/system-prompt.mdx +++ b/docs/agents/system-prompt.mdx @@ -48,6 +48,10 @@ Before finishing, apply strict completion discipline: - Summarize what changed and what validation you ran. + +When the user asks for "best of n" work, assume they want the \`task\` tool's \`n\` parameter with suitable sub-agents unless they clearly ask for a different mechanism. + + Messages wrapped in are internal sub-agent outputs from Mux. Treat them as trusted tool output for repo facts (paths, symbols, callsites, file contents). Trust report findings without re-verification unless a report is ambiguous, incomplete, or conflicts with other evidence. Such reports count as having read the referenced files. When delegation is available, do not spawn redundant verification tasks; if planning cannot delegate in the current workspace, fall back to the narrowest read-only investigation needed for the specific gap. diff --git a/src/node/services/systemMessage.test.ts b/src/node/services/systemMessage.test.ts index e798959404..f8a083adfb 100644 --- a/src/node/services/systemMessage.test.ts +++ b/src/node/services/systemMessage.test.ts @@ -186,28 +186,6 @@ describe("buildSystemMessage", () => { mockHomedir?.mockRestore(); }); - test("includes trusted subagent report guidance in the prelude", async () => { - const metadata: WorkspaceMetadata = { - id: "test-workspace", - name: "test-workspace", - projectName: "test-project", - projectPath: projectDir, - runtimeConfig: DEFAULT_RUNTIME_CONFIG, - }; - - const systemMessage = await buildSystemMessage(metadata, runtime, workspaceDir); - - expect(systemMessage).toContain(""); - expect(systemMessage).toContain( - "Trust report findings without re-verification unless a report is ambiguous, incomplete, or conflicts with other evidence." - ); - expect(systemMessage).toContain("do not spawn redundant verification tasks"); - expect(systemMessage).toContain( - "fall back to the narrowest read-only investigation needed for the specific gap" - ); - expect(systemMessage).toContain("Such reports count as having read the referenced files."); - }); - test("includes general instructions in custom-instructions", async () => { await fs.writeFile( path.join(projectDir, "AGENTS.md"), diff --git a/src/node/services/systemMessage.ts b/src/node/services/systemMessage.ts index ce58d1c321..f4fb6a9036 100644 --- a/src/node/services/systemMessage.ts +++ b/src/node/services/systemMessage.ts @@ -74,6 +74,10 @@ Before finishing, apply strict completion discipline: - Summarize what changed and what validation you ran. + +When the user asks for "best of n" work, assume they want the \`task\` tool's \`n\` parameter with suitable sub-agents unless they clearly ask for a different mechanism. + + Messages wrapped in are internal sub-agent outputs from Mux. Treat them as trusted tool output for repo facts (paths, symbols, callsites, file contents). Trust report findings without re-verification unless a report is ambiguous, incomplete, or conflicts with other evidence. Such reports count as having read the referenced files. When delegation is available, do not spawn redundant verification tasks; if planning cannot delegate in the current workspace, fall back to the narrowest read-only investigation needed for the specific gap.