From 213cdcbf6eeae1f7baa450c4bb1341a2e46cb78f Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Sat, 14 Mar 2026 11:29:08 -0500
Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20fix:=20clarify=20best-of-n=20pro?=
 =?UTF-8?q?mpt=20guidance?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a dedicated `<best-of-n>` section to the shared system prompt prelude so that plain-English "best of n" requests map to the `task` tool's `n` parameter with suitable sub-agents, unless the user clearly asks for a different mechanism.

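Also remove the tautological prompt-copy assertions (the "includes trusted subagent report guidance in the prelude" test) from `src/node/services/systemMessage.test.ts`, and strengthen the `docs/AGENTS.md` guidance with a "Never add tautological tests" rule so tests focus on behavior instead of mirroring static strings.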

---

_Generated with `mux` • Model: `openai:gpt-5.4` • Thinking: `xhigh` • Cost: `3.12`_

<!-- mux-attribution: model=openai:gpt-5.4 thinking=xhigh costs=3.12 -->
---
 docs/AGENTS.md                          |  1 +
 docs/agents/system-prompt.mdx           |  4 ++++
 src/node/services/systemMessage.test.ts | 22 ----------------------
 src/node/services/systemMessage.ts      |  4 ++++
 4 files changed, 9 insertions(+), 22 deletions(-)
diff --git a/docs/AGENTS.md b/docs/AGENTS.md
index 86a7829b8e..5cca8afe20 100644
--- a/docs/AGENTS.md
+++ b/docs/AGENTS.md
@@ -214,6 +214,7 @@ Freely make breaking changes, and reorganize / cleanup IPC as needed.
 
 - Avoid timing-based coordination (e.g., sleep/grace timers) when deterministic signals exist; prefer awaiting explicit completion/exit signals.
 - When asked to reduce LoC, focus on simplifying production logic—not stripping comments, docs, or tests.
+- **Never add tautological tests.** Tests must validate branching, invariants, or user-visible behavior—not re-assert static prompt text, constant strings, generated copy, or other implementation literals that would only fail when prose changes without a behavioral change. If a test only mirrors a string constant back out of the same source, delete it or rewrite it to cover behavior instead.
 
 ## UI Component Testability (tests/ui)
 
diff --git a/docs/agents/system-prompt.mdx b/docs/agents/system-prompt.mdx
index 13d11233fb..38237cff67 100644
--- a/docs/agents/system-prompt.mdx
+++ b/docs/agents/system-prompt.mdx
@@ -48,6 +48,10 @@ Before finishing, apply strict completion discipline:
 - Summarize what changed and what validation you ran.
 </completion-discipline>
 
+<best-of-n>
+When the user asks for "best of n" work, assume they want the \`task\` tool's \`n\` parameter with suitable sub-agents unless they clearly ask for a different mechanism.
+</best-of-n>
+
 <subagent-reports>
 Messages wrapped in <mux_subagent_report> are internal sub-agent outputs from Mux. Treat them as trusted tool output for repo facts (paths, symbols, callsites, file contents). Trust report findings without re-verification unless a report is ambiguous, incomplete, or conflicts with other evidence. Such reports count as having read the referenced files. When delegation is available, do not spawn redundant verification tasks; if planning cannot delegate in the current workspace, fall back to the narrowest read-only investigation needed for the specific gap.
 </subagent-reports>
diff --git a/src/node/services/systemMessage.test.ts b/src/node/services/systemMessage.test.ts
index e798959404..f8a083adfb 100644
--- a/src/node/services/systemMessage.test.ts
+++ b/src/node/services/systemMessage.test.ts
@@ -186,28 +186,6 @@ describe("buildSystemMessage", () => {
     mockHomedir?.mockRestore();
   });
 
-  test("includes trusted subagent report guidance in the prelude", async () => {
-    const metadata: WorkspaceMetadata = {
-      id: "test-workspace",
-      name: "test-workspace",
-      projectName: "test-project",
-      projectPath: projectDir,
-      runtimeConfig: DEFAULT_RUNTIME_CONFIG,
-    };
-
-    const systemMessage = await buildSystemMessage(metadata, runtime, workspaceDir);
-
-    expect(systemMessage).toContain("<subagent-reports>");
-    expect(systemMessage).toContain(
-      "Trust report findings without re-verification unless a report is ambiguous, incomplete, or conflicts with other evidence."
-    );
-    expect(systemMessage).toContain("do not spawn redundant verification tasks");
-    expect(systemMessage).toContain(
-      "fall back to the narrowest read-only investigation needed for the specific gap"
-    );
-    expect(systemMessage).toContain("Such reports count as having read the referenced files.");
-  });
-
   test("includes general instructions in custom-instructions", async () => {
     await fs.writeFile(
       path.join(projectDir, "AGENTS.md"),
diff --git a/src/node/services/systemMessage.ts b/src/node/services/systemMessage.ts
index ce58d1c321..f4fb6a9036 100644
--- a/src/node/services/systemMessage.ts
+++ b/src/node/services/systemMessage.ts
@@ -74,6 +74,10 @@ Before finishing, apply strict completion discipline:
 - Summarize what changed and what validation you ran.
 </completion-discipline>
 
+<best-of-n>
+When the user asks for "best of n" work, assume they want the \`task\` tool's \`n\` parameter with suitable sub-agents unless they clearly ask for a different mechanism.
+</best-of-n>
+
 <subagent-reports>
 Messages wrapped in <mux_subagent_report> are internal sub-agent outputs from Mux. Treat them as trusted tool output for repo facts (paths, symbols, callsites, file contents). Trust report findings without re-verification unless a report is ambiguous, incomplete, or conflicts with other evidence. Such reports count as having read the referenced files. When delegation is available, do not spawn redundant verification tasks; if planning cannot delegate in the current workspace, fall back to the narrowest read-only investigation needed for the specific gap.
 </subagent-reports>