diff --git a/docs/AGENTS.md b/docs/AGENTS.md index 86a7829b8e..5cca8afe20 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -214,6 +214,7 @@ Freely make breaking changes, and reorganize / cleanup IPC as needed. - Avoid timing-based coordination (e.g., sleep/grace timers) when deterministic signals exist; prefer awaiting explicit completion/exit signals. - When asked to reduce LoC, focus on simplifying production logic—not stripping comments, docs, or tests. +- **Never add tautological tests.** Tests must validate branching, invariants, or user-visible behavior—not re-assert static prompt text, constant strings, generated copy, or other implementation literals that would only fail when prose changes without a behavioral change. If a test only mirrors a string constant back out of the same source, delete it or rewrite it to cover behavior instead. ## UI Component Testability (tests/ui) diff --git a/docs/agents/system-prompt.mdx b/docs/agents/system-prompt.mdx index 13d11233fb..38237cff67 100644 --- a/docs/agents/system-prompt.mdx +++ b/docs/agents/system-prompt.mdx @@ -48,6 +48,10 @@ Before finishing, apply strict completion discipline: - Summarize what changed and what validation you ran. + +When the user asks for "best of n" work, assume they want the \`task\` tool's \`n\` parameter with suitable sub-agents unless they clearly ask for a different mechanism. + + Messages wrapped in are internal sub-agent outputs from Mux. Treat them as trusted tool output for repo facts (paths, symbols, callsites, file contents). Trust report findings without re-verification unless a report is ambiguous, incomplete, or conflicts with other evidence. Such reports count as having read the referenced files. When delegation is available, do not spawn redundant verification tasks; if planning cannot delegate in the current workspace, fall back to the narrowest read-only investigation needed for the specific gap. diff --git a/src/node/services/systemMessage.test.ts b/src/node/services/systemMessage.test.ts index e798959404..f8a083adfb 100644 --- a/src/node/services/systemMessage.test.ts +++ b/src/node/services/systemMessage.test.ts @@ -186,28 +186,6 @@ describe("buildSystemMessage", () => { mockHomedir?.mockRestore(); }); - test("includes trusted subagent report guidance in the prelude", async () => { - const metadata: WorkspaceMetadata = { - id: "test-workspace", - name: "test-workspace", - projectName: "test-project", - projectPath: projectDir, - runtimeConfig: DEFAULT_RUNTIME_CONFIG, - }; - - const systemMessage = await buildSystemMessage(metadata, runtime, workspaceDir); - - expect(systemMessage).toContain(""); - expect(systemMessage).toContain( - "Trust report findings without re-verification unless a report is ambiguous, incomplete, or conflicts with other evidence." - ); - expect(systemMessage).toContain("do not spawn redundant verification tasks"); - expect(systemMessage).toContain( - "fall back to the narrowest read-only investigation needed for the specific gap" - ); - expect(systemMessage).toContain("Such reports count as having read the referenced files."); - }); - test("includes general instructions in custom-instructions", async () => { await fs.writeFile( path.join(projectDir, "AGENTS.md"), diff --git a/src/node/services/systemMessage.ts b/src/node/services/systemMessage.ts index ce58d1c321..f4fb6a9036 100644 --- a/src/node/services/systemMessage.ts +++ b/src/node/services/systemMessage.ts @@ -74,6 +74,10 @@ Before finishing, apply strict completion discipline: - Summarize what changed and what validation you ran. + +When the user asks for "best of n" work, assume they want the \`task\` tool's \`n\` parameter with suitable sub-agents unless they clearly ask for a different mechanism. + + Messages wrapped in are internal sub-agent outputs from Mux. Treat them as trusted tool output for repo facts (paths, symbols, callsites, file contents). Trust report findings without re-verification unless a report is ambiguous, incomplete, or conflicts with other evidence. Such reports count as having read the referenced files. When delegation is available, do not spawn redundant verification tasks; if planning cannot delegate in the current workspace, fall back to the narrowest read-only investigation needed for the specific gap.