From ed05e865ef703e9b28face0e4cb16b75c01a88dd Mon Sep 17 00:00:00 2001 From: Ammar Date: Wed, 29 Apr 2026 16:13:43 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20refactor:=20remove=20system=201?= =?UTF-8?q?=20feature?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the System 1 experiment, settings UI, send-option plumbing, internal bash compaction agent/tooling, and generated docs references.\n\n---\n\n_Generated with `mux` • Model: `openai:gpt-5.5` • Thinking: `high` • Cost: `$24.72`_\n\n --- docs/agents/index.mdx | 78 -- docs/hooks/tools.mdx | 12 - scripts/system1-noisy-output.ts | 539 ------------- .../contexts/ExperimentsContext.test.tsx | 6 +- .../Sections/ExperimentsSection.stories.tsx | 2 +- .../Sections/System1Section.stories.tsx | 82 -- .../Settings/Sections/System1Section.tsx | 491 ------------ .../Settings/Sections/TasksSection.agents.ts | 10 - .../Settings/Sections/TasksSection.tsx | 9 +- .../Settings/SettingsPage.stories.tsx | 38 +- .../features/Settings/SettingsPage.tsx | 25 +- src/browser/features/Tools/BashToolCall.tsx | 16 +- src/browser/hooks/useExperiments.test.ts | 16 +- src/browser/hooks/useSendMessageOptions.ts | 25 +- src/browser/stores/WorkspaceStore.ts | 19 +- .../utils/messages/buildSendMessageOptions.ts | 24 +- .../messages/liveBashOutputBuffer.test.ts | 16 - .../utils/messages/liveBashOutputBuffer.ts | 22 +- .../utils/messages/sendOptions.test.ts | 20 +- src/browser/utils/messages/sendOptions.ts | 18 +- .../utils/modelPreferenceRepair.test.ts | 5 +- src/browser/utils/modelPreferenceRepair.ts | 2 - src/cli/run.ts | 1 - src/common/config/schemas/taskSettings.ts | 32 - src/common/constants/experiments.ts | 9 - src/common/constants/storage.ts | 12 - src/common/orpc/schemas/stream.ts | 7 - src/common/types/message.ts | 4 - src/common/types/tasks.test.ts | 28 +- src/common/types/tasks.ts | 82 +- src/common/utils/agentTools.test.ts | 2 +- src/common/utils/tools/toolDefinitions.ts | 46 -- src/common/utils/tools/tools.ts | 2 - src/common/utils/truncateBashOutput.ts | 53 -- src/node/acp/streamTranslator.ts | 1 - src/node/builtinAgents/desktop.md | 2 - src/node/builtinAgents/exec.md | 2 - src/node/builtinAgents/system1_bash.md | 63 -- .../builtInAgentContent.generated.ts | 5 +- .../builtInAgentDefinitions.ts | 1 - .../agentSession.startupAutoRetry.test.ts | 6 - src/node/services/agentSession.ts | 30 - .../builtInSkillContent.generated.ts | 90 --- src/node/services/aiService.test.ts | 64 -- src/node/services/aiService.ts | 31 +- src/node/services/experimentsService.test.ts | 24 +- .../system1/bashCompactionPolicy.test.ts | 292 ------- .../services/system1/bashCompactionPolicy.ts | 421 ---------- .../system1/bashOutputFiltering.test.ts | 155 ---- .../services/system1/bashOutputFiltering.ts | 228 ------ .../system1/system1AgentRunner.test.ts | 284 ------- .../services/system1/system1AgentRunner.ts | 228 ------ src/node/services/system1ToolWrapper.ts | 741 ------------------ .../telemetryService.featureFlags.test.ts | 4 +- .../services/tools/system1_keep_ranges.ts | 43 - src/node/services/workspaceService.ts | 39 +- .../system1BashCompaction.matrix.test.ts | 221 ------ tests/ui/agents/thinkingPolicy.test.ts | 105 --- 58 files changed, 53 insertions(+), 4780 deletions(-) delete mode 100644 scripts/system1-noisy-output.ts delete mode 100644 src/browser/features/Settings/Sections/System1Section.stories.tsx delete mode 100644 src/browser/features/Settings/Sections/System1Section.tsx delete mode 100644 src/common/utils/truncateBashOutput.ts delete mode 100644 src/node/builtinAgents/system1_bash.md delete mode 100644 src/node/services/system1/bashCompactionPolicy.test.ts delete mode 100644 src/node/services/system1/bashCompactionPolicy.ts delete mode 100644 src/node/services/system1/bashOutputFiltering.test.ts delete mode 100644 src/node/services/system1/bashOutputFiltering.ts delete mode 100644 src/node/services/system1/system1AgentRunner.test.ts delete mode 100644 src/node/services/system1/system1AgentRunner.ts delete mode 100644 src/node/services/system1ToolWrapper.ts delete mode 100644 src/node/services/tools/system1_keep_ranges.ts delete mode 100644 tests/ipc/streaming/system1BashCompaction.matrix.test.ts delete mode 100644 tests/ui/agents/thinkingPolicy.test.ts diff --git a/docs/agents/index.mdx b/docs/agents/index.mdx index 188acbbe60..ce30baee01 100644 --- a/docs/agents/index.mdx +++ b/docs/agents/index.mdx @@ -277,8 +277,6 @@ tools: # Exec mode doesn't use planning tools - propose_plan - ask_user_question - # Internal-only tools - - system1_keep_ranges # Global config and catalog tools stay out of general-purpose agents - mux_agents_.* - agent_skill_write @@ -647,8 +645,6 @@ tools: # No planning tools - propose_plan - ask_user_question - # Internal-only - - system1_keep_ranges # Global config and catalog tools - mux_agents_.* - agent_skill_write @@ -746,80 +742,6 @@ Do not emit text responses. Call the `propose_name` tool immediately. -### System1 Bash (internal) - -**Fast bash-output filtering (internal)** - - - -```md ---- -name: System1 Bash -description: Fast bash-output filtering (internal) -ui: - hidden: true -subagent: - runnable: false -tools: - add: - - system1_keep_ranges ---- - -You are a fast bash-output filtering assistant. - -You will be given: - -- `maxKeptLines` (budget) -- `Display name` (optional): a short intent label for the command -- `Bash script` -- `Numbered output` - -Given the numbered output, decide which lines to keep so the user sees the most relevant information. - -IMPORTANT: - -- You MUST call `system1_keep_ranges` exactly once. -- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments). - -Rules: - -- Line numbers are 1-based indices into the numbered output. -- Use the `Display name` and `Bash script` as intent hints. -- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping - representative file paths/matches and any summary/counts (not just errors). -- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings. -- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra - denoising: prefer keeping most/all lines within the budget. -- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget. -- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest - next-step commands or point to docs/help. Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths, - and conflict markers instead. -- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when - the hint is the only clue explaining a blocking state. -- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context. -- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just - to reduce range count; it's OK to return many ranges when denoising (e.g., > 8). -- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning - (e.g., repeated progress, retries, or identical stack traces). If the same error repeats, keep only - the most informative instance plus minimal surrounding context. -- If there are many similar warnings/errors, keep only a few representative examples (prefer those - with file paths/line numbers) plus any summary/count. -- Always keep at least 1 line if any output exists. -- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate). - -Example: - -- Numbered output: - - 0001| building... - - 0002| ERROR: expected X, got Y - - 0003| at path/to/file.ts:12:3 - - 0004| done -- Tool call: - - system1_keep_ranges({"keep_ranges":[{"start":2,"end":3,"reason":"error"}]}) -``` - - - {/* END BUILTIN_AGENTS */} ## Related Docs diff --git a/docs/hooks/tools.mdx b/docs/hooks/tools.mdx index b1fd66df3d..0f1e9d8054 100644 --- a/docs/hooks/tools.mdx +++ b/docs/hooks/tools.mdx @@ -594,18 +594,6 @@ If a value is too large for the environment, it may be omitted (not set). Mux al -
-system1_keep_ranges (4) - -| Env var | JSON path | Type | Description | -| ------------------------------------------- | ----------------------------- | ------ | ------------------------------------------------------- | -| `MUX_TOOL_INPUT_KEEP_RANGES__END` | `keep_ranges[].end` | number | 1-based end line (inclusive) in the numbered output | -| `MUX_TOOL_INPUT_KEEP_RANGES__REASON` | `keep_ranges[].reason` | string | Optional short reason for keeping this range | -| `MUX_TOOL_INPUT_KEEP_RANGES__START` | `keep_ranges[].start` | number | 1-based start line (inclusive) in the numbered output | -| `MUX_TOOL_INPUT_KEEP_RANGES_COUNT` | `keep_ranges.length` | number | Number of elements in keep_ranges (Line ranges to keep) | - -
-
task (8) diff --git a/scripts/system1-noisy-output.ts b/scripts/system1-noisy-output.ts deleted file mode 100644 index c7c7566943..0000000000 --- a/scripts/system1-noisy-output.ts +++ /dev/null @@ -1,539 +0,0 @@ -#!/usr/bin/env bun -/** - * Emit deliberately noisy stdout designed to trigger Mux "System 1" bash-output filtering. - * - * Goal: - * - Large enough to trigger System 1 filtering (lines > 10 and/or bytes > 4KB) - * - Small enough to avoid the bash tool tmpfile overflow path per burst (bytes < 16KB, lines < 300) - * - Output looks like plausible text/log spam rather than random gibberish - * - Contains a single "needle" phrase embedded inside the noise (not on a standalone ERROR line) - * - * Usage: - * bun scripts/system1-noisy-output.ts - * bun scripts/system1-noisy-output.ts --bursts 5 --sleep-ms 200 - * bun scripts/system1-noisy-output.ts --git - * bun scripts/system1-noisy-output.ts --needles - * - * Notes: - * - Use --bursts/--sleep-ms for background-bash testing (bash_output/task_await). - * In foreground mode, multiple bursts may exceed the bash tool output limits. - */ - -import assert from "node:assert/strict"; -import { randomInt } from "node:crypto"; - -// Keep these in sync with: -// - src/node/services/aiService.ts (System 1 trigger) -// - src/common/constants/toolLimits.ts (bash tool output limits) -const SYSTEM1_BASH_MIN_LINES = 10; -const SYSTEM1_BASH_MIN_TOTAL_BYTES = 4 * 1024; - -const BASH_HARD_MAX_LINES = 300; -const BASH_MAX_TOTAL_BYTES = 16 * 1024; -const BASH_MAX_LINE_BYTES = 1024; - -// Safety margin to avoid accidental boundary changes (encoding/newlines, etc.). -const TARGET_MAX_TOTAL_BYTES_SINGLE = BASH_MAX_TOTAL_BYTES - 512; - -// For burst mode, keep each burst modest so the final task output is still manageable. -const TARGET_MAX_TOTAL_BYTES_BURST = Math.min( - TARGET_MAX_TOTAL_BYTES_SINGLE, - SYSTEM1_BASH_MIN_TOTAL_BYTES + 2048 -); - -const TARGET_MAX_TOTAL_BYTES_NEEDLES = Math.min( - TARGET_MAX_TOTAL_BYTES_SINGLE, - SYSTEM1_BASH_MIN_TOTAL_BYTES + 8 * 1024 -); - -// Keep lines short and ASCII-only so byte counts are predictable. -const TEXT_LINE_TARGET_LEN = 96; - -// Intentionally a plausible-looking phrase (no digits) so it can be buried in noise. -// We keep it constant so it is easy to grep for during manual testing. -const NEEDLE_PHRASE = "maecenas faucibus mollis interdum"; - -// A realistic-ish git rebase conflict transcript, condensed to remove hint blocks and repeated -// "Rebasing (n/N)" progress spam. -// -// This fixture is intentionally > 10 lines so it reliably triggers System 1 filtering. -const GIT_REBASE_CONFLICT_OUTPUT_LINES = [ - "Rebasing (6/14)", - "Applying: chore: format", - "Applying: refactor(router): split handlers", - "Applying: refactor(router): clean up error reporting", - "Applying: fix: include plan path in harness bearings", - "Applying: tests: update router snapshots", - "Applying: build: regenerate orpc types", - "Auto-merging src/node/orpc/router.ts", - "CONFLICT (content): Merge conflict in src/node/orpc/router.ts", - "error: could not apply 678c593ed... fix: include plan path in harness bearings", - "Could not apply 678c593ed... fix: include plan path in harness bearings", -] as const; - -const DEVELOPER_SIGNAL_LINES = [ - // High-priority (should always surface): - "Error: ECONNREFUSED 127.0.0.1:5432 - database connection failed", - "FATAL: Out of memory, killing process", - "warning: deprecated API 'fetchSync' will be removed in v3.0", - "Tests: 47 passed, 2 failed, 1 skipped", - "Build failed with exit code 1", - // Medium-priority (likely important): - "Listening on http://localhost:3000", - "Compiled successfully in 1.2s", - "warning: unused variable 'config' on line 42", - "Migration 20240115_add_users applied successfully", - // Subtle/tricky (tests semantic understanding): - "Retrying request (attempt 3/3)...", - "Skipping optional dependency: fsevents", - "Connection pool exhausted, waiting...", - // False positive traps (should NOT surface): - "Processing error_handler.ts...", - "const WARNING_LEVEL = 3;", - "// TODO: handle edge case", - // Security-sensitive (definitely surface): - "API_KEY=sk-live-abc123...", - "Permission denied: /etc/shadow", -] as const; - -const LOREM_WORDS = [ - // Classic lorem ipsum core - "lorem", - "ipsum", - "dolor", - "sit", - "amet", - "consectetur", - "adipiscing", - "elit", - "sed", - "do", - "eiusmod", - "tempor", - "incididunt", - "ut", - "labore", - "et", - "dolore", - "magna", - "aliqua", - "enim", - "ad", - "minim", - "veniam", - "quis", - "nostrud", - "exercitation", - "ullamco", - "laboris", - "nisi", - "aliquip", - "ex", - "ea", - "commodo", - "consequat", - "duis", - "aute", - "irure", - "in", - "reprehenderit", - "voluptate", - "velit", - "esse", - "cillum", - "fugiat", - "nulla", - "pariatur", - "excepteur", - "sint", - "occaecat", - "cupidatat", - "non", - "proident", - "sunt", - "culpa", - "qui", - "officia", - "deserunt", - "mollit", - "anim", - "id", - "est", - "laborum", - // Additional filler to make output look less uniform - "pellentesque", - "habitant", - "morbi", - "tristique", - "senectus", - "netus", - "malesuada", - "fames", - "turpis", - "egestas", - "vestibulum", - "tortor", - "quam", - "feugiat", - "vitae", - "ultricies", - "eget", - "ante", - "donec", - "eu", - "libero", - "quam", - "semper", - "aenean", - "mauris", - "placerat", - "eleifend", - "leo", -]; - -function lineBytesWithNewline(line: string): number { - const bytes = Buffer.byteLength(line, "utf8"); - assert( - bytes < BASH_MAX_LINE_BYTES, - `Generated line exceeded ${BASH_MAX_LINE_BYTES} bytes (${bytes})` - ); - return bytes + 1; // + "\n" -} - -function parseArgs(argv: string[]): { - bursts: number; - sleepMs: number; - isGit: boolean; - isNeedles: boolean; -} { - let bursts = 1; - let sleepMs = 0; - let isGit = false; - let isNeedles = false; - - for (let i = 0; i < argv.length; i++) { - const arg = argv[i]; - - if (arg === "--git") { - isGit = true; - continue; - } - - if (arg === "--needles") { - isNeedles = true; - continue; - } - - if (arg === "--bursts") { - const raw = argv[i + 1]; - assert(raw !== undefined, "--bursts requires a value"); - bursts = Number.parseInt(raw, 10); - i += 1; - continue; - } - - if (arg === "--sleep-ms") { - const raw = argv[i + 1]; - assert(raw !== undefined, "--sleep-ms requires a value"); - sleepMs = Number.parseInt(raw, 10); - i += 1; - continue; - } - - if (arg === "--help" || arg === "-h") { - // Avoid printing in normal use; throwing is fine for dev tooling. - throw new Error( - "Usage: bun scripts/system1-noisy-output.ts [--git] [--needles] [--bursts N] [--sleep-ms MS]" - ); - } - - throw new Error(`Unknown arg: ${arg}`); - } - - assert(Number.isInteger(bursts) && bursts >= 1, "--bursts must be an integer >= 1"); - assert(Number.isInteger(sleepMs) && sleepMs >= 0, "--sleep-ms must be an integer >= 0"); - assert(!(isGit && isNeedles), "--git and --needles are mutually exclusive"); - - if (isGit || isNeedles) { - assert(bursts === 1, "--git/--needles mode does not support --bursts"); - assert(sleepMs === 0, "--git/--needles mode does not support --sleep-ms"); - } - - return { bursts, sleepMs, isGit, isNeedles }; -} - -function getGitRebaseConflictOutput(): string { - const lines = [...GIT_REBASE_CONFLICT_OUTPUT_LINES]; - - // Ensure we *definitely* cross System 1 activation thresholds. - assert(lines.length > SYSTEM1_BASH_MIN_LINES, "Git output did not exceed System 1 min lines"); - - const output = lines.join("\n") + "\n"; - const outputBytes = Buffer.byteLength(output, "utf8"); - - // Defensive checks (should never fire unless assumptions change). - assert(lines.length < BASH_HARD_MAX_LINES, "Git output exceeded bash max lines"); - assert(outputBytes < BASH_MAX_TOTAL_BYTES, "Git output exceeded bash max bytes"); - - // Ensure we never emit a single line that breaks the tool output limit. - for (const line of lines) { - void lineBytesWithNewline(line); - } - - // Sanity: keep the core conflict lines intact. - assert(output.includes("CONFLICT (content):"), "Git output missing CONFLICT line"); - assert(output.includes("could not apply"), "Git output missing could-not-apply line"); - - return output; -} - -function makePlausibleDevLogLine(index: number): string { - assert(Number.isInteger(index) && index >= 0, "index must be a non-negative integer"); - - const fileId = String(index % 97).padStart(2, "0"); - - switch (index % 6) { - case 0: - return `info: [builder] transpiling src/module-${fileId}.ts -> dist/module-${fileId}.js`; - case 1: - return `info: [builder] resolved ${100 + index} modules, ${20 + (index % 15)} cached`; - case 2: - return `debug: [cache] hit for pkg-${index % 17}@${1 + (index % 3)}.${index % 10}.${index % 7}`; - case 3: - return `info: [bundler] emitted chunk-${index % 12} (${50 + (index % 40)}kb)`; - case 4: - return `info: [lint] checking src/file-${fileId}.ts (mode=${index % 4 === 0 ? "fast" : "full"})`; - case 5: - return `debug: [timing] step=${index} duration=${(index % 9) + 1}.${index % 10}s`; - default: - // This should be unreachable due to modulo. - assert(false, "Unreachable dev log line case"); - } -} - -function getNeedlesOutput(params: { targetMaxTotalBytes: number }): string { - assert( - Number.isInteger(params.targetMaxTotalBytes) && params.targetMaxTotalBytes > 0, - "targetMaxTotalBytes must be a positive integer" - ); - - const lines: string[] = []; - let totalBytes = 0; - - const addLine = (line: string): void => { - const lineBytes = lineBytesWithNewline(line); - - // Must stay under bash tmpfile overflow limits or System 1 will never run. - assert(lines.length + 1 < BASH_HARD_MAX_LINES, "Exceeded bash max lines"); - assert(totalBytes + lineBytes < BASH_MAX_TOTAL_BYTES, "Exceeded bash max bytes"); - - lines.push(line); - totalBytes += lineBytes; - }; - - // Preamble: looks like a plausible command run. - addLine("$ bun run build"); - - let noiseIndex = 0; - const noiseBetweenSignals = 6; - - for (const signalLine of DEVELOPER_SIGNAL_LINES) { - for (let i = 0; i < noiseBetweenSignals; i++) { - addLine(makePlausibleDevLogLine(noiseIndex)); - noiseIndex += 1; - } - - addLine(signalLine); - } - - // Fill with more plausible noise so System 1 has to choose (max kept lines is limited). - for (;;) { - const candidate = makePlausibleDevLogLine(noiseIndex); - noiseIndex += 1; - - const candidateBytes = lineBytesWithNewline(candidate); - - if (lines.length + 1 >= BASH_HARD_MAX_LINES) break; - if (totalBytes + candidateBytes >= params.targetMaxTotalBytes) break; - - addLine(candidate); - } - - const output = lines.join("\n") + "\n"; - const outputBytes = Buffer.byteLength(output, "utf8"); - - // Final defensive checks (should never fire unless assumptions change). - assert(lines.length > SYSTEM1_BASH_MIN_LINES, "Needles output did not exceed System 1 min lines"); - assert( - outputBytes > SYSTEM1_BASH_MIN_TOTAL_BYTES, - "Needles output did not exceed System 1 min bytes" - ); - assert(outputBytes < BASH_MAX_TOTAL_BYTES, "Needles output exceeded bash max bytes"); - assert(lines.length < BASH_HARD_MAX_LINES, "Needles output exceeded bash max lines"); - - // Sanity: keep our intended signal lines intact. - for (const signalLine of DEVELOPER_SIGNAL_LINES) { - assert(output.includes(signalLine), `Needles output missing expected line: ${signalLine}`); - } - - return output; -} - -function capitalizeFirstLetter(text: string): string { - assert(typeof text === "string" && text.length > 0, "text must be a non-empty string"); - return text[0].toUpperCase() + text.slice(1); -} - -function randomLoremWord(): string { - return LOREM_WORDS[randomInt(0, LOREM_WORDS.length)]!; -} - -function makeLoremSentence(params: { - targetLen: number; - insertPhrase?: string | undefined; -}): string { - assert(Number.isInteger(params.targetLen) && params.targetLen > 0, "targetLen must be > 0"); - - const words: string[] = []; - let currentLen = 0; - - // Build up a sentence with word-ish lengths; keep it deterministic-ish by targeting characters. - while (currentLen < params.targetLen) { - const next = randomLoremWord(); - words.push(next); - currentLen += next.length + 1; - - // Avoid pathological loops if assumptions change. - assert(words.length < 200, "Generated too many words for a single line"); - } - - // Add a comma in roughly the middle to make it look slightly more natural. - if (words.length > 12) { - const commaAt = randomInt(4, Math.min(9, words.length - 2)); - words[commaAt] = `${words[commaAt]},`; - } - - if (typeof params.insertPhrase === "string" && params.insertPhrase.length > 0) { - const phraseWords = params.insertPhrase.split(/\s+/).filter(Boolean); - assert(phraseWords.length > 0, "insertPhrase produced no words"); - - // Prefer inserting somewhere "in the middle" so the phrase isn't the start or end of the line. - const minIndex = Math.min(6, Math.max(0, words.length - 1)); - const maxIndex = Math.max(minIndex + 1, words.length - 3); - const insertAt = randomInt(minIndex, maxIndex); - - words.splice(insertAt, 0, ...phraseWords); - } - - const sentence = capitalizeFirstLetter(words.join(" ")) + "."; - // Must not accidentally exceed the bash tool line limit. - void lineBytesWithNewline(sentence); - return sentence; -} - -function generateBurst(params: { - includeNeedle: boolean; - needlePhrase: string; - targetMaxTotalBytes: number; -}): string { - const lines: string[] = []; - let totalBytes = 0; - - let needleInserted = false; - const needleInsertAfterLines = randomInt(8, 18); - - const addLine = (line: string): void => { - const lineBytes = lineBytesWithNewline(line); - - // Must stay under bash tmpfile overflow limits or System 1 will never run. - assert(lines.length + 1 < BASH_HARD_MAX_LINES, "Exceeded bash max lines"); - assert(totalBytes + lineBytes < BASH_MAX_TOTAL_BYTES, "Exceeded bash max bytes"); - - lines.push(line); - totalBytes += lineBytes; - }; - - // Ensure we *definitely* cross System 1 activation thresholds. - while (lines.length <= SYSTEM1_BASH_MIN_LINES || totalBytes <= SYSTEM1_BASH_MIN_TOTAL_BYTES) { - if (params.includeNeedle && !needleInserted && lines.length >= needleInsertAfterLines) { - addLine( - makeLoremSentence({ - targetLen: TEXT_LINE_TARGET_LEN, - insertPhrase: params.needlePhrase, - }) - ); - needleInserted = true; - continue; - } - - addLine(makeLoremSentence({ targetLen: TEXT_LINE_TARGET_LEN })); - } - - // Defensive: ensure the needle exists even if our insertion assumptions change. - if (params.includeNeedle && !needleInserted) { - addLine( - makeLoremSentence({ - targetLen: TEXT_LINE_TARGET_LEN, - insertPhrase: params.needlePhrase, - }) - ); - } - - // Add as much additional noise as possible while staying safely under the target. - for (;;) { - const candidate = makeLoremSentence({ targetLen: TEXT_LINE_TARGET_LEN }); - const candidateBytes = lineBytesWithNewline(candidate); - - if (lines.length + 1 >= BASH_HARD_MAX_LINES) break; - if (totalBytes + candidateBytes >= params.targetMaxTotalBytes) break; - - addLine(candidate); - } - - const output = lines.join("\n") + "\n"; - const outputBytes = Buffer.byteLength(output, "utf8"); - - // Final defensive checks (should never fire unless assumptions change). - assert(lines.length > SYSTEM1_BASH_MIN_LINES, "Output did not exceed System 1 min lines"); - assert(outputBytes > SYSTEM1_BASH_MIN_TOTAL_BYTES, "Output did not exceed System 1 min bytes"); - assert(outputBytes < BASH_MAX_TOTAL_BYTES, "Output exceeded bash max bytes"); - assert(lines.length < BASH_HARD_MAX_LINES, "Output exceeded bash max lines"); - - return output; -} - -async function main(): Promise { - const { bursts, sleepMs, isGit, isNeedles } = parseArgs(process.argv.slice(2)); - - if (isGit) { - process.stdout.write(getGitRebaseConflictOutput()); - return; - } - - if (isNeedles) { - process.stdout.write( - getNeedlesOutput({ - targetMaxTotalBytes: TARGET_MAX_TOTAL_BYTES_NEEDLES, - }) - ); - return; - } - - const perBurstTarget = bursts > 1 ? TARGET_MAX_TOTAL_BYTES_BURST : TARGET_MAX_TOTAL_BYTES_SINGLE; - - for (let i = 0; i < bursts; i++) { - const output = generateBurst({ - includeNeedle: i === 0, - needlePhrase: NEEDLE_PHRASE, - targetMaxTotalBytes: perBurstTarget, - }); - - process.stdout.write(output); - - if (sleepMs > 0 && i < bursts - 1) { - await new Promise((resolve) => setTimeout(resolve, sleepMs)); - } - } -} - -await main(); diff --git a/src/browser/contexts/ExperimentsContext.test.tsx b/src/browser/contexts/ExperimentsContext.test.tsx index a8938eeb36..7fe517d69a 100644 --- a/src/browser/contexts/ExperimentsContext.test.tsx +++ b/src/browser/contexts/ExperimentsContext.test.tsx @@ -136,12 +136,12 @@ describe("ExperimentsProvider", () => { if (callCount === 1) { return Promise.resolve({ - [EXPERIMENT_IDS.SYSTEM_1]: { value: null, source: "cache" }, + [EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING]: { value: null, source: "cache" }, } satisfies Record); } return Promise.resolve({ - [EXPERIMENT_IDS.SYSTEM_1]: { value: "test", source: "posthog" }, + [EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING]: { value: "test", source: "posthog" }, } satisfies Record); }); @@ -180,7 +180,7 @@ describe("ExperimentsProvider", () => { }) as typeof globalThis.clearTimeout; function Observer() { - const enabled = useExperimentValue(EXPERIMENT_IDS.SYSTEM_1); + const enabled = useExperimentValue(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING); return
{String(enabled)}
; } diff --git a/src/browser/features/Settings/Sections/ExperimentsSection.stories.tsx b/src/browser/features/Settings/Sections/ExperimentsSection.stories.tsx index 22975a3de4..33463e198b 100644 --- a/src/browser/features/Settings/Sections/ExperimentsSection.stories.tsx +++ b/src/browser/features/Settings/Sections/ExperimentsSection.stories.tsx @@ -26,7 +26,7 @@ export const ExperimentsToggleOn: Story = { setupSettingsStory({ - experiments: { [EXPERIMENT_IDS.SYSTEM_1]: true }, + experiments: { [EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING]: true }, }) } > diff --git a/src/browser/features/Settings/Sections/System1Section.stories.tsx b/src/browser/features/Settings/Sections/System1Section.stories.tsx deleted file mode 100644 index a0686d239b..0000000000 --- a/src/browser/features/Settings/Sections/System1Section.stories.tsx +++ /dev/null @@ -1,82 +0,0 @@ -import { lightweightMeta } from "@/browser/stories/meta.js"; -import { EXPERIMENT_IDS } from "@/common/constants/experiments"; -import type { Meta, StoryObj } from "@storybook/react-vite"; -import { waitFor, within } from "@storybook/test"; -import { System1Section } from "./System1Section.js"; -import { SettingsSectionStory, setupSettingsStory } from "./settingsStoryUtils.js"; - -const meta: Meta = { - ...lightweightMeta, - title: "Settings/Sections/System1Section", - component: System1Section, -}; - -export default meta; -type Story = StoryObj; - -export const System1: Story = { - render: () => ( - - setupSettingsStory({ - experiments: { [EXPERIMENT_IDS.SYSTEM_1]: true }, - taskSettings: { - bashOutputCompactionMinLines: 12, - bashOutputCompactionMinTotalBytes: 8192, - bashOutputCompactionMaxKeptLines: 55, - bashOutputCompactionTimeoutMs: 9000, - }, - providersConfig: { - anthropic: { - apiKeySet: true, - isEnabled: true, - isConfigured: true, - baseUrl: "", - models: ["claude-sonnet-4-20250514", "claude-opus-4-20250514"], - }, - openai: { - apiKeySet: true, - isEnabled: true, - isConfigured: true, - baseUrl: "", - models: ["gpt-4o", "gpt-4o-mini", "o1-preview"], - }, - }, - }) - } - > - - - ), - play: async ({ canvasElement }) => { - const canvas = within(canvasElement); - - await canvas.findByText(/System 1 Model/i); - await canvas.findByText(/System 1 Reasoning/i); - await canvas.findByRole("heading", { name: /bash output compaction/i }); - - await waitFor(() => { - const inputs = canvas.queryAllByRole("spinbutton"); - if (inputs.length !== 4) { - throw new Error(`Expected 4 System 1 inputs, got ${inputs.length}`); - } - const minLines = (inputs[0] as HTMLInputElement).value; - const minTotalKb = (inputs[1] as HTMLInputElement).value; - const maxKeptLines = (inputs[2] as HTMLInputElement).value; - const timeoutSeconds = (inputs[3] as HTMLInputElement).value; - - if (minLines !== "12") { - throw new Error(`Expected minLines=12, got ${JSON.stringify(minLines)}`); - } - if (minTotalKb !== "8") { - throw new Error(`Expected minTotalKb=8, got ${JSON.stringify(minTotalKb)}`); - } - if (maxKeptLines !== "55") { - throw new Error(`Expected maxKeptLines=55, got ${JSON.stringify(maxKeptLines)}`); - } - if (timeoutSeconds !== "9") { - throw new Error(`Expected timeoutSeconds=9, got ${JSON.stringify(timeoutSeconds)}`); - } - }); - }, -}; diff --git a/src/browser/features/Settings/Sections/System1Section.tsx b/src/browser/features/Settings/Sections/System1Section.tsx deleted file mode 100644 index c6ca010d11..0000000000 --- a/src/browser/features/Settings/Sections/System1Section.tsx +++ /dev/null @@ -1,491 +0,0 @@ -import React, { useEffect, useRef, useState } from "react"; -import { Loader2 } from "lucide-react"; - -import { Switch } from "@/browser/components/Switch/Switch"; -import { Input } from "@/browser/components/Input/Input"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from "@/browser/components/SelectPrimitive/SelectPrimitive"; -import { useAPI } from "@/browser/contexts/API"; -import { useOptionalWorkspaceContext } from "@/browser/contexts/WorkspaceContext"; -import { getDefaultModel, getSuggestedModels } from "@/browser/hooks/useModelsFromSettings"; -import { useProvidersConfig } from "@/browser/hooks/useProvidersConfig"; -import { usePersistedState } from "@/browser/hooks/usePersistedState"; -import { - getModelKey, - PREFERRED_SYSTEM_1_MODEL_KEY, - PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, -} from "@/common/constants/storage"; -import { - DEFAULT_TASK_SETTINGS, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS, - normalizeTaskSettings, - type TaskSettings, -} from "@/common/types/tasks"; -import { enforceThinkingPolicy, getThinkingPolicyForModel } from "@/common/utils/thinking/policy"; -import { - THINKING_LEVELS, - coerceThinkingLevel, - getThinkingOptionLabel, -} from "@/common/types/thinking"; - -import { SearchableModelSelect } from "../Components/SearchableModelSelect"; -import { getErrorMessage } from "@/common/utils/errors"; - -export function System1Section() { - const { api } = useAPI(); - const { config: providersConfig, loading: providersLoading } = useProvidersConfig(); - - const [taskSettings, setTaskSettings] = useState(DEFAULT_TASK_SETTINGS); - const [loaded, setLoaded] = useState(false); - const [loadFailed, setLoadFailed] = useState(false); - const [saveError, setSaveError] = useState(null); - - const saveTimerRef = useRef | null>(null); - const savingRef = useRef(false); - const lastSyncedRef = useRef(null); - const pendingSaveRef = useRef(null); - - const [system1ModelRaw, setSystem1ModelRaw] = usePersistedState( - PREFERRED_SYSTEM_1_MODEL_KEY, - "", - { - listener: true, - } - ); - - const system1Model = typeof system1ModelRaw === "string" ? system1ModelRaw : ""; - - const setSystem1Model = (value: string) => { - setSystem1ModelRaw(value); - }; - - const [system1ThinkingLevelRaw, setSystem1ThinkingLevelRaw] = usePersistedState( - PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, - "off", - { listener: true } - ); - - const system1ThinkingLevel = coerceThinkingLevel(system1ThinkingLevelRaw) ?? "off"; - - const workspaceContext = useOptionalWorkspaceContext(); - const selectedWorkspaceId = workspaceContext?.selectedWorkspace?.workspaceId ?? null; - const defaultModel = getDefaultModel(); - - const workspaceModelStorageKey = selectedWorkspaceId - ? getModelKey(selectedWorkspaceId) - : "__system1_workspace_model_fallback__"; - - const [workspaceModelRaw] = usePersistedState(workspaceModelStorageKey, defaultModel, { - listener: true, - }); - - const system1ModelTrimmed = system1Model.trim(); - const workspaceModelTrimmed = - typeof workspaceModelRaw === "string" ? workspaceModelRaw.trim() : ""; - - const effectiveSystem1ModelStringForThinking = - system1ModelTrimmed || workspaceModelTrimmed || defaultModel; - - const policyThinkingLevels = getThinkingPolicyForModel(effectiveSystem1ModelStringForThinking); - const allowedThinkingLevels = - policyThinkingLevels.length > 0 ? policyThinkingLevels : THINKING_LEVELS; - - const effectiveSystem1ThinkingLevel = enforceThinkingPolicy( - effectiveSystem1ModelStringForThinking, - system1ThinkingLevel - ); - const setSystem1ThinkingLevel = (value: string) => { - setSystem1ThinkingLevelRaw(coerceThinkingLevel(value) ?? "off"); - }; - - useEffect(() => { - if (!api) { - return; - } - - setLoaded(false); - setLoadFailed(false); - setSaveError(null); - - void api.config - .getConfig() - .then((cfg) => { - const normalized = normalizeTaskSettings(cfg.taskSettings); - setTaskSettings(normalized); - lastSyncedRef.current = normalized; - setLoadFailed(false); - setLoaded(true); - }) - .catch((error: unknown) => { - setSaveError(getErrorMessage(error)); - setLoadFailed(true); - setLoaded(true); - }); - }, [api]); - - useEffect(() => { - if (!api) { - return; - } - if (!loaded) { - return; - } - if (loadFailed) { - return; - } - - // Debounce settings writes so typing doesn't thrash the disk. - const lastSynced = lastSyncedRef.current; - if (lastSynced && areTaskSettingsEqual(lastSynced, taskSettings)) { - pendingSaveRef.current = null; - if (saveTimerRef.current) { - clearTimeout(saveTimerRef.current); - saveTimerRef.current = null; - } - return; - } - - pendingSaveRef.current = taskSettings; - if (saveTimerRef.current) { - clearTimeout(saveTimerRef.current); - saveTimerRef.current = null; - } - - saveTimerRef.current = setTimeout(() => { - const flush = () => { - if (savingRef.current) { - return; - } - - const payload = pendingSaveRef.current; - if (!payload) { - return; - } - - pendingSaveRef.current = null; - savingRef.current = true; - - void api.config - .saveConfig({ - taskSettings: payload, - }) - .then(() => { - lastSyncedRef.current = payload; - setSaveError(null); - }) - .catch((error: unknown) => { - setSaveError(getErrorMessage(error)); - }) - .finally(() => { - savingRef.current = false; - flush(); - }); - }; - - flush(); - }, 400); - - return () => { - if (saveTimerRef.current) { - clearTimeout(saveTimerRef.current); - saveTimerRef.current = null; - } - }; - }, [api, loaded, loadFailed, taskSettings]); - - // Flush any pending debounced save on unmount so changes aren't lost. - useEffect(() => { - if (!api) return; - if (!loaded) return; - if (loadFailed) return; - - return () => { - if (saveTimerRef.current) { - clearTimeout(saveTimerRef.current); - saveTimerRef.current = null; - } - - if (savingRef.current) return; - const payload = pendingSaveRef.current; - if (!payload) return; - - pendingSaveRef.current = null; - savingRef.current = true; - void api.config - .saveConfig({ - taskSettings: payload, - }) - .catch(() => undefined) - .finally(() => { - savingRef.current = false; - }); - }; - }, [api, loaded, loadFailed]); - - const setBashOutputCompactionMinLines = (rawValue: string) => { - const parsed = Number(rawValue); - setTaskSettings((prev) => - normalizeTaskSettings({ - ...prev, - bashOutputCompactionMinLines: parsed, - }) - ); - }; - - const setBashOutputCompactionMinTotalKb = (rawValue: string) => { - const parsedKb = Math.floor(Number(rawValue)); - const bytes = parsedKb * 1024; - setTaskSettings((prev) => - normalizeTaskSettings({ - ...prev, - bashOutputCompactionMinTotalBytes: bytes, - }) - ); - }; - - const setBashOutputCompactionMaxKeptLines = (rawValue: string) => { - const parsed = Number(rawValue); - setTaskSettings((prev) => - normalizeTaskSettings({ - ...prev, - bashOutputCompactionMaxKeptLines: parsed, - }) - ); - }; - - const setBashOutputCompactionHeuristicFallback = (value: boolean) => { - setTaskSettings((prev) => - normalizeTaskSettings({ - ...prev, - bashOutputCompactionHeuristicFallback: value, - }) - ); - }; - - const setBashOutputCompactionTimeoutSeconds = (rawValue: string) => { - const parsedSeconds = Math.floor(Number(rawValue)); - const ms = parsedSeconds * 1000; - setTaskSettings((prev) => - normalizeTaskSettings({ - ...prev, - bashOutputCompactionTimeoutMs: ms, - }) - ); - }; - - if (!loaded || providersLoading || !providersConfig) { - return ( -
- - Loading settings... -
- ); - } - - const allModels = getSuggestedModels(providersConfig); - - const bashOutputCompactionMinLines = - taskSettings.bashOutputCompactionMinLines ?? - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.default; - const bashOutputCompactionMinTotalBytes = - taskSettings.bashOutputCompactionMinTotalBytes ?? - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.default; - const bashOutputCompactionMaxKeptLines = - taskSettings.bashOutputCompactionMaxKeptLines ?? - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.default; - const bashOutputCompactionHeuristicFallback = - taskSettings.bashOutputCompactionHeuristicFallback ?? - DEFAULT_TASK_SETTINGS.bashOutputCompactionHeuristicFallback ?? - true; - - const bashOutputCompactionTimeoutMs = - taskSettings.bashOutputCompactionTimeoutMs ?? - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.default; - - const bashOutputCompactionMinTotalKb = Math.floor(bashOutputCompactionMinTotalBytes / 1024); - const bashOutputCompactionTimeoutSeconds = Math.floor(bashOutputCompactionTimeoutMs / 1000); - - return ( -
- {/* Model Defaults */} -
-
- System 1 Defaults -
-
-
-
-
System 1 Model
-
Context optimization
-
-
- -
-
- -
-
-
System 1 Reasoning
-
Log filtering
-
-
- -
-
-
-
- - {/* Bash output compaction */} -
-

Bash Output Compaction

-
-
-
-
Heuristic Fallback
-
- If System 1 returns invalid keep_ranges, fall back to deterministic filtering - instead of showing full output. -
-
- -
- -
-
-
Min Lines
-
- Filter when output has more than this many lines. Range{" "} - {SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.min}– - {SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.max}. -
-
- ) => - setBashOutputCompactionMinLines(e.target.value) - } - className="border-border-medium bg-background-secondary h-9 w-28" - /> -
- -
-
-
Min Total (KB)
-
- Filter when output exceeds this many kilobytes. Range{" "} - {SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.min / 1024} - – - {SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.max / 1024} - . -
-
- ) => - setBashOutputCompactionMinTotalKb(e.target.value) - } - className="border-border-medium bg-background-secondary h-9 w-28" - /> -
- -
-
-
Max Kept Lines
-
- Keep at most this many lines. Range{" "} - {SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.min}– - {SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.max}. -
-
- ) => - setBashOutputCompactionMaxKeptLines(e.target.value) - } - className="border-border-medium bg-background-secondary h-9 w-28" - /> -
- -
-
-
Timeout (seconds)
-
- Abort filtering if it takes longer than this many seconds. Range{" "} - {SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.min / 1000}– - {SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.max / 1000}. -
-
- ) => - setBashOutputCompactionTimeoutSeconds(e.target.value) - } - className="border-border-medium bg-background-secondary h-9 w-28" - /> -
-
- - {saveError ?
{saveError}
: null} -
-
- ); -} - -function areTaskSettingsEqual(a: TaskSettings, b: TaskSettings): boolean { - return ( - a.maxParallelAgentTasks === b.maxParallelAgentTasks && - a.maxTaskNestingDepth === b.maxTaskNestingDepth && - a.bashOutputCompactionMinLines === b.bashOutputCompactionMinLines && - a.bashOutputCompactionMinTotalBytes === b.bashOutputCompactionMinTotalBytes && - a.bashOutputCompactionMaxKeptLines === b.bashOutputCompactionMaxKeptLines && - a.bashOutputCompactionTimeoutMs === b.bashOutputCompactionTimeoutMs && - a.bashOutputCompactionHeuristicFallback === b.bashOutputCompactionHeuristicFallback - ); -} diff --git a/src/browser/features/Settings/Sections/TasksSection.agents.ts b/src/browser/features/Settings/Sections/TasksSection.agents.ts index c3f950458e..23f08e372f 100644 --- a/src/browser/features/Settings/Sections/TasksSection.agents.ts +++ b/src/browser/features/Settings/Sections/TasksSection.agents.ts @@ -64,7 +64,6 @@ export const FALLBACK_AGENTS: AgentDefinitionDescriptor[] = [ "task_apply_git_patch", "propose_plan", "ask_user_question", - "system1_keep_ranges", "mux_agents_.*", "agent_skill_write", ], @@ -102,15 +101,6 @@ export const FALLBACK_AGENTS: AgentDefinitionDescriptor[] = [ subagentRunnable: false, base: "exec", }, - { - id: "system1_bash", - scope: "built-in", - name: "System1 Bash", - description: "Fast bash-output filtering (internal)", - uiSelectable: false, - uiRoutable: false, - subagentRunnable: false, - }, ]; function compareAgentsByName(a: AgentDefinitionDescriptor, b: AgentDefinitionDescriptor): number { diff --git a/src/browser/features/Settings/Sections/TasksSection.tsx b/src/browser/features/Settings/Sections/TasksSection.tsx index 8fd91a2a82..ad199573f8 100644 --- a/src/browser/features/Settings/Sections/TasksSection.tsx +++ b/src/browser/features/Settings/Sections/TasksSection.tsx @@ -181,12 +181,7 @@ function areTaskSettingsEqual(a: TaskSettings, b: TaskSettings): boolean { a.proposePlanImplementReplacesChatHistory === b.proposePlanImplementReplacesChatHistory && a.preserveSubagentsUntilArchive === b.preserveSubagentsUntilArchive && a.planSubagentExecutorRouting === b.planSubagentExecutorRouting && - a.planSubagentDefaultsToOrchestrator === b.planSubagentDefaultsToOrchestrator && - a.bashOutputCompactionMinLines === b.bashOutputCompactionMinLines && - a.bashOutputCompactionMinTotalBytes === b.bashOutputCompactionMinTotalBytes && - a.bashOutputCompactionMaxKeptLines === b.bashOutputCompactionMaxKeptLines && - a.bashOutputCompactionTimeoutMs === b.bashOutputCompactionTimeoutMs && - a.bashOutputCompactionHeuristicFallback === b.bashOutputCompactionHeuristicFallback + a.planSubagentDefaultsToOrchestrator === b.planSubagentDefaultsToOrchestrator ); } @@ -270,7 +265,7 @@ export function TasksSection() { // Resolve the workspace's active model so that when a sub-agent's model is // "Inherit", we show thinking levels for the workspace model (falling back to - // the global default). This mirrors the resolution chain in System1Section. + // the global default). This mirrors the workspace model resolution chain used when sending messages. const selectedWorkspaceId = selectedWorkspace?.workspaceId ?? null; const defaultModel = getDefaultModel(); const workspaceModelStorageKey = selectedWorkspaceId diff --git a/src/browser/features/Settings/SettingsPage.stories.tsx b/src/browser/features/Settings/SettingsPage.stories.tsx index 0f2dc6ba6d..fa8d95f340 100644 --- a/src/browser/features/Settings/SettingsPage.stories.tsx +++ b/src/browser/features/Settings/SettingsPage.stories.tsx @@ -1,5 +1,4 @@ import { appMeta, AppWithMocks, type AppStory } from "@/browser/stories/meta.js"; -import { EXPERIMENT_IDS } from "@/common/constants/experiments"; import { waitFor, within, userEvent } from "@storybook/test"; import { setupSettingsStory } from "./Sections/settingsStoryUtils.js"; @@ -24,7 +23,7 @@ const BASE_SECTION_LABELS = [ ] as const; type BaseSectionLabel = (typeof BASE_SECTION_LABELS)[number]; -type SectionNavLabel = BaseSectionLabel | "System 1"; +type SectionNavLabel = BaseSectionLabel; const SECTION_CONTENT_MATCHERS: Record = { General: /Theme/i, @@ -94,38 +93,3 @@ export const SectionsSmoke: AppStory = { } }, }; - -export const System1SectionSmoke: AppStory = { - render: () => ( - - setupSettingsStory({ - experiments: { [EXPERIMENT_IDS.SYSTEM_1]: true }, - providersConfig: { - anthropic: { - apiKeySet: true, - isEnabled: true, - isConfigured: true, - baseUrl: "", - models: ["claude-sonnet-4-20250514", "claude-opus-4-20250514"], - }, - openai: { - apiKeySet: true, - isEnabled: true, - isConfigured: true, - baseUrl: "", - models: ["gpt-4o", "gpt-4o-mini", "o1-preview"], - }, - }, - }) - } - /> - ), - play: async ({ canvasElement }: { canvasElement: HTMLElement }) => { - const canvas = within(canvasElement); - - await openSettings(canvasElement); - await clickSectionButton(canvasElement, "System 1"); - await canvas.findByText(/System 1 Model/i); - }, -}; diff --git a/src/browser/features/Settings/SettingsPage.tsx b/src/browser/features/Settings/SettingsPage.tsx index cc11d67015..164a15e51e 100644 --- a/src/browser/features/Settings/SettingsPage.tsx +++ b/src/browser/features/Settings/SettingsPage.tsx @@ -11,7 +11,6 @@ import { Keyboard, Layout, Container, - BrainCircuit, Shield, ShieldCheck, Server, @@ -27,7 +26,6 @@ import { GeneralSection } from "./Sections/GeneralSection"; import { TasksSection } from "./Sections/TasksSection"; import { ProvidersSection } from "./Sections/ProvidersSection"; import { ModelsSection } from "./Sections/ModelsSection"; -import { System1Section } from "./Sections/System1Section"; import { GovernorSection } from "./Sections/GovernorSection"; import { Button } from "@/browser/components/Button/Button"; import { MCPSettingsSection } from "./Sections/MCPSettingsSection"; @@ -124,28 +122,18 @@ interface SettingsPageProps { export function SettingsPage(props: SettingsPageProps) { const { close, activeSection, setActiveSection } = useSettings(); const onboardingPause = useOnboardingPause(); - const system1Enabled = useExperimentValue(EXPERIMENT_IDS.SYSTEM_1); const governorEnabled = useExperimentValue(EXPERIMENT_IDS.MUX_GOVERNOR); const workspaceHeartbeatsEnabled = useExperimentValue(EXPERIMENT_IDS.WORKSPACE_HEARTBEATS); // Keep routing on a valid section when an experiment-gated section is disabled. useEffect(() => { - if (!system1Enabled && activeSection === "system1") { - setActiveSection(BASE_SECTIONS[0]?.id ?? "general"); - } if (!governorEnabled && activeSection === "governor") { setActiveSection(BASE_SECTIONS[0]?.id ?? "general"); } if (!workspaceHeartbeatsEnabled && activeSection === "heartbeat") { setActiveSection(BASE_SECTIONS[0]?.id ?? "general"); } - }, [ - activeSection, - setActiveSection, - system1Enabled, - governorEnabled, - workspaceHeartbeatsEnabled, - ]); + }, [activeSection, setActiveSection, governorEnabled, workspaceHeartbeatsEnabled]); // Close settings on Escape. Uses bubble phase so inner surfaces (Select dropdowns, // Popover, Dialog) that call stopPropagation/preventDefault on Escape get first @@ -165,17 +153,6 @@ export function SettingsPage(props: SettingsPageProps) { return () => window.removeEventListener("keydown", onKeyDown); }, [close]); let sections: SettingsSection[] = BASE_SECTIONS; - if (system1Enabled) { - sections = [ - ...sections, - { - id: "system1", - label: "System 1", - icon: , - component: System1Section, - }, - ]; - } if (governorEnabled) { sections = [ ...sections, diff --git a/src/browser/features/Tools/BashToolCall.tsx b/src/browser/features/Tools/BashToolCall.tsx index 2d4fca6490..4c0227ae45 100644 --- a/src/browser/features/Tools/BashToolCall.tsx +++ b/src/browser/features/Tools/BashToolCall.tsx @@ -1,5 +1,5 @@ import React, { useEffect, useRef, useState } from "react"; -import { FileText, Info, Layers, Loader2 } from "lucide-react"; +import { FileText, Info, Layers } from "lucide-react"; import type { BashToolArgs, BashToolResult } from "@/common/types/tools"; import { BASH_DEFAULT_TIMEOUT_SECS } from "@/common/constants/toolLimits"; import { @@ -41,7 +41,6 @@ const EMPTY_LIVE_OUTPUT: BashLiveOutputView = { stderr: "", combined: "", truncated: false, - phase: undefined, }; export const BashToolCall: React.FC = ({ @@ -152,8 +151,6 @@ export const BashToolCall: React.FC = ({ const showLiveOutput = !isBackground && (status === "executing" || (Boolean(liveOutput) && !resultHasOutput)); - const isFilteringLiveOutput = showLiveOutput && liveOutputView.phase === "filtering"; - const canSendToBackground = Boolean( toolCallId && workspaceId && foregroundBashToolCallIds.has(toolCallId) ); @@ -325,8 +322,7 @@ export const BashToolCall: React.FC = ({ className={cn( "px-2 py-1.5", (showLiveOutput ? combinedLiveOutput.length === 0 : !completedHasOutput) && - "text-muted italic", - isFilteringLiveOutput && "opacity-60 blur-[1px]" + "text-muted italic" )} > {showLiveOutput @@ -339,14 +335,6 @@ export const BashToolCall: React.FC = ({ ? completedOutput : "No output"} - {isFilteringLiveOutput && ( -
-
-
-
- )} )} diff --git a/src/browser/hooks/useExperiments.test.ts b/src/browser/hooks/useExperiments.test.ts index 52de767566..2959f6a89a 100644 --- a/src/browser/hooks/useExperiments.test.ts +++ b/src/browser/hooks/useExperiments.test.ts @@ -24,17 +24,17 @@ describe("isExperimentEnabled", () => { }); test("returns undefined when no local override exists for a user-overridable experiment", () => { - expect(isExperimentEnabled(EXPERIMENT_IDS.SYSTEM_1)).toBeUndefined(); + expect(isExperimentEnabled(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING)).toBeUndefined(); }); test("returns boolean when local override exists", () => { - const key = getExperimentKey(EXPERIMENT_IDS.SYSTEM_1); + const key = getExperimentKey(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING); globalThis.window.localStorage.setItem(key, JSON.stringify(true)); - expect(isExperimentEnabled(EXPERIMENT_IDS.SYSTEM_1)).toBe(true); + expect(isExperimentEnabled(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING)).toBe(true); globalThis.window.localStorage.setItem(key, JSON.stringify(false)); - expect(isExperimentEnabled(EXPERIMENT_IDS.SYSTEM_1)).toBe(false); + expect(isExperimentEnabled(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING)).toBe(false); }); test("returns false for a platform-restricted experiment on unsupported platforms", () => { @@ -48,16 +48,16 @@ describe("isExperimentEnabled", () => { }); test('treats literal "undefined" as no override', () => { - const key = getExperimentKey(EXPERIMENT_IDS.SYSTEM_1); + const key = getExperimentKey(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING); globalThis.window.localStorage.setItem(key, "undefined"); - expect(isExperimentEnabled(EXPERIMENT_IDS.SYSTEM_1)).toBeUndefined(); + expect(isExperimentEnabled(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING)).toBeUndefined(); }); test("treats non-boolean stored value as no override", () => { - const key = getExperimentKey(EXPERIMENT_IDS.SYSTEM_1); + const key = getExperimentKey(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING); globalThis.window.localStorage.setItem(key, JSON.stringify("test")); - expect(isExperimentEnabled(EXPERIMENT_IDS.SYSTEM_1)).toBeUndefined(); + expect(isExperimentEnabled(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING)).toBeUndefined(); }); }); diff --git a/src/browser/hooks/useSendMessageOptions.ts b/src/browser/hooks/useSendMessageOptions.ts index d09589f113..3f67bf1da6 100644 --- a/src/browser/hooks/useSendMessageOptions.ts +++ b/src/browser/hooks/useSendMessageOptions.ts @@ -5,15 +5,8 @@ import { usePersistedState } from "./usePersistedState"; import { buildSendMessageOptions, normalizeModelPreference, - normalizeSystem1Model, - normalizeSystem1ThinkingLevel, } from "@/browser/utils/messages/buildSendMessageOptions"; -import { - DEFAULT_MODEL_KEY, - getModelKey, - PREFERRED_SYSTEM_1_MODEL_KEY, - PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, -} from "@/common/constants/storage"; +import { DEFAULT_MODEL_KEY, getModelKey } from "@/common/constants/storage"; import type { SendMessageOptions } from "@/common/orpc/types"; import { useProviderOptions } from "./useProviderOptions"; import { useExperimentOverrideValue } from "./useExperiments"; @@ -62,23 +55,10 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING_EXCLUSIVE ); const advisorTool = useExperimentOverrideValue(EXPERIMENT_IDS.ADVISOR_TOOL); - const system1 = useExperimentOverrideValue(EXPERIMENT_IDS.SYSTEM_1); const execSubagentHardRestart = useExperimentOverrideValue( EXPERIMENT_IDS.EXEC_SUBAGENT_HARD_RESTART ); - const [preferredSystem1Model] = usePersistedState(PREFERRED_SYSTEM_1_MODEL_KEY, "", { - listener: true, - }); - const system1Model = normalizeSystem1Model(preferredSystem1Model); - - const [preferredSystem1ThinkingLevel] = usePersistedState( - PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, - "off", - { listener: true } - ); - const system1ThinkingLevel = normalizeSystem1ThinkingLevel(preferredSystem1ThinkingLevel); - // Compute base model (canonical format) for UI components const baseModel = normalizeModelPreference(preferredModel, defaultModel); @@ -91,11 +71,8 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi programmaticToolCalling, programmaticToolCallingExclusive, advisorTool, - system1, execSubagentHardRestart, }, - system1Model, - system1ThinkingLevel, disableWorkspaceAgents, }); diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index df2e56536a..3a4e4635bc 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -729,19 +729,6 @@ export class WorkspaceStore { const output = (toolCallEnd.result as { output?: unknown } | undefined)?.output; if (typeof output === "string") { transient.liveBashOutput.delete(toolCallEnd.toolCallId); - } else { - // If we keep the tail buffer, ensure we don't get stuck in "filtering" UI state. - const prev = transient.liveBashOutput.get(toolCallEnd.toolCallId); - if (prev?.phase === "filtering") { - const next = appendLiveBashOutputChunk( - prev, - { text: "", isError: false, phase: "output" }, - BASH_TRUNCATE_MAX_TOTAL_BYTES - ); - if (next !== prev) { - transient.liveBashOutput.set(toolCallEnd.toolCallId, next); - } - } } } @@ -3769,16 +3756,14 @@ export class WorkspaceStore { } if (isBashOutputEvent(data)) { - const hasText = data.text.length > 0; - const hasPhase = data.phase !== undefined; - if (!hasText && !hasPhase) return; + if (data.text.length === 0) return; const transient = this.assertChatTransientState(workspaceId); const prev = transient.liveBashOutput.get(data.toolCallId); const next = appendLiveBashOutputChunk( prev, - { text: data.text, isError: data.isError, phase: data.phase }, + { text: data.text, isError: data.isError }, BASH_TRUNCATE_MAX_TOTAL_BYTES ); diff --git a/src/browser/utils/messages/buildSendMessageOptions.ts b/src/browser/utils/messages/buildSendMessageOptions.ts index 307231e458..a8f7cb62e0 100644 --- a/src/browser/utils/messages/buildSendMessageOptions.ts +++ b/src/browser/utils/messages/buildSendMessageOptions.ts @@ -1,14 +1,12 @@ import type { SendMessageOptions } from "@/common/orpc/types"; import type { ThinkingLevel } from "@/common/types/thinking"; import type { MuxProviderOptions } from "@/common/types/providerOptions"; -import { coerceThinkingLevel } from "@/common/types/thinking"; -import { normalizeSelectedModel, normalizeToCanonical } from "@/common/utils/ai/models"; +import { normalizeSelectedModel } from "@/common/utils/ai/models"; export interface ExperimentValues { programmaticToolCalling: boolean | undefined; programmaticToolCallingExclusive: boolean | undefined; advisorTool: boolean | undefined; - system1: boolean | undefined; execSubagentHardRestart: boolean | undefined; } @@ -18,8 +16,6 @@ export interface SendMessageOptionsInput { agentId: string; providerOptions: MuxProviderOptions; experiments: ExperimentValues; - system1Model?: string; - system1ThinkingLevel?: ThinkingLevel; disableWorkspaceAgents?: boolean; } @@ -30,32 +26,14 @@ export function normalizeModelPreference(rawModel: unknown, fallbackModel: strin return normalizeSelectedModel(trimmed ?? fallbackModel); } -export function normalizeSystem1Model(rawModel: unknown): string | undefined { - if (typeof rawModel !== "string") return undefined; - const trimmed = rawModel.trim(); - return trimmed.length > 0 ? trimmed : undefined; -} - -export function normalizeSystem1ThinkingLevel(rawLevel: unknown): ThinkingLevel { - return coerceThinkingLevel(rawLevel) ?? "off"; -} - /** * Construct SendMessageOptions from normalized inputs. * Single source of truth for the send-option shape — backend enforces per-model policy. */ export function buildSendMessageOptions(input: SendMessageOptionsInput): SendMessageOptions { - const system1Model = input.system1Model ? normalizeToCanonical(input.system1Model) : undefined; - const system1ThinkingLevel = - input.system1ThinkingLevel && input.system1ThinkingLevel !== "off" - ? input.system1ThinkingLevel - : undefined; - return { thinkingLevel: input.thinkingLevel, model: input.model, - ...(system1Model && { system1Model }), - ...(system1ThinkingLevel && { system1ThinkingLevel }), agentId: input.agentId, providerOptions: input.providerOptions, experiments: { ...input.experiments }, diff --git a/src/browser/utils/messages/liveBashOutputBuffer.test.ts b/src/browser/utils/messages/liveBashOutputBuffer.test.ts index 4fca11b6cd..521f6e20da 100644 --- a/src/browser/utils/messages/liveBashOutputBuffer.test.ts +++ b/src/browser/utils/messages/liveBashOutputBuffer.test.ts @@ -16,22 +16,6 @@ describe("appendLiveBashOutputChunk", () => { expect(b.truncated).toBe(false); }); - it("supports phase-only updates", () => { - const a = appendLiveBashOutputChunk(undefined, { text: "out\n", isError: false }, 1024); - expect(a.phase).toBeUndefined(); - - const b = appendLiveBashOutputChunk(a, { text: "", isError: false, phase: "filtering" }, 1024); - expect(b.combined).toBe("out\n"); - expect(b.phase).toBe("filtering"); - - // Phase-only updates should be referentially stable when nothing changes. - const c = appendLiveBashOutputChunk(b, { text: "", isError: false, phase: "filtering" }, 1024); - expect(c).toBe(b); - - const d = appendLiveBashOutputChunk(b, { text: "", isError: false, phase: "output" }, 1024); - expect(d.phase).toBe("output"); - }); - it("normalizes carriage returns to newlines", () => { const a = appendLiveBashOutputChunk(undefined, { text: "a\rb", isError: false }, 1024); expect(a.stdout).toBe("a\nb"); diff --git a/src/browser/utils/messages/liveBashOutputBuffer.ts b/src/browser/utils/messages/liveBashOutputBuffer.ts index efe26f927e..5dc9b763fb 100644 --- a/src/browser/utils/messages/liveBashOutputBuffer.ts +++ b/src/browser/utils/messages/liveBashOutputBuffer.ts @@ -4,11 +4,6 @@ export interface LiveBashOutputView { /** Combined output in emission order (stdout/stderr interleaved). */ combined: string; truncated: boolean; - /** - * Optional UI state hint. When set to "filtering", the backend has finished producing output - * and is post-processing it (e.g., System1 log filtering) before emitting tool-call-end. - */ - phase?: "output" | "filtering"; } interface LiveBashOutputSegment { @@ -62,7 +57,7 @@ function getUtf8ByteLength(text: string): number { export function appendLiveBashOutputChunk( prev: LiveBashOutputInternal | undefined, - chunk: { text: string; isError: boolean; phase?: "output" | "filtering" }, + chunk: { text: string; isError: boolean }, maxBytes: number ): LiveBashOutputInternal { if (maxBytes <= 0) { @@ -76,17 +71,13 @@ export function appendLiveBashOutputChunk( stderr: "", combined: "", truncated: false, - phase: undefined, segments: [], totalBytes: 0, } satisfies LiveBashOutputInternal); const normalizedText = normalizeNewlines(chunk.text); - const phaseChanged = chunk.phase !== undefined && chunk.phase !== base.phase; - - // Phase-only updates (no new text) are valid; they power UI overlays like "Compacting output…". - if (normalizedText.length === 0 && !phaseChanged) return base; + if (normalizedText.length === 0) return base; // Clone for purity (tests + avoids hidden mutation assumptions). const next: LiveBashOutputInternal = { @@ -94,19 +85,10 @@ export function appendLiveBashOutputChunk( stderr: base.stderr, combined: base.combined, truncated: base.truncated, - phase: base.phase, segments: base.segments.slice(), totalBytes: base.totalBytes, }; - if (chunk.phase !== undefined) { - next.phase = chunk.phase; - } - - if (normalizedText.length === 0) { - return next; - } - const segment: LiveBashOutputSegment = { isError: chunk.isError, text: normalizedText, diff --git a/src/browser/utils/messages/sendOptions.test.ts b/src/browser/utils/messages/sendOptions.test.ts index 5628baaeeb..de45eac4b5 100644 --- a/src/browser/utils/messages/sendOptions.test.ts +++ b/src/browser/utils/messages/sendOptions.test.ts @@ -1,10 +1,6 @@ import { afterEach, beforeEach, describe, expect, test } from "bun:test"; import { GlobalWindow } from "happy-dom"; -import { - getModelKey, - PREFERRED_SYSTEM_1_MODEL_KEY, - PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, -} from "@/common/constants/storage"; +import { getModelKey } from "@/common/constants/storage"; import { WORKSPACE_DEFAULTS } from "@/constants/workspaceDefaults"; import { getSendOptionsFromStorage } from "./sendOptions"; import { normalizeModelPreference } from "./buildSendMessageOptions"; @@ -53,20 +49,6 @@ describe("getSendOptionsFromStorage", () => { ); }); - test("omits system1 thinking when set to off", () => { - const workspaceId = "ws-2"; - - window.localStorage.setItem(PREFERRED_SYSTEM_1_MODEL_KEY, JSON.stringify("openai:gpt-5.2")); - window.localStorage.setItem(PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, JSON.stringify("off")); - - const options = getSendOptionsFromStorage(workspaceId); - expect(options.system1ThinkingLevel).toBeUndefined(); - - window.localStorage.setItem(PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, JSON.stringify("high")); - const withThinking = getSendOptionsFromStorage(workspaceId); - expect(withThinking.system1ThinkingLevel).toBe("high"); - }); - test("includes Anthropic prompt cache TTL from persisted provider options", () => { const workspaceId = "ws-3"; diff --git a/src/browser/utils/messages/sendOptions.ts b/src/browser/utils/messages/sendOptions.ts index 763b3786c2..90830fc7fc 100644 --- a/src/browser/utils/messages/sendOptions.ts +++ b/src/browser/utils/messages/sendOptions.ts @@ -4,20 +4,12 @@ import { getThinkingLevelByModelKey, getThinkingLevelKey, getDisableWorkspaceAgentsKey, - PREFERRED_SYSTEM_1_MODEL_KEY, - PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, } from "@/common/constants/storage"; -import { - readPersistedState, - readPersistedString, - updatePersistedState, -} from "@/browser/hooks/usePersistedState"; +import { readPersistedState, updatePersistedState } from "@/browser/hooks/usePersistedState"; import { getDefaultModel } from "@/browser/hooks/useModelsFromSettings"; import { buildSendMessageOptions, normalizeModelPreference, - normalizeSystem1Model, - normalizeSystem1ThinkingLevel, } from "@/browser/utils/messages/buildSendMessageOptions"; import type { SendMessageOptions } from "@/common/orpc/types"; import type { ThinkingLevel } from "@/common/types/thinking"; @@ -74,11 +66,6 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio const providerOptions = getProviderOptions(); - const system1Model = normalizeSystem1Model(readPersistedString(PREFERRED_SYSTEM_1_MODEL_KEY)); - const system1ThinkingLevel = normalizeSystem1ThinkingLevel( - readPersistedState(PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, "off") - ); - const disableWorkspaceAgents = readPersistedState( getDisableWorkspaceAgentsKey(workspaceId), false @@ -86,8 +73,6 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio return buildSendMessageOptions({ model: baseModel, - system1Model, - system1ThinkingLevel, agentId, thinkingLevel, providerOptions, @@ -98,7 +83,6 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING_EXCLUSIVE ), advisorTool: isExperimentEnabled(EXPERIMENT_IDS.ADVISOR_TOOL), - system1: isExperimentEnabled(EXPERIMENT_IDS.SYSTEM_1), execSubagentHardRestart: isExperimentEnabled(EXPERIMENT_IDS.EXEC_SUBAGENT_HARD_RESTART), }, }); diff --git a/src/browser/utils/modelPreferenceRepair.test.ts b/src/browser/utils/modelPreferenceRepair.test.ts index 57bb8e2e56..d51b77eb7d 100644 --- a/src/browser/utils/modelPreferenceRepair.test.ts +++ b/src/browser/utils/modelPreferenceRepair.test.ts @@ -11,7 +11,6 @@ import { DEFAULT_MODEL_KEY, HIDDEN_MODELS_KEY, LAST_CUSTOM_MODEL_PROVIDER_KEY, - PREFERRED_SYSTEM_1_MODEL_KEY, getModelKey, getWorkspaceAISettingsByAgentKey, } from "@/common/constants/storage"; @@ -128,8 +127,7 @@ describe("repairLocalModelPreferencesForRemovedProvider", () => { expect(readString(LAST_CUSTOM_MODEL_PROVIDER_KEY)).toBe(OTHER_PROVIDER); }); - test("clears preferred System 1 and agent default models for the removed provider", () => { - writeState(PREFERRED_SYSTEM_1_MODEL_KEY, `${REMOVED_PROVIDER}:system1-model`); + test("clears agent default models for the removed provider", () => { writeState(AGENT_AI_DEFAULTS_KEY, { exec: { modelString: `${REMOVED_PROVIDER}:exec-model`, @@ -143,7 +141,6 @@ describe("repairLocalModelPreferencesForRemovedProvider", () => { repairLocalModelPreferencesForRemovedProvider(REMOVED_PROVIDER, []); - expect(readString(PREFERRED_SYSTEM_1_MODEL_KEY)).toBe(""); expect(readState(AGENT_AI_DEFAULTS_KEY, {})).toEqual({ exec: { thinkingLevel: "high", diff --git a/src/browser/utils/modelPreferenceRepair.ts b/src/browser/utils/modelPreferenceRepair.ts index 4d4cb61394..d62f757a00 100644 --- a/src/browser/utils/modelPreferenceRepair.ts +++ b/src/browser/utils/modelPreferenceRepair.ts @@ -9,7 +9,6 @@ import { DEFAULT_MODEL_KEY, HIDDEN_MODELS_KEY, LAST_CUSTOM_MODEL_PROVIDER_KEY, - PREFERRED_SYSTEM_1_MODEL_KEY, getModelKey, getWorkspaceAISettingsByAgentKey, } from "@/common/constants/storage"; @@ -131,7 +130,6 @@ export function repairLocalModelPreferencesForRemovedProvider( workspaceIds: Iterable ): void { repairPersistedModelString(DEFAULT_MODEL_KEY, provider, WORKSPACE_DEFAULTS.model); - repairPersistedModelString(PREFERRED_SYSTEM_1_MODEL_KEY, provider, ""); repairHiddenModels(provider); repairAgentAiDefaults(provider); repairLastCustomModelProvider(provider); diff --git a/src/cli/run.ts b/src/cli/run.ts index 24ff51efcf..1c80c8a888 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -206,7 +206,6 @@ function buildExperimentsObject(experimentIds: string[]): SendMessageOptions["ex return { programmaticToolCalling: experimentIds.includes("programmatic-tool-calling"), programmaticToolCallingExclusive: experimentIds.includes("programmatic-tool-calling-exclusive"), - system1: experimentIds.includes("system-1"), execSubagentHardRestart: experimentIds.includes("exec-subagent-hard-restart"), }; } diff --git a/src/common/config/schemas/taskSettings.ts b/src/common/config/schemas/taskSettings.ts index b8a7841ba8..4c9e7a34a6 100644 --- a/src/common/config/schemas/taskSettings.ts +++ b/src/common/config/schemas/taskSettings.ts @@ -5,13 +5,6 @@ export const TASK_SETTINGS_LIMITS = { maxTaskNestingDepth: { min: 1, max: 5, default: 3 }, } as const; -export const SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS = { - bashOutputCompactionMinLines: { min: 0, max: 1_000, default: 10 }, - bashOutputCompactionMinTotalBytes: { min: 0, max: 16 * 1024, default: 4 * 1024 }, - bashOutputCompactionMaxKeptLines: { min: 1, max: 1_000, default: 40 }, - bashOutputCompactionTimeoutMs: { min: 1_000, max: 120_000, default: 5_000 }, -} as const; - export const PlanSubagentExecutorRoutingSchema = z.enum(["exec", "orchestrator", "auto"]); export type PlanSubagentExecutorRouting = z.infer; @@ -33,31 +26,6 @@ export const TaskSettingsSchema = z.object({ preserveSubagentsUntilArchive: z.boolean().optional(), planSubagentExecutorRouting: PlanSubagentExecutorRoutingSchema.optional(), planSubagentDefaultsToOrchestrator: z.boolean().optional(), - bashOutputCompactionMinLines: z - .number() - .int() - .min(SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.min) - .max(SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.max) - .optional(), - bashOutputCompactionMinTotalBytes: z - .number() - .int() - .min(SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.min) - .max(SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.max) - .optional(), - bashOutputCompactionMaxKeptLines: z - .number() - .int() - .min(SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.min) - .max(SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.max) - .optional(), - bashOutputCompactionTimeoutMs: z - .number() - .int() - .min(SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.min) - .max(SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.max) - .optional(), - bashOutputCompactionHeuristicFallback: z.boolean().optional(), }); export type TaskSettings = z.infer; diff --git a/src/common/constants/experiments.ts b/src/common/constants/experiments.ts index 80dc32e5dd..eebb22e742 100644 --- a/src/common/constants/experiments.ts +++ b/src/common/constants/experiments.ts @@ -9,7 +9,6 @@ export const EXPERIMENT_IDS = { PROGRAMMATIC_TOOL_CALLING: "programmatic-tool-calling", PROGRAMMATIC_TOOL_CALLING_EXCLUSIVE: "programmatic-tool-calling-exclusive", CONFIGURABLE_BIND_URL: "configurable-bind-url", - SYSTEM_1: "system-1", EXEC_SUBAGENT_HARD_RESTART: "exec-subagent-hard-restart", MUX_GOVERNOR: "mux-governor", MULTI_PROJECT_WORKSPACES: "multi-project-workspaces", @@ -74,14 +73,6 @@ export const EXPERIMENTS: Record = { userOverridable: true, showInSettings: true, }, - [EXPERIMENT_IDS.SYSTEM_1]: { - id: EXPERIMENT_IDS.SYSTEM_1, - name: "System 1", - description: "Context optimization helpers inspired by Thinking, Fast and Slow (Kahneman)", - enabledByDefault: false, - userOverridable: true, - showInSettings: true, - }, [EXPERIMENT_IDS.EXEC_SUBAGENT_HARD_RESTART]: { id: EXPERIMENT_IDS.EXEC_SUBAGENT_HARD_RESTART, name: "Exec sub-agent hard restart", diff --git a/src/common/constants/storage.ts b/src/common/constants/storage.ts index c772ea3cf1..4db64cfa70 100644 --- a/src/common/constants/storage.ts +++ b/src/common/constants/storage.ts @@ -333,18 +333,6 @@ export const DEFAULT_MODEL_KEY = "model-default"; */ export const HIDDEN_MODELS_KEY = "hidden-models"; -/** - * Get the localStorage key for the preferred System 1 model (global) - * Format: "preferredSystem1Model" - */ -export const PREFERRED_SYSTEM_1_MODEL_KEY = "preferredSystem1Model"; - -/** - * Get the localStorage key for the preferred System 1 thinking level (global) - * Format: "preferredSystem1ThinkingLevel" - */ -export const PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY = "preferredSystem1ThinkingLevel"; - /** * Get the localStorage key for cached per-agent AI defaults (global). * Format: "agentAiDefaults" diff --git a/src/common/orpc/schemas/stream.ts b/src/common/orpc/schemas/stream.ts index fd41785408..c6cc07a179 100644 --- a/src/common/orpc/schemas/stream.ts +++ b/src/common/orpc/schemas/stream.ts @@ -356,10 +356,6 @@ export const BashOutputEventSchema = z.object({ type: z.literal("bash-output"), workspaceId: z.string(), toolCallId: z.string(), - phase: z - .enum(["output", "filtering"]) - .optional() - .meta({ description: "UI hint for bash output state" }), text: z.string(), isError: z.boolean().meta({ description: "True if this chunk is from stderr" }), timestamp: z.number().meta({ description: "When output was flushed (Date.now())" }), @@ -643,7 +639,6 @@ export const ExperimentsSchema = z.object({ programmaticToolCalling: z.boolean().optional(), programmaticToolCallingExclusive: z.boolean().optional(), advisorTool: z.boolean().optional(), - system1: z.boolean().optional(), execSubagentHardRestart: z.boolean().optional(), }); @@ -652,8 +647,6 @@ export const SendMessageOptionsSchema = z.object({ editMessageId: z.string().optional(), thinkingLevel: ThinkingLevelSchema.optional(), model: z.string("No model specified"), - system1ThinkingLevel: ThinkingLevelSchema.optional(), - system1Model: z.string().optional(), toolPolicy: ToolPolicySchema.optional(), additionalSystemInstructions: z.string().optional(), maxOutputTokens: z.number().optional(), diff --git a/src/common/types/message.ts b/src/common/types/message.ts index 27616e51cc..72b852ca09 100644 --- a/src/common/types/message.ts +++ b/src/common/types/message.ts @@ -74,8 +74,6 @@ export type StartupRetrySendOptions = Pick< | "model" | "agentId" | "thinkingLevel" - | "system1ThinkingLevel" - | "system1Model" | "toolPolicy" | "additionalSystemInstructions" | "maxOutputTokens" @@ -99,8 +97,6 @@ export function pickStartupRetrySendOptions( model: options.model, agentId: options.agentId, thinkingLevel: options.thinkingLevel, - system1ThinkingLevel: options.system1ThinkingLevel, - system1Model: options.system1Model, toolPolicy: options.toolPolicy, additionalSystemInstructions: options.additionalSystemInstructions, maxOutputTokens: options.maxOutputTokens, diff --git a/src/common/types/tasks.test.ts b/src/common/types/tasks.test.ts index 74384b7a6a..289b1690e3 100644 --- a/src/common/types/tasks.test.ts +++ b/src/common/types/tasks.test.ts @@ -1,11 +1,6 @@ import { describe, expect, test } from "bun:test"; -import { - DEFAULT_TASK_SETTINGS, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS, - TASK_SETTINGS_LIMITS, - normalizeTaskSettings, -} from "./tasks"; +import { DEFAULT_TASK_SETTINGS, TASK_SETTINGS_LIMITS, normalizeTaskSettings } from "./tasks"; describe("normalizeTaskSettings", () => { test("fills defaults when missing", () => { @@ -32,37 +27,16 @@ describe("normalizeTaskSettings", () => { const normalized = normalizeTaskSettings({ maxParallelAgentTasks: 999, maxTaskNestingDepth: 0, - bashOutputCompactionMinLines: -1, - bashOutputCompactionMinTotalBytes: 999999999999, - bashOutputCompactionMaxKeptLines: 0, - bashOutputCompactionTimeoutMs: 0, }); expect(normalized.maxParallelAgentTasks).toBe(TASK_SETTINGS_LIMITS.maxParallelAgentTasks.max); expect(normalized.maxTaskNestingDepth).toBe(TASK_SETTINGS_LIMITS.maxTaskNestingDepth.min); - - expect(normalized.bashOutputCompactionMinLines).toBe( - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.min - ); - expect(normalized.bashOutputCompactionMinTotalBytes).toBe( - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.max - ); - expect(normalized.bashOutputCompactionMaxKeptLines).toBe( - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.min - ); - expect(normalized.bashOutputCompactionTimeoutMs).toBe( - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.min - ); }); test("uses fallbacks for NaN", () => { const normalized = normalizeTaskSettings({ maxParallelAgentTasks: Number.NaN, maxTaskNestingDepth: Number.NaN, - bashOutputCompactionMinLines: Number.NaN, - bashOutputCompactionMinTotalBytes: Number.NaN, - bashOutputCompactionMaxKeptLines: Number.NaN, - bashOutputCompactionTimeoutMs: Number.NaN, }); expect(normalized).toEqual(DEFAULT_TASK_SETTINGS); diff --git a/src/common/types/tasks.ts b/src/common/types/tasks.ts index ace55458b9..51fa3c6ae1 100644 --- a/src/common/types/tasks.ts +++ b/src/common/types/tasks.ts @@ -2,10 +2,7 @@ import type { PlanSubagentExecutorRouting, TaskSettings as TaskSettingsOnDisk, } from "@/common/config/schemas/taskSettings"; -import { - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS, - TASK_SETTINGS_LIMITS, -} from "@/common/config/schemas/taskSettings"; +import { TASK_SETTINGS_LIMITS } from "@/common/config/schemas/taskSettings"; import type { SubagentAiDefaults, SubagentAiDefaultsEntry, @@ -14,10 +11,7 @@ import assert from "@/common/utils/assert"; import { coerceThinkingLevel, type ThinkingLevel } from "./thinking"; export type { PlanSubagentExecutorRouting, SubagentAiDefaults, SubagentAiDefaultsEntry }; -export { - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS, - TASK_SETTINGS_LIMITS, -} from "@/common/config/schemas/taskSettings"; +export { TASK_SETTINGS_LIMITS } from "@/common/config/schemas/taskSettings"; // Normalized runtime settings always include numeric task limits. export type TaskSettings = Required< @@ -32,16 +26,6 @@ export const DEFAULT_TASK_SETTINGS: TaskSettings = { preserveSubagentsUntilArchive: false, planSubagentExecutorRouting: "auto", planSubagentDefaultsToOrchestrator: false, - - bashOutputCompactionMinLines: - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.default, - bashOutputCompactionMinTotalBytes: - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.default, - bashOutputCompactionMaxKeptLines: - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.default, - bashOutputCompactionTimeoutMs: - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.default, - bashOutputCompactionHeuristicFallback: true, }; export function normalizeSubagentAiDefaults(raw: unknown): SubagentAiDefaults { @@ -139,37 +123,6 @@ export function normalizeTaskSettings(raw: unknown): TaskSettings { // Keep the deprecated boolean in sync for downgrade compatibility. const planSubagentDefaultsToOrchestrator = planSubagentExecutorRouting === "orchestrator"; - const bashOutputCompactionMinLines = clampInt( - record.bashOutputCompactionMinLines, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.default, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.min, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.max - ); - const bashOutputCompactionMinTotalBytes = clampInt( - record.bashOutputCompactionMinTotalBytes, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.default, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.min, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.max - ); - const bashOutputCompactionMaxKeptLines = clampInt( - record.bashOutputCompactionMaxKeptLines, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.default, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.min, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.max - ); - const bashOutputCompactionTimeoutMsRaw = clampInt( - record.bashOutputCompactionTimeoutMs, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.default, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.min, - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.max - ); - - const bashOutputCompactionHeuristicFallback = - typeof record.bashOutputCompactionHeuristicFallback === "boolean" - ? record.bashOutputCompactionHeuristicFallback - : (DEFAULT_TASK_SETTINGS.bashOutputCompactionHeuristicFallback ?? true); - const bashOutputCompactionTimeoutMs = Math.floor(bashOutputCompactionTimeoutMsRaw / 1000) * 1000; - const result: TaskSettings = { maxParallelAgentTasks, maxTaskNestingDepth, @@ -177,11 +130,6 @@ export function normalizeTaskSettings(raw: unknown): TaskSettings { preserveSubagentsUntilArchive, planSubagentExecutorRouting, planSubagentDefaultsToOrchestrator, - bashOutputCompactionMinLines, - bashOutputCompactionMinTotalBytes, - bashOutputCompactionMaxKeptLines, - bashOutputCompactionTimeoutMs, - bashOutputCompactionHeuristicFallback, }; assert( @@ -212,31 +160,5 @@ export function normalizeTaskSettings(raw: unknown): TaskSettings { "normalizeTaskSettings: planSubagentDefaultsToOrchestrator must be a boolean" ); - assert( - Number.isInteger(bashOutputCompactionMinLines), - "normalizeTaskSettings: bashOutputCompactionMinLines must be an integer" - ); - assert( - Number.isInteger(bashOutputCompactionMinTotalBytes), - "normalizeTaskSettings: bashOutputCompactionMinTotalBytes must be an integer" - ); - assert( - Number.isInteger(bashOutputCompactionMaxKeptLines), - "normalizeTaskSettings: bashOutputCompactionMaxKeptLines must be an integer" - ); - assert( - Number.isInteger(bashOutputCompactionTimeoutMs), - "normalizeTaskSettings: bashOutputCompactionTimeoutMs must be an integer" - ); - - assert( - typeof bashOutputCompactionHeuristicFallback === "boolean", - "normalizeTaskSettings: bashOutputCompactionHeuristicFallback must be a boolean" - ); - assert( - bashOutputCompactionTimeoutMs % 1000 === 0, - "normalizeTaskSettings: bashOutputCompactionTimeoutMs must be a whole number of seconds" - ); - return result; } diff --git a/src/common/utils/agentTools.test.ts b/src/common/utils/agentTools.test.ts index 1b6cb2de91..835569a6dd 100644 --- a/src/common/utils/agentTools.test.ts +++ b/src/common/utils/agentTools.test.ts @@ -44,7 +44,7 @@ describe("isExecLikeEditingCapableInResolvedChain", () => { id: "exec", tools: { add: [".*"], - remove: ["propose_plan", "ask_user_question", "system1_keep_ranges"], + remove: ["propose_plan", "ask_user_question"], }, }, ]; diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index 5c97c45f2c..aa4beb903b 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -41,7 +41,6 @@ import { ConfigOperationsSchema, } from "@/common/config/schemas/configOperations"; import { TOOL_EDIT_WARNING } from "@/common/types/tools"; -import { SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS } from "@/common/types/tasks"; import { THINKING_LEVELS } from "@/common/types/thinking"; import { zodToJsonSchema } from "zod-to-json-schema"; @@ -845,30 +844,6 @@ interface ToolSchema { }; } -/** - * Schema for a single keep-range item in the system1_keep_ranges tool. - * Extracted as a named export so internal code can derive the type via z.infer<> - * instead of maintaining a hand-written interface. - * - * Note: the tool schema applies .passthrough() on top of this to tolerate extra - * keys from models, but the inferred type is the strict shape. - */ -export const System1KeepRangeSchema = z.object({ - start: z.coerce - .number() - .finite() - .min(1) - .describe("1-based start line (inclusive) in the numbered output"), - end: z.coerce - .number() - .finite() - .min(1) - .describe("1-based end line (inclusive) in the numbered output"), - // .nullish() accepts both null and undefined, so the preprocess - // hack that mapped null→undefined is no longer needed. - reason: z.string().nullish().describe("Optional short reason for keeping this range"), -}); - // ----------------------------------------------------------------------------- // propose_name (workspace name generation) // ----------------------------------------------------------------------------- @@ -1442,26 +1417,6 @@ export const TOOL_DEFINITIONS = { "The current stream will end and a new stream will start with the selected agent.", schema: SwitchAgentToolArgsSchema, }, - system1_keep_ranges: { - description: - "Internal tool used by mux to record which line ranges to keep when filtering large bash output.", - schema: z - .object({ - keep_ranges: z - .array( - System1KeepRangeSchema - // Providers/models sometimes include extra keys in tool arguments; be permissive and - // ignore them rather than failing the whole compaction call. - .passthrough() - ) - .min(1) - // Allow at least as many ranges as the user can request via maxKeptLines. - // (In the worst case, the model may emit one 1-line range per kept line.) - .max(SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.max) - .describe("Line ranges to keep"), - }) - .passthrough(), - }, todo_write: { description: @@ -2130,7 +2085,6 @@ export function getAvailableTools( "task_list", ...(enableAgentReport ? ["agent_report"] : []), "switch_agent", - "system1_keep_ranges", "todo_write", "todo_read", "notify", diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index ad47df4779..e1870d3bde 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -36,7 +36,6 @@ import { createMuxConfigReadTool } from "@/node/services/tools/mux_config_read"; import { createMuxConfigWriteTool } from "@/node/services/tools/mux_config_write"; import { createAgentReportTool } from "@/node/services/tools/agent_report"; import { createSwitchAgentTool } from "@/node/services/tools/switch_agent"; -import { createSystem1KeepRangesTool } from "@/node/services/tools/system1_keep_ranges"; import { wrapWithInitWait } from "@/node/services/tools/wrapWithInitWait"; import { withHooks, type HookConfig } from "@/node/services/tools/withHooks"; import { log } from "@/node/services/log"; @@ -438,7 +437,6 @@ export async function getToolsForModel( // exec-derived agents see its "call me immediately" description. ...(config.enableAgentReport ? { agent_report: createAgentReportTool(config) } : {}), switch_agent: createSwitchAgentTool(config), - system1_keep_ranges: createSystem1KeepRangesTool(config), todo_write: createTodoWriteTool(config), todo_read: createTodoReadTool(config), notify: createNotifyTool(config), diff --git a/src/common/utils/truncateBashOutput.ts b/src/common/utils/truncateBashOutput.ts deleted file mode 100644 index 88db9d859c..0000000000 --- a/src/common/utils/truncateBashOutput.ts +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Hard truncation for bash output to prevent unbounded context growth. - * - * This is a safety net that applies the same limits as foreground bash - * (BASH_HARD_MAX_LINES / BASH_MAX_TOTAL_BYTES) to all bash-family tool output. - * - * Used by maybeFilterBashOutputWithSystem1 to ensure output is bounded even - * when System1 compaction is skipped or fails. - */ - -import { BASH_HARD_MAX_LINES, BASH_MAX_TOTAL_BYTES } from "@/common/constants/toolLimits"; - -export interface TruncateBashOutputResult { - output: string; - truncated: boolean; - originalLines: number; - originalBytes: number; -} - -export function truncateBashOutput(output: string): TruncateBashOutputResult { - const bytes = Buffer.byteLength(output, "utf-8"); - - // Split into lines, but don't count a trailing empty string as a line. - // "line1\nline2\n".split("\n") gives ["line1", "line2", ""], but that's 2 lines, not 3. - const rawLines = output.split("\n"); - const hasTrailingNewline = output.endsWith("\n") && rawLines.length > 0; - const lines = hasTrailingNewline ? rawLines.slice(0, -1) : rawLines; - - if (lines.length <= BASH_HARD_MAX_LINES && bytes <= BASH_MAX_TOTAL_BYTES) { - return { output, truncated: false, originalLines: lines.length, originalBytes: bytes }; - } - - // Keep tail (most recent output is usually most relevant for debugging) - let truncatedLines = lines.slice(-BASH_HARD_MAX_LINES); - // Restore trailing newline if original had one - let truncatedOutput = truncatedLines.join("\n") + (hasTrailingNewline ? "\n" : ""); - - // Also enforce byte limit (slice from end to keep recent output) - if (Buffer.byteLength(truncatedOutput, "utf-8") > BASH_MAX_TOTAL_BYTES) { - // Binary search would be more efficient but this is simple and correct - while (Buffer.byteLength(truncatedOutput, "utf-8") > BASH_MAX_TOTAL_BYTES) { - truncatedLines = truncatedLines.slice(1); - truncatedOutput = truncatedLines.join("\n"); - } - } - - return { - output: truncatedOutput, - truncated: true, - originalLines: lines.length, - originalBytes: bytes, - }; -} diff --git a/src/node/acp/streamTranslator.ts b/src/node/acp/streamTranslator.ts index d1301c877a..e87a702a0a 100644 --- a/src/node/acp/streamTranslator.ts +++ b/src/node/acp/streamTranslator.ts @@ -175,7 +175,6 @@ export class StreamTranslator { _meta: { isError: event.isError, source: "bash-output", - phase: event.phase, timestamp: event.timestamp, }, }, diff --git a/src/node/builtinAgents/desktop.md b/src/node/builtinAgents/desktop.md index 2a6b849a4e..8f5ce70bd6 100644 --- a/src/node/builtinAgents/desktop.md +++ b/src/node/builtinAgents/desktop.md @@ -43,8 +43,6 @@ tools: # No planning tools - propose_plan - ask_user_question - # Internal-only - - system1_keep_ranges # Global config and catalog tools - mux_agents_.* - agent_skill_write diff --git a/src/node/builtinAgents/exec.md b/src/node/builtinAgents/exec.md index cf0799ce02..50eaac2709 100644 --- a/src/node/builtinAgents/exec.md +++ b/src/node/builtinAgents/exec.md @@ -33,8 +33,6 @@ tools: # Exec mode doesn't use planning tools - propose_plan - ask_user_question - # Internal-only tools - - system1_keep_ranges # Global config and catalog tools stay out of general-purpose agents - mux_agents_.* - agent_skill_write diff --git a/src/node/builtinAgents/system1_bash.md b/src/node/builtinAgents/system1_bash.md deleted file mode 100644 index 7a063a7239..0000000000 --- a/src/node/builtinAgents/system1_bash.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -name: System1 Bash -description: Fast bash-output filtering (internal) -ui: - hidden: true -subagent: - runnable: false -tools: - add: - - system1_keep_ranges ---- - -You are a fast bash-output filtering assistant. - -You will be given: - -- `maxKeptLines` (budget) -- `Display name` (optional): a short intent label for the command -- `Bash script` -- `Numbered output` - -Given the numbered output, decide which lines to keep so the user sees the most relevant information. - -IMPORTANT: - -- You MUST call `system1_keep_ranges` exactly once. -- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments). - -Rules: - -- Line numbers are 1-based indices into the numbered output. -- Use the `Display name` and `Bash script` as intent hints. -- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping - representative file paths/matches and any summary/counts (not just errors). -- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings. -- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra - denoising: prefer keeping most/all lines within the budget. -- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget. -- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest - next-step commands or point to docs/help. Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths, - and conflict markers instead. -- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when - the hint is the only clue explaining a blocking state. -- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context. -- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just - to reduce range count; it's OK to return many ranges when denoising (e.g., > 8). -- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning - (e.g., repeated progress, retries, or identical stack traces). If the same error repeats, keep only - the most informative instance plus minimal surrounding context. -- If there are many similar warnings/errors, keep only a few representative examples (prefer those - with file paths/line numbers) plus any summary/count. -- Always keep at least 1 line if any output exists. -- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate). - -Example: - -- Numbered output: - - 0001| building... - - 0002| ERROR: expected X, got Y - - 0003| at path/to/file.ts:12:3 - - 0004| done -- Tool call: - - system1_keep_ranges({"keep_ranges":[{"start":2,"end":3,"reason":"error"}]}) diff --git a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts index 14298caf16..91ecb0f8dc 100644 --- a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts +++ b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts @@ -4,11 +4,10 @@ export const BUILTIN_AGENT_CONTENT = { "compact": "---\nname: Compact\ndescription: History compaction (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\n---\n\nYou are running a compaction/summarization pass. Your task is to write a concise summary of the conversation so far.\n\nIMPORTANT:\n\n- You have NO tools available. Do not attempt to call any tools or output JSON.\n- Simply write the summary as plain text prose.\n- Follow the user's instructions for what to include in the summary.\n", - "desktop": "---\nname: Desktop\ndescription: Visual desktop automation agent for GUI-heavy, screenshot-intensive workflows\nbase: exec\nui:\n hidden: true\n routable: true\n requires:\n - desktop\nsubagent:\n runnable: true\n append_prompt: |\n You are a desktop automation sub-agent running in a child workspace.\n\n - Your job: interact with the desktop GUI via screenshot-driven automation.\n - Always take a screenshot before starting a GUI interaction sequence.\n - Follow the grounding loop: screenshot → identify target → act → screenshot to verify.\n - After completing the task, summarize the outcome back to the parent with only\n the result plus selected evidence (e.g., a final screenshot path).\n - Do not expand scope beyond the delegated desktop task.\n - Call `agent_report` exactly once when done.\nprompt:\n append: true\nai:\n thinkingLevel: medium\ntools:\n add:\n - desktop_screenshot\n - desktop_move_mouse\n - desktop_click\n - desktop_double_click\n - desktop_drag\n - desktop_scroll\n - desktop_type\n - desktop_key_press\n remove:\n # Desktop agent should not recursively orchestrate child agents\n - task\n - task_await\n - task_list\n - task_terminate\n - task_apply_git_patch\n # No planning tools\n - propose_plan\n - ask_user_question\n # Internal-only\n - system1_keep_ranges\n # Global config and catalog tools\n - mux_agents_.*\n - agent_skill_write\n---\n\nYou are a desktop automation agent.\n\n- **Screenshot-first rule:** Always take a `desktop_screenshot` before beginning any GUI interaction loop. Never act on stale visual state.\n- **Grounding loop:** Follow `screenshot → identify target coordinates → act (click/type/drag) → screenshot to verify` for each major interaction. Every major interaction step should end with a screenshot to verify the expected result.\n- **Coordinate precision:** Use screenshot analysis to identify precise pixel coordinates for clicks, drags, and other positional actions. Account for window position, display scaling, and DPI before acting.\n- **Defensive interaction patterns:**\n - Wait briefly after clicks before verifying because menus and dialogs may animate.\n - For text input, click the target field first, verify focus, then type.\n - For drag operations, verify both the start and end positions with screenshots.\n - If an unexpected dialog or popup appears, take another screenshot and adapt to the new state.\n- **Scrolling:** Use `desktop_scroll` to navigate within windows, then take a screenshot after scrolling to verify the new content is visible.\n- **Error recovery:** If an action does not produce the expected result, take another screenshot, reassess the current state, and retry with adjusted coordinates.\n- **Reporting:** When complete, summarize only the outcome and key evidence back to the parent agent, such as the final screenshot confirming success. Do not send raw coordinate logs.\n", - "exec": "---\nname: Exec\ndescription: Implement changes in the repository\nui:\n color: var(--color-exec-mode)\nsubagent:\n runnable: true\n append_prompt: |\n You are running as a sub-agent in a child workspace.\n\n - Take a single narrowly scoped task and complete it end-to-end. Do not expand scope.\n - If the task brief includes clear starting points and acceptance criteria (or a concrete approved plan handoff) — implement it directly.\n Do not spawn `explore` tasks or write a \"mini-plan\" unless you are concretely blocked by a missing fact (e.g., a file path that doesn't exist, an unknown symbol name, or an error that contradicts the brief).\n - When you do need repo context you don't have, prefer 1–3 narrow `explore` tasks (possibly in parallel) over broad manual file-reading.\n - If the task brief is missing critical information (scope, acceptance, or starting points) and you cannot infer it safely after a quick `explore`, do not guess.\n Stop and call `agent_report` once with 1–3 concrete questions/unknowns for the parent agent, and do not create commits.\n - Run targeted verification and create one or more git commits.\n - Never amend existing commits — always create new commits on top.\n - **Before your stream ends, you MUST call `agent_report` exactly once with:**\n - What changed (paths / key details)\n - What you ran (tests, typecheck, lint)\n - Any follow-ups / risks\n (If you forget, the parent will inject a follow-up message and you'll waste tokens.)\n - You may call task/task_await/task_list/task_terminate to delegate further when available.\n Delegation is limited by Max Task Nesting Depth (Settings → Agents → Task Settings).\n - Do not call propose_plan.\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Exec mode doesn't use planning tools\n - propose_plan\n - ask_user_question\n # Internal-only tools\n - system1_keep_ranges\n # Global config and catalog tools stay out of general-purpose agents\n - mux_agents_.*\n - agent_skill_write\n - agent_skill_delete\n - mux_config_read\n - mux_config_write\n - skills_catalog_.*\n - analytics_query\n---\n\nYou are in Exec mode.\n\n- If an accepted `` block is provided, treat it as the contract and implement it directly. Only do extra exploration if the plan references non-existent files/symbols or if errors contradict it.\n- Use `explore` sub-agents just-in-time for missing repo context (paths/symbols/tests); don't spawn them by default.\n- Trust Explore sub-agent reports as authoritative for repo facts (paths/symbols/callsites). Do not redo the same investigation yourself; only re-check if the report is ambiguous or contradicts other evidence.\n- For correctness claims, an Explore sub-agent report counts as having read the referenced files.\n- Make minimal, correct, reviewable changes that match existing codebase patterns.\n- Prefer targeted commands and checks (typecheck/tests) when feasible.\n- Treat as a standing order: keep running checks and addressing failures until they pass or a blocker outside your control arises.\n\n## Desktop Automation\n\nWhen a task involves repeated screenshot/action/verify loops for desktop GUI interaction (for example, clicking through application UIs, filling desktop app forms, or visually verifying GUI state), delegate to the `desktop` agent via `task` rather than performing desktop automation inline. The desktop agent is purpose-built for the screenshot → act → verify grounding loop.\n", + "desktop": "---\nname: Desktop\ndescription: Visual desktop automation agent for GUI-heavy, screenshot-intensive workflows\nbase: exec\nui:\n hidden: true\n routable: true\n requires:\n - desktop\nsubagent:\n runnable: true\n append_prompt: |\n You are a desktop automation sub-agent running in a child workspace.\n\n - Your job: interact with the desktop GUI via screenshot-driven automation.\n - Always take a screenshot before starting a GUI interaction sequence.\n - Follow the grounding loop: screenshot → identify target → act → screenshot to verify.\n - After completing the task, summarize the outcome back to the parent with only\n the result plus selected evidence (e.g., a final screenshot path).\n - Do not expand scope beyond the delegated desktop task.\n - Call `agent_report` exactly once when done.\nprompt:\n append: true\nai:\n thinkingLevel: medium\ntools:\n add:\n - desktop_screenshot\n - desktop_move_mouse\n - desktop_click\n - desktop_double_click\n - desktop_drag\n - desktop_scroll\n - desktop_type\n - desktop_key_press\n remove:\n # Desktop agent should not recursively orchestrate child agents\n - task\n - task_await\n - task_list\n - task_terminate\n - task_apply_git_patch\n # No planning tools\n - propose_plan\n - ask_user_question\n # Global config and catalog tools\n - mux_agents_.*\n - agent_skill_write\n---\n\nYou are a desktop automation agent.\n\n- **Screenshot-first rule:** Always take a `desktop_screenshot` before beginning any GUI interaction loop. Never act on stale visual state.\n- **Grounding loop:** Follow `screenshot → identify target coordinates → act (click/type/drag) → screenshot to verify` for each major interaction. Every major interaction step should end with a screenshot to verify the expected result.\n- **Coordinate precision:** Use screenshot analysis to identify precise pixel coordinates for clicks, drags, and other positional actions. Account for window position, display scaling, and DPI before acting.\n- **Defensive interaction patterns:**\n - Wait briefly after clicks before verifying because menus and dialogs may animate.\n - For text input, click the target field first, verify focus, then type.\n - For drag operations, verify both the start and end positions with screenshots.\n - If an unexpected dialog or popup appears, take another screenshot and adapt to the new state.\n- **Scrolling:** Use `desktop_scroll` to navigate within windows, then take a screenshot after scrolling to verify the new content is visible.\n- **Error recovery:** If an action does not produce the expected result, take another screenshot, reassess the current state, and retry with adjusted coordinates.\n- **Reporting:** When complete, summarize only the outcome and key evidence back to the parent agent, such as the final screenshot confirming success. Do not send raw coordinate logs.\n", + "exec": "---\nname: Exec\ndescription: Implement changes in the repository\nui:\n color: var(--color-exec-mode)\nsubagent:\n runnable: true\n append_prompt: |\n You are running as a sub-agent in a child workspace.\n\n - Take a single narrowly scoped task and complete it end-to-end. Do not expand scope.\n - If the task brief includes clear starting points and acceptance criteria (or a concrete approved plan handoff) — implement it directly.\n Do not spawn `explore` tasks or write a \"mini-plan\" unless you are concretely blocked by a missing fact (e.g., a file path that doesn't exist, an unknown symbol name, or an error that contradicts the brief).\n - When you do need repo context you don't have, prefer 1–3 narrow `explore` tasks (possibly in parallel) over broad manual file-reading.\n - If the task brief is missing critical information (scope, acceptance, or starting points) and you cannot infer it safely after a quick `explore`, do not guess.\n Stop and call `agent_report` once with 1–3 concrete questions/unknowns for the parent agent, and do not create commits.\n - Run targeted verification and create one or more git commits.\n - Never amend existing commits — always create new commits on top.\n - **Before your stream ends, you MUST call `agent_report` exactly once with:**\n - What changed (paths / key details)\n - What you ran (tests, typecheck, lint)\n - Any follow-ups / risks\n (If you forget, the parent will inject a follow-up message and you'll waste tokens.)\n - You may call task/task_await/task_list/task_terminate to delegate further when available.\n Delegation is limited by Max Task Nesting Depth (Settings → Agents → Task Settings).\n - Do not call propose_plan.\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Exec mode doesn't use planning tools\n - propose_plan\n - ask_user_question\n # Global config and catalog tools stay out of general-purpose agents\n - mux_agents_.*\n - agent_skill_write\n - agent_skill_delete\n - mux_config_read\n - mux_config_write\n - skills_catalog_.*\n - analytics_query\n---\n\nYou are in Exec mode.\n\n- If an accepted `` block is provided, treat it as the contract and implement it directly. Only do extra exploration if the plan references non-existent files/symbols or if errors contradict it.\n- Use `explore` sub-agents just-in-time for missing repo context (paths/symbols/tests); don't spawn them by default.\n- Trust Explore sub-agent reports as authoritative for repo facts (paths/symbols/callsites). Do not redo the same investigation yourself; only re-check if the report is ambiguous or contradicts other evidence.\n- For correctness claims, an Explore sub-agent report counts as having read the referenced files.\n- Make minimal, correct, reviewable changes that match existing codebase patterns.\n- Prefer targeted commands and checks (typecheck/tests) when feasible.\n- Treat as a standing order: keep running checks and addressing failures until they pass or a blocker outside your control arises.\n\n## Desktop Automation\n\nWhen a task involves repeated screenshot/action/verify loops for desktop GUI interaction (for example, clicking through application UIs, filling desktop app forms, or visually verifying GUI state), delegate to the `desktop` agent via `task` rather than performing desktop automation inline. The desktop agent is purpose-built for the screenshot → act → verify grounding loop.\n", "explore": "---\nname: Explore\ndescription: Read-only exploration of repository, environment, web, etc. Useful for investigation before making changes.\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n skip_init_hook: true\n append_prompt: |\n You are an Explore sub-agent running inside a child workspace.\n\n - Explore the repository to answer the prompt using read-only investigation.\n - Return concise, actionable findings (paths, symbols, callsites, and facts).\n - When you have a final answer, call agent_report exactly once.\n - Do not call agent_report until you have completed the assigned task.\ntools:\n # Remove editing and task tools from exec base (read-only agent; skill tools are kept)\n remove:\n - file_edit_.*\n - task\n - task_apply_git_patch\n - task_.*\n---\n\nYou are in Explore mode (read-only).\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT manually create, edit, delete, move, copy, or rename tracked files.\n- You MUST NOT stage/commit or otherwise modify git state.\n- You MUST NOT use redirect operators (>, >>) or heredocs to write to files.\n - Pipes are allowed for processing, but MUST NOT be used to write to files (for example via `tee`).\n- You MUST NOT run commands that are explicitly about modifying the filesystem or repo state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- You MAY run verification commands (fmt-check/lint/typecheck/test) even if they create build artifacts/caches, but they MUST NOT modify tracked files.\n - After running verification, check `git status --porcelain` and report if it is non-empty.\n- Prefer `file_read` for reading file contents (supports offset/limit paging).\n- Use bash for read-only operations (rg, ls, git diff/show/log, etc.) and verification commands.\n", "name_workspace": "---\nname: Name Workspace\ndescription: Generate workspace name and title from user message\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n require:\n - propose_name\n---\n\nYou are a workspace naming assistant. Your only job is to call the `propose_name` tool with a suitable name and title.\n\nDo not emit text responses. Call the `propose_name` tool immediately.\n", "orchestrator": "---\nname: Orchestrator\ndescription: Coordinate sub-agent implementation and apply patches\nbase: exec\nsubagent:\n runnable: false\n append_prompt: |\n You are running as a sub-agent orchestrator in a child workspace.\n\n - Your parent workspace handles all PR management.\n Do NOT create pull requests, push to remote branches, or run any\n `gh pr` / `git push` commands. This applies even if AGENTS.md or\n other instructions say otherwise — those PR instructions target the\n top-level workspace only.\n - Orchestrate your delegated subtasks (spawn, await, apply patches,\n verify locally), then call `agent_report` exactly once with:\n - What changed (paths / key details)\n - What you ran (tests, typecheck, lint)\n - Any follow-ups / risks\n - Do not expand scope beyond the delegated task.\ntools:\n add:\n - ask_user_question\n remove:\n - propose_plan\n # Keep Orchestrator focused on coordination: no direct file edits.\n - file_edit_.*\n---\n\nYou are an internal Orchestrator agent running in Exec mode.\n\n**Mission:** coordinate implementation by delegating investigation + coding to sub-agents, then integrating their patches into this workspace.\n\nWhen a plan is present (default):\n\n- Treat the accepted plan as the source of truth. Its file paths, symbols, and structure were validated during planning — do not routinely spawn `explore` to re-confirm them. Exception: if the plan references stale paths or appears to have been authored/edited by the user without planner validation, a single targeted `explore` to sanity-check critical paths is acceptable.\n- Spawning `explore` to gather _additional_ context beyond what the plan provides is encouraged (e.g., checking whether a helper already exists, locating test files not mentioned in the plan, discovering existing patterns to match). This produces better implementation task briefs.\n- Do not spawn `explore` just to verify that a planner-generated plan is correct — that is the planner's job, and the plan was accepted by the user.\n- Convert the plan into concrete implementation subtasks and start delegation (`exec` for low complexity, `plan` for higher complexity).\n\nWhat you are allowed to do directly in this workspace:\n\n- Spawn/await/manage sub-agent tasks (`task`, `task_await`, `task_list`, `task_terminate`).\n- Apply patches (`task_apply_git_patch`).\n- Use `bash` for orchestration workflows: repo coordination via `git`/`gh`, targeted post-apply verification runs, and waiting on review/CI completion after PR updates (for example: `git push`, `gh pr comment`, `gh pr view`, `gh pr checks --watch`). Only run `gh pr create` when the user explicitly asks you to open a PR.\n- Ask clarifying questions with `ask_user_question` when blocked.\n- Coordinate targeted verification after integrating patches by running focused checks directly (when appropriate) or delegating runs to `explore`/`exec`.\n- Delegate patch-conflict reconciliation to `exec` sub-agents.\n\nHard rules (delegate-first):\n\n- Trust `explore` sub-agent reports as authoritative for repo facts (paths/symbols/callsites). Do not redo the same investigation yourself; only re-check if the report is ambiguous or contradicts other evidence.\n- For correctness claims, an `explore` sub-agent report counts as having read the referenced files.\n- **Do not do broad repo investigation here.** If you need context, spawn an `explore` sub-agent with a narrow prompt (keeps this agent focused on coordination).\n- **Do not implement features/bugfixes directly here.** Spawn `exec` (simple) or `plan` (complex) sub-agents and have them complete the work end-to-end.\n- **Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration.** `bash` in this workspace is for orchestration-only operations: `git`/`gh` repo management, targeted post-apply verification checks, and waiting for PR review/CI outcomes. If direct checks fail due to code issues, delegate fixes to `exec`/`plan` sub-agents instead of implementing changes here.\n- **Never read or scan session storage.** This includes `~/.mux/sessions/**` and `~/.mux/sessions/subagent-patches/**`. Treat session storage as an internal implementation detail; do not shell out to locate patch artifacts on disk. Only use `task_apply_git_patch` to access patches.\n\nDelegation guide:\n\n- Use `explore` for narrowly-scoped read-only questions (confirm an assumption, locate a symbol/callsite, find relevant tests). Avoid \"scan the repo\" prompts.\n- Use `exec` for straightforward, low-complexity work where the implementation path is obvious from the task brief.\n - Good fit: single-file edits, localized wiring to existing helpers, straightforward command execution, or narrowly scoped follow-ups with clear acceptance.\n - Provide a compact task brief (so the sub-agent can act without reading the full plan) with:\n - Task: one sentence\n - Background (why this matters): 1–3 bullets\n - Scope / non-goals: what to change, and what not to change\n - Starting points: relevant files/symbols/paths (from prior exploration)\n - Acceptance: bullets / checks\n - Deliverables: commits + verification commands to run\n - Constraints:\n - Do not expand scope.\n - Prefer `explore` tasks for repo investigation (paths/symbols/tests/patterns) to preserve your context window for implementation.\n Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory.\n If starting points + acceptance are already clear, skip initial explore and only explore when blocked.\n - Create one or more git commits before `agent_report`.\n- Use `plan` for higher-complexity subtasks that touch multiple files/locations, require non-trivial investigation, or have an unclear implementation approach.\n - Default to `plan` when a subtask needs coordinated updates across multiple locations, unless the edits are mechanical and already fully specified.\n - For higher-complexity implementation work, prefer `plan` over `exec` so the sub-agent can do targeted research and produce a precise plan before implementation begins.\n - Good fit: multi-file refactors, cross-module behavior changes, unfamiliar subsystems, or work where sequencing/dependencies need discovery.\n - Plan subtasks automatically hand off to implementation after a successful `propose_plan`; expect the usual task completion output once implementation finishes.\n - For `plan` briefs, prioritize goal + constraints + acceptance criteria over file-by-file diff instructions.\n- Use `desktop` for GUI-heavy desktop automation that requires repeated screenshot → act → verify loops (for example, interacting with application windows, clicking through UI flows, or visual verification). The desktop agent enforces a grounding discipline that keeps visual context local.\n\nRecommended Orchestrator → Exec task brief template:\n\n- Task: \n- Background (why this matters):\n - \n- Scope / non-goals:\n - Scope: \n - Non-goals: \n- Starting points: \n- Dependencies / assumptions:\n - Assumes: \n - If unmet: stop and report back; do not expand scope to create prerequisites.\n- Acceptance: \n- Deliverables:\n - Commits: \n - Verification: \n- Constraints:\n - Do not expand scope.\n - Prefer `explore` tasks for repo investigation (paths/symbols/tests/patterns) to preserve your context window for implementation.\n Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory.\n If starting points + acceptance are already clear, skip initial explore and only explore when blocked.\n - Create one or more git commits before `agent_report`.\n\nDependency analysis (required before spawning implementation tasks — `exec` or `plan`):\n\n- For each candidate subtask, write:\n - Outputs: files/targets/artifacts introduced/renamed/generated\n - Inputs / prerequisites (including for verification): what must already exist\n- A subtask is \"independent\" only if its patch can be applied + verified on the current parent workspace HEAD, without any other pending patch.\n- Parallelism is the default: maximize the size of each independent batch and run it in parallel.\n Use the sequential protocol only when a subtask has a concrete prerequisite on another subtask's outputs.\n- If task B depends on outputs from task A:\n - Do not spawn B until A has completed and A's patch is applied in the parent workspace.\n - If the dependency chain is tight (download → generate → wire-up), prefer one `exec` task rather than splitting.\n\nExample dependency chain (schema download → generation):\n\n- Task A outputs: a new download target + new schema files.\n- Task B inputs: those schema files; verifies by running generation.\n- Therefore: run Task A (await + apply patch) before spawning Task B.\n\nPatch integration loop (default):\n\n1. Identify a batch of independent subtasks.\n2. Spawn one implementation sub-agent task per subtask with `run_in_background: true` (`exec` for low complexity, `plan` for higher complexity).\n3. Await the batch via `task_await`.\n4. For each successful implementation task (`exec` directly, or `plan` after auto-handoff to implementation), integrate patches one at a time:\n - Treat every successful child task with a `taskId` as pending patch integration, whether the completion arrived inline from `task` or later from `task_await`.\n - Complete each dry-run + real-apply pair before starting the next patch. Applying one patch changes `HEAD`, which can invalidate later dry-run results.\n - Dry-run apply: `task_apply_git_patch` with `dry_run: true`.\n - If dry-run succeeds, immediately apply for real: `task_apply_git_patch` with `dry_run: false`.\n - Do not assume an inline `status: completed` result means the child changes are already present in this workspace.\n - If dry-run fails, treat it as a patch conflict and delegate reconciliation:\n 1. Do not attempt a real apply for that patch in this workspace.\n 2. Spawn a dedicated `exec` task. In the brief, include the original failing `task_id` and instruct the sub-agent to replay that patch via `task_apply_git_patch`, resolve conflicts in its own workspace, run `git am --continue`, commit the resolved result, and report back with a new patch to apply cleanly.\n - If real apply fails unexpectedly:\n 1. Restore a clean working tree before delegating: run `git am --abort` via `bash` only when a git-am session is in progress; if abort reports no operation in progress, continue.\n 2. Then follow the same delegated reconciliation flow above.\n5. Verify + review:\n - Run focused verification directly with `bash` when practical (for example: targeted tests or the repo's standard full-validation command), or delegate verification to `explore`/`exec` when investigation/fixes are likely.\n - Use `git`/`gh` directly for PR orchestration when a PR already exists (pushes, review-request comments, replies to review remarks, and CI/check-status waiting loops). Create a new PR only when the user explicitly asks.\n - PASS: summary-only (no long logs).\n - FAIL: include the failing command + key error lines; then delegate a fix to `exec`/`plan` and re-verify.\n\nSequential protocol (only for dependency chains):\n\n1. Spawn the prerequisite implementation task (`exec` or `plan`, based on complexity) with `run_in_background: false`.\n2. If step 1 returns `queued`/`running` without a completed report, call `task_await` with the returned `taskId` before attempting any patch apply. If step 1 returns `status: completed` inline, that same `taskId` still requires patch application.\n3. Dry-run apply its patch (`dry_run: true`); then apply for real (`dry_run: false`). If either step fails, follow the conflict playbook above (including `git am --abort` only when a real apply leaves a git-am session in progress).\n4. Only after the patch is applied, spawn the dependent implementation task.\n5. Repeat until the dependency chain is complete.\n\nNote: child workspaces are created at spawn time. Spawning dependents too early means they work from the wrong repo snapshot and get forced into scope expansion.\n\nKeep context minimal:\n\n- Do not request, paste, or restate large plans.\n- Prefer short, actionable prompts, but include enough context that the sub-agent does not need your plan file.\n - Child workspaces do not automatically have access to the parent's plan file; summarize just the relevant slice or provide file pointers.\n- Prefer file paths/symbols over long prose.\n", "plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: true\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Plan should not apply sub-agent patches.\n - task_apply_git_patch\n # Global config and catalog tools stay out of general-purpose agents\n - mux_agents_.*\n - agent_skill_write\n - agent_skill_delete\n - mux_config_read\n - mux_config_write\n - skills_catalog_.*\n - analytics_query\n require:\n - propose_plan\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan.\n- Match the plan's size and structure to the problem.\n- Keep the plan self-contained and scannable.\n- Assume the user wants the completed plan, not a description of how you would make one.\n\n## Investigate only what you need\n\nBefore proposing a plan, figure out what you need to verify and gather that evidence.\n\n- When delegation is available, use Explore sub-agents for repo investigation. In Plan Mode, only\n spawn `agentId: \"explore\"` tasks.\n- Give each Explore task specific deliverables, and parallelize them when that helps.\n- Trust completed Explore reports for repo facts. Do not re-investigate just to second-guess them.\n If something is missing, ambiguous, or conflicting, spawn another focused Explore task.\n- If task delegation is unavailable, do the narrowest read-only investigation yourself.\n- Reserve `file_read` for the plan file itself, user-provided text already in this conversation,\n and that narrow fallback. When reading the plan file, prefer `file_read` over `bash cat` so long\n plans do not get compacted.\n- Wait for any spawned Explore tasks before calling `propose_plan`.\n\n## Write the plan\n\n- Use whatever structure best fits the problem: a few bullets, phases, workstreams, risks, or\n decision points are all fine.\n- Include the context, constraints, evidence, and concrete path forward somewhere in that\n structure.\n- Name the files, symbols, or subsystems that matter, and order the work so an implementer can\n follow it.\n- Keep uncertainty brief and local to the relevant step. Use `ask_user_question` when you need the\n user to decide something.\n- Include small code snippets only when they materially reduce ambiguity.\n- Put long rationale or background into `
/` blocks.\n\n## Questions and handoff\n\n- If you need clarification from the user, use `ask_user_question` instead of asking in chat or\n adding an \"Open Questions\" section to the plan.\n- Ask up to 4 questions at a time (2–4 options each; \"Other\" remains available for free-form\n input).\n- After you get answers, update the plan and then call `propose_plan` when it is ready for review.\n- After calling `propose_plan`, do not paste the plan into chat or mention the plan file path.\n- If the user wants edits to other files, ask them to switch to Exec mode.\n\nWorkspace-specific runtime instructions (plan file path, edit restrictions, nesting warnings) are\nprovided separately.\n", - "system1_bash": "---\nname: System1 Bash\ndescription: Fast bash-output filtering (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - system1_keep_ranges\n---\n\nYou are a fast bash-output filtering assistant.\n\nYou will be given:\n\n- `maxKeptLines` (budget)\n- `Display name` (optional): a short intent label for the command\n- `Bash script`\n- `Numbered output`\n\nGiven the numbered output, decide which lines to keep so the user sees the most relevant information.\n\nIMPORTANT:\n\n- You MUST call `system1_keep_ranges` exactly once.\n- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments).\n\nRules:\n\n- Line numbers are 1-based indices into the numbered output.\n- Use the `Display name` and `Bash script` as intent hints.\n- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping\n representative file paths/matches and any summary/counts (not just errors).\n- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings.\n- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra\n denoising: prefer keeping most/all lines within the budget.\n- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget.\n- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest\n next-step commands or point to docs/help. Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths,\n and conflict markers instead.\n- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when\n the hint is the only clue explaining a blocking state.\n- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context.\n- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just\n to reduce range count; it's OK to return many ranges when denoising (e.g., > 8).\n- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning\n (e.g., repeated progress, retries, or identical stack traces). If the same error repeats, keep only\n the most informative instance plus minimal surrounding context.\n- If there are many similar warnings/errors, keep only a few representative examples (prefer those\n with file paths/line numbers) plus any summary/count.\n- Always keep at least 1 line if any output exists.\n- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate).\n\nExample:\n\n- Numbered output:\n - 0001| building...\n - 0002| ERROR: expected X, got Y\n - 0003| at path/to/file.ts:12:3\n - 0004| done\n- Tool call:\n - system1_keep_ranges({\"keep_ranges\":[{\"start\":2,\"end\":3,\"reason\":\"error\"}]})\n", }; diff --git a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts index dea3884c13..24ec1bf1cc 100644 --- a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts +++ b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts @@ -20,7 +20,6 @@ const BUILT_IN_SOURCES: BuiltInSource[] = [ { id: "compact", content: BUILTIN_AGENT_CONTENT.compact }, { id: "desktop", content: BUILTIN_AGENT_CONTENT.desktop }, { id: "explore", content: BUILTIN_AGENT_CONTENT.explore }, - { id: "system1_bash", content: BUILTIN_AGENT_CONTENT.system1_bash }, { id: "name_workspace", content: BUILTIN_AGENT_CONTENT.name_workspace }, { id: "orchestrator", content: BUILTIN_AGENT_CONTENT.orchestrator }, ]; diff --git a/src/node/services/agentSession.startupAutoRetry.test.ts b/src/node/services/agentSession.startupAutoRetry.test.ts index 5ae543b8e4..6ffd35e4b0 100644 --- a/src/node/services/agentSession.startupAutoRetry.test.ts +++ b/src/node/services/agentSession.startupAutoRetry.test.ts @@ -436,8 +436,6 @@ describe("AgentSession startup auto-retry recovery", () => { model: "anthropic:claude-sonnet-4-5", agentId: "exec", thinkingLevel: "high", - system1ThinkingLevel: "low", - system1Model: "openai:gpt-4o-mini", toolPolicy: [{ regex_match: "bash", action: "disable" }], additionalSystemInstructions: "Use one sentence.", maxOutputTokens: 2048, @@ -447,7 +445,6 @@ describe("AgentSession startup auto-retry recovery", () => { use1MContextModels: ["anthropic:claude-sonnet-4-5"], }, }, - experiments: { system1: true }, disableWorkspaceAgents: true, }, }) @@ -477,13 +474,10 @@ describe("AgentSession startup auto-retry recovery", () => { expect(retryOptions.options.model).toBe("anthropic:claude-sonnet-4-5"); expect(retryOptions.options.agentId).toBe("exec"); expect(retryOptions.options.thinkingLevel).toBe("high"); - expect(retryOptions.options.system1ThinkingLevel).toBe("low"); - expect(retryOptions.options.system1Model).toBe("openai:gpt-4o-mini"); expect(retryOptions.options.additionalSystemInstructions).toBe("Use one sentence."); expect(retryOptions.options.maxOutputTokens).toBe(2048); expect(retryOptions.options.toolPolicy).toEqual([{ regex_match: "bash", action: "disable" }]); expect(retryOptions.options.disableWorkspaceAgents).toBe(true); - expect(retryOptions.options.experiments?.system1).toBe(true); expect(retryOptions.options.providerOptions?.anthropic?.use1MContext).toBe(true); session.dispose(); diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts index 3f44910490..ac28585138 100644 --- a/src/node/services/agentSession.ts +++ b/src/node/services/agentSession.ts @@ -1165,13 +1165,6 @@ export class AgentSession { coerceThinkingLevel(lastAssistantMessage?.metadata?.thinkingLevel) ?? coerceThinkingLevel(agentSettings?.thinkingLevel); - const persistedSystem1ThinkingLevel = coerceThinkingLevel( - persistedRetrySendOptions?.system1ThinkingLevel - ); - const persistedSystem1Model = this.normalizeStartupModel( - persistedRetrySendOptions?.system1Model - ); - const persistedToolPolicy = lastUserMessage?.metadata?.toolPolicy ?? persistedRetrySendOptions?.toolPolicy; const persistedDisableWorkspaceAgents = @@ -1214,13 +1207,6 @@ export class AgentSession { if (persistedExperiments) { compactionRequest.experiments = persistedExperiments; } - if (persistedSystem1ThinkingLevel) { - compactionRequest.system1ThinkingLevel = persistedSystem1ThinkingLevel; - } - if (persistedSystem1Model) { - compactionRequest.system1Model = persistedSystem1Model; - } - if (persistedRetrySendOptions?.agentInitiated === true) { compactionRequest.agentInitiated = true; } @@ -1235,12 +1221,6 @@ export class AgentSession { if (baseThinkingLevel) { retryRequest.thinkingLevel = baseThinkingLevel; } - if (persistedSystem1ThinkingLevel) { - retryRequest.system1ThinkingLevel = persistedSystem1ThinkingLevel; - } - if (persistedSystem1Model) { - retryRequest.system1Model = persistedSystem1Model; - } if (persistedToolPolicy) { retryRequest.toolPolicy = persistedToolPolicy; } @@ -3001,13 +2981,9 @@ export class AgentSession { : normalizeToCanonical(trimmedModelString); }; - const system1Model = options.system1Model?.trim(); - return { ...options, model: normalizeModelSelection(options.model), - system1Model: - system1Model && system1Model.length > 0 ? normalizeModelSelection(system1Model) : undefined, }; } @@ -3188,8 +3164,6 @@ export class AgentSession { changedFileAttachments.length > 0 ? changedFileAttachments : undefined, postCompactionAttachments, experiments: options?.experiments, - system1Model: options?.system1Model, - system1ThinkingLevel: options?.system1ThinkingLevel, disableWorkspaceAgents: options?.disableWorkspaceAgents, hasQueuedMessage: () => !this.messageQueue.isEmpty() && this.messageQueue.getQueueDispatchMode() === "tool-end", @@ -4774,10 +4748,6 @@ export class AgentSession { agentId: targetAgentId, // Preserve relevant settings from the original request ...(effectiveThinkingLevel != null && { thinkingLevel: effectiveThinkingLevel }), - ...(currentOptions?.system1ThinkingLevel != null && { - system1ThinkingLevel: currentOptions.system1ThinkingLevel, - }), - ...(currentOptions?.system1Model != null && { system1Model: currentOptions.system1Model }), ...(followUpProviderOptions != null && { providerOptions: followUpProviderOptions, }), diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index cf8e747aa6..74887797ce 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -917,8 +917,6 @@ export const BUILTIN_SKILL_FILES: Record> = { " # Exec mode doesn't use planning tools", " - propose_plan", " - ask_user_question", - " # Internal-only tools", - " - system1_keep_ranges", " # Global config and catalog tools stay out of general-purpose agents", " - mux_agents_.*", " - agent_skill_write", @@ -1287,8 +1285,6 @@ export const BUILTIN_SKILL_FILES: Record> = { " # No planning tools", " - propose_plan", " - ask_user_question", - " # Internal-only", - " - system1_keep_ranges", " # Global config and catalog tools", " - mux_agents_.*", " - agent_skill_write", @@ -1386,80 +1382,6 @@ export const BUILTIN_SKILL_FILES: Record> = { "", "", "", - "### System1 Bash (internal)", - "", - "**Fast bash-output filtering (internal)**", - "", - '', - "", - "```md", - "---", - "name: System1 Bash", - "description: Fast bash-output filtering (internal)", - "ui:", - " hidden: true", - "subagent:", - " runnable: false", - "tools:", - " add:", - " - system1_keep_ranges", - "---", - "", - "You are a fast bash-output filtering assistant.", - "", - "You will be given:", - "", - "- `maxKeptLines` (budget)", - "- `Display name` (optional): a short intent label for the command", - "- `Bash script`", - "- `Numbered output`", - "", - "Given the numbered output, decide which lines to keep so the user sees the most relevant information.", - "", - "IMPORTANT:", - "", - "- You MUST call `system1_keep_ranges` exactly once.", - "- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments).", - "", - "Rules:", - "", - "- Line numbers are 1-based indices into the numbered output.", - "- Use the `Display name` and `Bash script` as intent hints.", - "- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping", - " representative file paths/matches and any summary/counts (not just errors).", - "- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings.", - "- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra", - " denoising: prefer keeping most/all lines within the budget.", - "- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget.", - "- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest", - " next-step commands or point to docs/help. Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths,", - " and conflict markers instead.", - "- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when", - " the hint is the only clue explaining a blocking state.", - "- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context.", - "- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just", - " to reduce range count; it's OK to return many ranges when denoising (e.g., > 8).", - "- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning", - " (e.g., repeated progress, retries, or identical stack traces). If the same error repeats, keep only", - " the most informative instance plus minimal surrounding context.", - "- If there are many similar warnings/errors, keep only a few representative examples (prefer those", - " with file paths/line numbers) plus any summary/count.", - "- Always keep at least 1 line if any output exists.", - "- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate).", - "", - "Example:", - "", - "- Numbered output:", - " - 0001| building...", - " - 0002| ERROR: expected X, got Y", - " - 0003| at path/to/file.ts:12:3", - " - 0004| done", - "- Tool call:", - ' - system1_keep_ranges({"keep_ranges":[{"start":2,"end":3,"reason":"error"}]})', - "```", - "", - "", - "", "{/* END BUILTIN_AGENTS */}", "", "## Related Docs", @@ -4484,18 +4406,6 @@ export const BUILTIN_SKILL_FILES: Record> = { "
", "", "
", - "system1_keep_ranges (4)", - "", - "| Env var | JSON path | Type | Description |", - "| ------------------------------------------- | ----------------------------- | ------ | ------------------------------------------------------- |", - "| `MUX_TOOL_INPUT_KEEP_RANGES__END` | `keep_ranges[].end` | number | 1-based end line (inclusive) in the numbered output |", - "| `MUX_TOOL_INPUT_KEEP_RANGES__REASON` | `keep_ranges[].reason` | string | Optional short reason for keeping this range |", - "| `MUX_TOOL_INPUT_KEEP_RANGES__START` | `keep_ranges[].start` | number | 1-based start line (inclusive) in the numbered output |", - "| `MUX_TOOL_INPUT_KEEP_RANGES_COUNT` | `keep_ranges.length` | number | Number of elements in keep_ranges (Line ranges to keep) |", - "", - "
", - "", - "
", "task (8)", "", "| Env var | JSON path | Type | Description |", diff --git a/src/node/services/aiService.test.ts b/src/node/services/aiService.test.ts index 47f2c4bb25..ed61ba5848 100644 --- a/src/node/services/aiService.test.ts +++ b/src/node/services/aiService.test.ts @@ -60,7 +60,6 @@ import { createDisplayUsage } from "@/common/utils/tokens/displayUsage"; import { normalizeToCanonical } from "@/common/utils/ai/models"; import * as toolsModule from "@/common/utils/tools/tools"; import * as providerOptionsModule from "@/common/utils/ai/providerOptions"; -import * as system1ToolWrapperModule from "./system1ToolWrapper"; import * as systemMessageModule from "./systemMessage"; describe("AIService", () => { @@ -1788,69 +1787,6 @@ describe("AIService.streamMessage compaction boundary slicing", () => { expect(Object.prototype.hasOwnProperty.call(initialMetadata, "routeProvider")).toBe(false); }); - it("passes routeProvider into the System1 wrapper when System1 reuses the primary model", async () => { - using muxHome = new DisposableTempDir("ai-service-system1-route-provider"); - const projectPath = path.join(muxHome.path, "project"); - await fs.mkdir(projectPath, { recursive: true }); - - const workspaceId = "workspace-system1-route-provider"; - const metadata = createWorkspaceMetadata(workspaceId, projectPath); - const harness = createHarness(muxHome.path, metadata, { routeProvider: "openrouter" }); - - let receivedWrapOptions: - | Parameters[0] - | undefined; - spyOn(system1ToolWrapperModule, "wrapToolsWithSystem1").mockImplementation((options) => { - receivedWrapOptions = options; - return options.tools; - }); - - const result = await harness.service.streamMessage({ - messages: [createMuxMessage("latest-user", "user", "continue")], - workspaceId, - modelString: "openai:gpt-5.2", - thinkingLevel: "medium", - experiments: { system1: true }, - }); - - expect(result.success).toBe(true); - expect(receivedWrapOptions).toBeDefined(); - expect(receivedWrapOptions?.routeProvider).toBe("openrouter"); - expect(receivedWrapOptions?.system1Model).toBeUndefined(); - }); - - it("passes routeProvider into the System1 wrapper when System1 uses an explicit canonical model", async () => { - using muxHome = new DisposableTempDir("ai-service-system1-canonical-route-provider"); - const projectPath = path.join(muxHome.path, "project"); - await fs.mkdir(projectPath, { recursive: true }); - - const workspaceId = "workspace-system1-canonical-route-provider"; - const metadata = createWorkspaceMetadata(workspaceId, projectPath); - const harness = createHarness(muxHome.path, metadata, { routeProvider: "openrouter" }); - - let receivedWrapOptions: - | Parameters[0] - | undefined; - spyOn(system1ToolWrapperModule, "wrapToolsWithSystem1").mockImplementation((options) => { - receivedWrapOptions = options; - return options.tools; - }); - - const result = await harness.service.streamMessage({ - messages: [createMuxMessage("latest-user", "user", "continue")], - workspaceId, - modelString: "openai:gpt-5.2", - thinkingLevel: "medium", - system1Model: "openai:gpt-5.2", - experiments: { system1: true }, - }); - - expect(result.success).toBe(true); - expect(receivedWrapOptions).toBeDefined(); - expect(receivedWrapOptions?.routeProvider).toBe("openrouter"); - expect(receivedWrapOptions?.system1Model).toBe("openai:gpt-5.2"); - }); - it("derives sentinel tool names from assembled post-policy tools", async () => { using muxHome = new DisposableTempDir("ai-service-sentinel-tool-names"); const projectPath = path.join(muxHome.path, "project"); diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 1d89bde878..373269b5e6 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -94,7 +94,6 @@ import type { PTCEventWithParent } from "@/node/services/tools/code_execution"; import { MockAiStreamPlayer } from "./mock/mockAiStreamPlayer"; import { DEVTOOLS_RUN_METADATA_ID_HEADER } from "./devToolsHeaderCapture"; import { ProviderModelFactory, modelCostsIncluded } from "./providerModelFactory"; -import { wrapToolsWithSystem1 } from "./system1ToolWrapper"; import { prepareMessagesForProvider } from "./messagePipeline"; import { resolveAgentForStream } from "./agentResolution"; import { buildPlanInstructions, buildStreamSystemContext } from "./streamContextBuilder"; @@ -137,8 +136,6 @@ export interface StreamMessageOptions { changedFileAttachments?: EditedFileAttachment[]; postCompactionAttachments?: PostCompactionAttachment[] | null; experiments?: SendMessageOptions["experiments"]; - system1Model?: string; - system1ThinkingLevel?: ThinkingLevel; disableWorkspaceAgents?: boolean; hasQueuedMessage?: () => boolean; openaiTruncationModeOverride?: "auto" | "disabled"; @@ -732,8 +729,6 @@ export class AIService extends EventEmitter { changedFileAttachments, postCompactionAttachments, experiments, - system1Model, - system1ThinkingLevel, disableWorkspaceAgents, hasQueuedMessage, openaiTruncationModeOverride, @@ -1863,31 +1858,7 @@ export class AIService extends EventEmitter { const errMsg = getErrorMessage(error); workspaceLog.warn("Failed to capture debug LLM request snapshot", { error: errMsg }); } - const toolsForStream = - experiments?.system1 === true - ? wrapToolsWithSystem1({ - tools, - system1Model, - system1ThinkingLevel, - modelString, - effectiveModelString, - primaryModel: modelResult.data.model, - routeProvider, - muxProviderOptions: effectiveMuxProviderOptions, - workspaceId, - promptCacheScope, - effectiveMode, - planFilePath, - taskSettings, - runtimeTempDir, - runtime, - agentDiscoveryPath, - createModel: (ms, o, createOptions) => - this.createModel(ms, o, { ...(createOptions ?? {}), workspaceId }), - emitBashOutput: (ev) => this.emit("bash-output", ev), - sessionUsageService: this.sessionUsageService, - }) - : tools; + const toolsForStream = tools; // Top-level agents need a belt-and-suspenders toolChoice safety net for // required routing/completion tools. Sub-agents rely on taskService.ts // post-stream recovery when a required tool is skipped. diff --git a/src/node/services/experimentsService.test.ts b/src/node/services/experimentsService.test.ts index d65cfec926..ae12f66608 100644 --- a/src/node/services/experimentsService.test.ts +++ b/src/node/services/experimentsService.test.ts @@ -26,7 +26,7 @@ describe("ExperimentsService", () => { { version: 1, experiments: { - [EXPERIMENT_IDS.SYSTEM_1]: { + [EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING]: { value: "test", fetchedAtMs: Date.now(), }, @@ -58,12 +58,15 @@ describe("ExperimentsService", () => { await service.initialize(); const values = service.getAll(); - expect(values[EXPERIMENT_IDS.SYSTEM_1]).toEqual({ + expect(values[EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING]).toEqual({ value: "test", source: "cache", }); - expect(setFeatureFlagVariant).toHaveBeenCalledWith(EXPERIMENT_IDS.SYSTEM_1, "test"); + expect(setFeatureFlagVariant).toHaveBeenCalledWith( + EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING, + "test" + ); }); test("refreshExperiment updates cache and writes it to disk", async () => { @@ -85,9 +88,9 @@ describe("ExperimentsService", () => { }); await service.initialize(); - await service.refreshExperiment(EXPERIMENT_IDS.SYSTEM_1); + await service.refreshExperiment(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING); - const value = service.getExperimentValue(EXPERIMENT_IDS.SYSTEM_1); + const value = service.getExperimentValue(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING); expect(value.value).toBe("test"); expect(value.source).toBe("posthog"); @@ -97,10 +100,13 @@ describe("ExperimentsService", () => { expect((disk as { version: unknown }).version).toBe(1); expect((disk as { experiments: Record }).experiments).toHaveProperty( - EXPERIMENT_IDS.SYSTEM_1 + EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING ); - expect(setFeatureFlagVariant).toHaveBeenCalledWith(EXPERIMENT_IDS.SYSTEM_1, "test"); + expect(setFeatureFlagVariant).toHaveBeenCalledWith( + EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING, + "test" + ); }); test("persists backend overrides and applies them before remote gating", async () => { @@ -220,11 +226,11 @@ describe("ExperimentsService", () => { await service.initialize(); const values = service.getAll(); - expect(values[EXPERIMENT_IDS.SYSTEM_1]).toEqual({ + expect(values[EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING]).toEqual({ value: null, source: "disabled", }); - expect(service.isExperimentEnabled(EXPERIMENT_IDS.SYSTEM_1)).toBe(false); + expect(service.isExperimentEnabled(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING)).toBe(false); }); }); diff --git a/src/node/services/system1/bashCompactionPolicy.test.ts b/src/node/services/system1/bashCompactionPolicy.test.ts deleted file mode 100644 index 39e952a318..0000000000 --- a/src/node/services/system1/bashCompactionPolicy.test.ts +++ /dev/null @@ -1,292 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import { homedir } from "os"; - -import { - classifyBashIntent, - decideBashOutputCompaction, - isBashOutputAlreadyTargeted, -} from "./bashCompactionPolicy"; - -describe("bashCompactionPolicy", () => { - describe("isBashOutputAlreadyTargeted", () => { - it("detects common output-slicing commands", () => { - expect(isBashOutputAlreadyTargeted("sudo head -n 1 some.log")).toBe(true); - expect(isBashOutputAlreadyTargeted("rg foo . | head -n 50")).toBe(true); - expect(isBashOutputAlreadyTargeted("tail -n 100 some.log")).toBe(true); - expect(isBashOutputAlreadyTargeted("sed -n '1,200p' file.txt")).toBe(true); - expect(isBashOutputAlreadyTargeted("awk 'NR>=10 && NR<=20 {print}' file.txt")).toBe(true); - }); - - it("returns false for non-targeted scripts", () => { - expect(isBashOutputAlreadyTargeted("ls -la")).toBe(false); - expect(isBashOutputAlreadyTargeted("rg foo .")).toBe(false); - expect(isBashOutputAlreadyTargeted("git rev-parse HEAD")).toBe(false); - }); - }); - - describe("classifyBashIntent", () => { - it("classifies exploration via display name keywords", () => { - expect(classifyBashIntent({ script: "echo hi", displayName: "List files" })).toBe( - "exploration" - ); - expect(classifyBashIntent({ script: "echo hi", displayName: "Search repo" })).toBe( - "exploration" - ); - }); - - it("classifies exploration via common commands", () => { - expect(classifyBashIntent({ script: "ls -la" })).toBe("exploration"); - expect(classifyBashIntent({ script: "git status --porcelain" })).toBe("exploration"); - expect(classifyBashIntent({ script: "find . -maxdepth 2 -type f" })).toBe("exploration"); - }); - - it("classifies logs for build/test commands", () => { - expect(classifyBashIntent({ script: "make test" })).toBe("logs"); - expect(classifyBashIntent({ script: "bun test" })).toBe("logs"); - }); - }); - - describe("decideBashOutputCompaction", () => { - it("skips when output is below configured thresholds", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: "ls", - totalLines: 5, - totalBytes: 1_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.shouldCompact).toBe(false); - expect(decision.skipReason).toBe("below_threshold"); - expect(decision.triggeredByLines).toBe(false); - expect(decision.triggeredByBytes).toBe(false); - }); - - it("skips compaction for already-targeted scripts", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: "rg foo . | head -n 50", - totalLines: 200, - totalBytes: 10_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.shouldCompact).toBe(false); - expect(decision.skipReason).toBe("already_targeted_script"); - }); - - it("skips compaction when script reads the configured plan file", () => { - const planFilePath = "~/.mux/plans/my-project/my-workspace.md"; - const scripts = [ - `cat ${planFilePath}`, - `bat ${planFilePath}`, - `python -c "print(open('${planFilePath}').read())"`, - ]; - - for (const script of scripts) { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script, - planFilePath, - totalLines: 200, - totalBytes: 10_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.shouldCompact).toBe(false); - expect(decision.skipReason).toBe("plan_file_in_script"); - } - }); - - it("skips compaction when script and planFilePath use different home path forms", () => { - const homePosix = homedir().replaceAll("\\\\", "/"); - const tildePlanFilePath = "~/.mux/plans/my-project/my-workspace.md"; - const expandedPlanFilePath = `${homePosix}/.mux/plans/my-project/my-workspace.md`; - - const tildeDecision = decideBashOutputCompaction({ - toolName: "bash", - script: `cat ${tildePlanFilePath}`, - planFilePath: expandedPlanFilePath, - totalLines: 200, - totalBytes: 10_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(tildeDecision.shouldCompact).toBe(false); - expect(tildeDecision.skipReason).toBe("plan_file_in_script"); - - const expandedDecision = decideBashOutputCompaction({ - toolName: "bash", - script: `cat ${expandedPlanFilePath}`, - planFilePath: tildePlanFilePath, - totalLines: 200, - totalBytes: 10_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(expandedDecision.shouldCompact).toBe(false); - expect(expandedDecision.skipReason).toBe("plan_file_in_script"); - }); - - it("keeps default compaction behavior for non-plan file scripts", () => { - const planFilePath = "~/.mux/plans/my-project/my-workspace.md"; - const scripts = ["cat ./stdout.log", "cat file | rg needle"]; - - for (const script of scripts) { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script, - planFilePath, - totalLines: 200, - totalBytes: 10_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.shouldCompact).toBe(true); - expect(decision.skipReason).toBeUndefined(); - } - }); - - it("skips compaction for small exploration output", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: "ls", - totalLines: 50, - totalBytes: 8_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.intent).toBe("exploration"); - expect(decision.shouldCompact).toBe(false); - expect(decision.skipReason).toBe("exploration_output_small"); - }); - - it("skips compaction for conflict-marker searches when output is within tool limits", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: 'rg "<<<<<<<|=======|>>>>>>>" .', - totalLines: 150, - totalBytes: 10_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.shouldCompact).toBe(false); - expect(decision.skipReason).toBe("conflict_marker_search_within_limits"); - }); - - it("boosts maxKeptLines for conflict-marker searches when output exceeds tool limits", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: 'rg "<<<<<<<|=======|>>>>>>>" .', - totalLines: 400, - totalBytes: 20_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.shouldCompact).toBe(true); - expect(decision.skipReason).toBeUndefined(); - expect(decision.effectiveMaxKeptLines).toBe(300); - }); - - it("respects user maxKeptLines for small exploration output", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: "ls", - totalLines: 50, - totalBytes: 8_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 20, - }); - - expect(decision.intent).toBe("exploration"); - expect(decision.shouldCompact).toBe(true); - expect(decision.skipReason).toBeUndefined(); - expect(decision.effectiveMaxKeptLines).toBe(20); - }); - - it("respects user thresholds for small exploration output", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: "ls", - totalLines: 50, - totalBytes: 8_000, - minLines: 0, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.intent).toBe("exploration"); - expect(decision.shouldCompact).toBe(true); - expect(decision.skipReason).toBeUndefined(); - expect(decision.effectiveMaxKeptLines).toBe(40); - }); - - it("does not boost maxKeptLines when thresholds are user-set", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: "find . -type f", - totalLines: 200, - totalBytes: 14_000, - minLines: 0, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.intent).toBe("exploration"); - expect(decision.shouldCompact).toBe(true); - expect(decision.skipReason).toBeUndefined(); - expect(decision.effectiveMaxKeptLines).toBe(40); - }); - - it("boosts maxKeptLines for large exploration output when using the default budget", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: "find . -type f", - totalLines: 200, - totalBytes: 14_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.intent).toBe("exploration"); - expect(decision.shouldCompact).toBe(true); - expect(decision.effectiveMaxKeptLines).toBe(120); - }); - - it("keeps default behavior for logs", () => { - const decision = decideBashOutputCompaction({ - toolName: "bash", - script: "make test", - totalLines: 200, - totalBytes: 14_000, - minLines: 10, - minTotalBytes: 4 * 1024, - maxKeptLines: 40, - }); - - expect(decision.intent).toBe("logs"); - expect(decision.shouldCompact).toBe(true); - expect(decision.effectiveMaxKeptLines).toBe(40); - }); - }); -}); diff --git a/src/node/services/system1/bashCompactionPolicy.ts b/src/node/services/system1/bashCompactionPolicy.ts deleted file mode 100644 index 6efdc2527c..0000000000 --- a/src/node/services/system1/bashCompactionPolicy.ts +++ /dev/null @@ -1,421 +0,0 @@ -import { homedir } from "os"; - -import assert from "@/common/utils/assert"; -import { BASH_HARD_MAX_LINES, BASH_MAX_TOTAL_BYTES } from "@/common/constants/toolLimits"; -import { SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS } from "@/common/types/tasks"; - -export type BashOutputIntent = "exploration" | "logs" | "unknown"; - -export function isBashOutputAlreadyTargeted(script: string): boolean { - assert(typeof script === "string", "script must be a string"); - - const trimmed = script.trim(); - if (trimmed.length === 0) { - return false; - } - - // If the script already limits output to a slice (head/tail/line ranges), further denoising is - // likely to drop exactly what the caller asked to see. - // - // NOTE: Avoid false positives like `git rev-parse HEAD`. - const statementSegments = trimmed - .split(/(?:\r?\n|&&|;)+/) - .map((part) => part.trim()) - .filter(Boolean); - - const slicingCommands = new Set(["head", "tail"]); - for (const statement of statementSegments) { - const pipeSegments = statement - .split("|") - .map((part) => part.trim()) - .filter(Boolean); - - for (const pipeSegment of pipeSegments) { - const tokens = pipeSegment.split(/\s+/).filter(Boolean); - if (tokens.length === 0) { - continue; - } - - const cmd0 = (tokens[0] ?? "").toLowerCase(); - const cmd1 = (tokens[1] ?? "").toLowerCase(); - - if (slicingCommands.has(cmd0)) { - return true; - } - - // Common wrapper: `sudo head ...`. - if ((cmd0 === "sudo" || cmd0 === "command") && slicingCommands.has(cmd1)) { - return true; - } - } - } - - if (/\bsed\b[^\n]*\s-n\s+['"]?\d+\s*,\s*\d+\s*p['"]?/i.test(trimmed)) { - return true; - } - - if (/\bawk\b[^\n]*\bNR\s*(==|!=|>=|<=|>|<)\s*\d+/i.test(trimmed)) { - return true; - } - - return false; -} - -function normalizeDisplayName(displayName: string | undefined): string | undefined { - if (typeof displayName !== "string") { - return undefined; - } - - const trimmed = displayName.trim(); - return trimmed.length > 0 ? trimmed : undefined; -} - -function getFirstNonTrivialCommand(script: string): { cmd: string; args: string[] } | undefined { - const segments = script - .split(/(?:\r?\n|&&|;)+/) - .map((part) => part.trim()) - .filter(Boolean); - - const ignoredCommands = new Set(["cd", "pushd", "popd", "export", "set"]); - - for (const segment of segments) { - const tokens = segment.split(/\s+/).filter(Boolean); - if (tokens.length === 0) { - continue; - } - - const rawCmd = tokens[0] ?? ""; - const cmd = rawCmd.replace(/^\\/, ""); - - if (!cmd || ignoredCommands.has(cmd)) { - continue; - } - - return { cmd, args: tokens.slice(1) }; - } - - return undefined; -} - -export function classifyBashIntent(params: { - script: string; - displayName?: string; -}): BashOutputIntent { - assert(params, "params is required"); - assert(typeof params.script === "string", "script must be a string"); - - const displayName = normalizeDisplayName(params.displayName); - if (displayName) { - const normalized = displayName.toLowerCase(); - if (/\b(list|explore|search|scan)\b/.test(normalized)) { - return "exploration"; - } - } - - const first = getFirstNonTrivialCommand(params.script); - if (first) { - const cmd = first.cmd.toLowerCase(); - const arg0 = first.args[0]?.toLowerCase(); - - const explorationCommands = new Set(["ls", "find", "fd", "tree", "rg", "grep"]); - if (explorationCommands.has(cmd)) { - return "exploration"; - } - - if (cmd === "git" && (arg0 === "ls-files" || arg0 === "status")) { - return "exploration"; - } - - const logCommands = new Set(["make", "bun", "npm", "yarn", "pnpm"]); - if (logCommands.has(cmd)) { - return "logs"; - } - } - - return "unknown"; -} - -function isGitConflictMarkerSearch(script: string): boolean { - assert(typeof script === "string", "script must be a string"); - - const trimmed = script.trim(); - if (trimmed.length === 0) { - return false; - } - - const literalNeedles = ["<<<<<<<", ">>>>>>>", "=======", "|||||||"]; - for (const needle of literalNeedles) { - if (trimmed.includes(needle)) { - return true; - } - } - - // Common regex quantifier forms (used in `rg`/`grep` patterns). - const quantifierNeedles = ["<{7}", ">{7}", "={7}", "|{7}"]; - for (const needle of quantifierNeedles) { - if (trimmed.includes(needle)) { - return true; - } - } - - return false; -} - -function scriptMentionsPlanFile(script: string, planFilePath: string | undefined): boolean { - assert(typeof script === "string", "script must be a string"); - - if (typeof planFilePath !== "string") { - return false; - } - - const trimmedPlanFilePath = planFilePath.trim(); - if (trimmedPlanFilePath.length === 0) { - return false; - } - - const needles = new Set(); - - const addNeedle = (needle: string): void => { - const trimmed = needle.trim(); - if (trimmed.length === 0) { - return; - } - - needles.add(trimmed); - }; - - const addNeedleVariants = (needle: string): void => { - addNeedle(needle); - addNeedle(needle.replaceAll("\\\\", "/")); - }; - - addNeedleVariants(trimmedPlanFilePath); - - const home = homedir(); - const homePosix = home.replaceAll("\\\\", "/"); - - if (trimmedPlanFilePath === "~") { - addNeedleVariants(home); - addNeedleVariants(homePosix); - } else if (trimmedPlanFilePath.startsWith("~/") || trimmedPlanFilePath.startsWith("~\\\\")) { - const suffix = trimmedPlanFilePath.slice(1); - addNeedleVariants(`${home}${suffix}`); - addNeedleVariants(`${homePosix}${suffix.replaceAll("\\\\", "/")}`); - } - - // Also match the `~` form when the configured plan path is already expanded. - for (const candidateHome of [home, homePosix]) { - if (!trimmedPlanFilePath.startsWith(candidateHome)) { - continue; - } - - const suffix = trimmedPlanFilePath.slice(candidateHome.length); - if (suffix.length > 0 && !suffix.startsWith("/") && !suffix.startsWith("\\\\")) { - continue; - } - - addNeedleVariants(`~${suffix}`); - } - - for (const needle of needles) { - if (script.includes(needle)) { - return true; - } - } - - return false; -} - -export type BashOutputCompactionSkipReason = - | "below_threshold" - | "already_targeted_script" - | "plan_file_in_script" - | "exploration_output_small" - | "conflict_marker_search_within_limits"; - -export interface BashOutputCompactionDecision { - shouldCompact: boolean; - skipReason?: BashOutputCompactionSkipReason; - - triggeredByLines: boolean; - triggeredByBytes: boolean; - - alreadyTargeted: boolean; - intent: BashOutputIntent; - - effectiveMaxKeptLines: number; -} - -const EXPLORATION_SKIP_MAX_LINES = 120; -const EXPLORATION_SKIP_MAX_BYTES = 12 * 1024; -const EXPLORATION_BOOST_MAX_KEPT_LINES = 120; - -export function decideBashOutputCompaction(params: { - toolName: string; - script: string; - displayName?: string; - planFilePath?: string; - - totalLines: number; - totalBytes: number; - - minLines: number; - minTotalBytes: number; - maxKeptLines: number; -}): BashOutputCompactionDecision { - assert(params, "params is required"); - assert( - typeof params.toolName === "string" && params.toolName.length > 0, - "toolName must be a non-empty string" - ); - assert(typeof params.script === "string", "script must be a string"); - assert( - typeof params.planFilePath === "string" || typeof params.planFilePath === "undefined", - "planFilePath must be a string if provided" - ); - assert( - Number.isInteger(params.totalLines) && params.totalLines >= 0, - "totalLines must be a non-negative integer" - ); - assert( - Number.isInteger(params.totalBytes) && params.totalBytes >= 0, - "totalBytes must be a non-negative integer" - ); - assert(Number.isInteger(params.minLines) && params.minLines >= 0, "minLines must be >= 0"); - assert( - Number.isInteger(params.minTotalBytes) && params.minTotalBytes >= 0, - "minTotalBytes must be >= 0" - ); - assert( - Number.isInteger(params.maxKeptLines) && params.maxKeptLines > 0, - "maxKeptLines must be a positive integer" - ); - - const triggeredByLines = params.totalLines > params.minLines; - const triggeredByBytes = params.totalBytes > params.minTotalBytes; - - let intent: BashOutputIntent = "unknown"; - let alreadyTargeted = false; - let effectiveMaxKeptLines = params.maxKeptLines; - - if (!triggeredByLines && !triggeredByBytes) { - return { - shouldCompact: false, - skipReason: "below_threshold", - triggeredByLines, - triggeredByBytes, - alreadyTargeted, - intent, - effectiveMaxKeptLines, - }; - } - - if (params.toolName === "bash") { - alreadyTargeted = isBashOutputAlreadyTargeted(params.script); - intent = classifyBashIntent({ script: params.script, displayName: params.displayName }); - - if (scriptMentionsPlanFile(params.script, params.planFilePath)) { - // Plan Mode invariant: the plan file is the source of truth. System1 compaction can drop - // the middle of the document, forcing extra tool calls and/or leading to incorrect plans. - return { - shouldCompact: false, - skipReason: "plan_file_in_script", - triggeredByLines, - triggeredByBytes, - alreadyTargeted, - intent, - effectiveMaxKeptLines, - }; - } - - if (alreadyTargeted) { - return { - shouldCompact: false, - skipReason: "already_targeted_script", - triggeredByLines, - triggeredByBytes, - alreadyTargeted, - intent, - effectiveMaxKeptLines, - }; - } - - const defaultMinLines = - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.default; - const defaultMinTotalBytes = - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.default; - const defaultMaxKeptLines = - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.default; - - // If a user has customized compaction settings, respect those limits even for exploration output. - const isDefaultCompactionConfig = - params.minLines === defaultMinLines && - params.minTotalBytes === defaultMinTotalBytes && - params.maxKeptLines === defaultMaxKeptLines; - - const isConflictMarkerSearch = isGitConflictMarkerSearch(params.script); - - if ( - isDefaultCompactionConfig && - isConflictMarkerSearch && - params.totalLines <= BASH_HARD_MAX_LINES && - params.totalBytes <= BASH_MAX_TOTAL_BYTES - ) { - return { - shouldCompact: false, - skipReason: "conflict_marker_search_within_limits", - triggeredByLines, - triggeredByBytes, - alreadyTargeted, - intent, - effectiveMaxKeptLines, - }; - } - - if ( - intent === "exploration" && - params.totalLines <= EXPLORATION_SKIP_MAX_LINES && - params.totalBytes <= EXPLORATION_SKIP_MAX_BYTES - ) { - // Skip the System1 call only when compaction settings are at their defaults. This avoids - // bypassing explicit user limits (e.g. when they've lowered max-kept-lines or forced compaction). - if (isDefaultCompactionConfig) { - return { - shouldCompact: false, - skipReason: "exploration_output_small", - triggeredByLines, - triggeredByBytes, - alreadyTargeted, - intent, - effectiveMaxKeptLines, - }; - } - } - - // Guardrail: only override when the caller still uses the default budget and thresholds. - if (isDefaultCompactionConfig) { - if (isConflictMarkerSearch) { - effectiveMaxKeptLines = BASH_HARD_MAX_LINES; - } else if (intent === "exploration") { - effectiveMaxKeptLines = Math.min( - BASH_HARD_MAX_LINES, - Math.max(params.maxKeptLines, EXPLORATION_BOOST_MAX_KEPT_LINES) - ); - } - } - } - - assert( - Number.isInteger(effectiveMaxKeptLines) && effectiveMaxKeptLines > 0, - "effectiveMaxKeptLines must be a positive integer" - ); - - return { - shouldCompact: true, - triggeredByLines, - triggeredByBytes, - alreadyTargeted, - intent, - effectiveMaxKeptLines, - }; -} diff --git a/src/node/services/system1/bashOutputFiltering.test.ts b/src/node/services/system1/bashOutputFiltering.test.ts deleted file mode 100644 index 22ee09b286..0000000000 --- a/src/node/services/system1/bashOutputFiltering.test.ts +++ /dev/null @@ -1,155 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import { - applySystem1KeepRangesToOutput, - formatNumberedLinesForSystem1, - getHeuristicKeepRangesForBashOutput, - formatSystem1BashFilterNotice, - splitBashOutputLines, -} from "./bashOutputFiltering"; - -describe("bashOutputFiltering", () => { - describe("splitBashOutputLines", () => { - it("returns [] for empty output", () => { - expect(splitBashOutputLines("")).toEqual([]); - }); - - it("splits on newlines", () => { - expect(splitBashOutputLines("a\nb\nc")).toEqual(["a", "b", "c"]); - }); - }); - - describe("formatNumberedLinesForSystem1", () => { - it("adds 1-based line numbers", () => { - expect(formatNumberedLinesForSystem1(["a", "b"]).split("\n")).toEqual(["0001| a", "0002| b"]); - }); - }); - - describe("formatSystem1BashFilterNotice", () => { - it("includes a cleanup warning when fullOutputPath is present", () => { - const notice = formatSystem1BashFilterNotice({ - keptLines: 1, - totalLines: 2, - trigger: "lines", - fullOutputPath: "/tmp/bash-s1.txt", - }); - - expect(notice).toContain("Full output saved to /tmp/bash-s1.txt"); - expect(notice).toContain("automatically cleaned up"); - expect(notice).toContain("may already be gone"); - }); - - it("omits the full output path when fullOutputPath is missing", () => { - const notice = formatSystem1BashFilterNotice({ - keptLines: 1, - totalLines: 2, - trigger: "bytes", - }); - - expect(notice).toBe("Auto-filtered output: kept 1/2 lines (trigger: bytes)."); - }); - }); - - describe("getHeuristicKeepRangesForBashOutput", () => { - it("keeps error context and respects maxKeptLines", () => { - const rawOutput = [ - "starting...", - "step 1 ok", - "ERROR: expected X, got Y", - " at path/to/file.ts:12:3", - "done", - ].join("\n"); - - const lines = splitBashOutputLines(rawOutput); - const keepRanges = getHeuristicKeepRangesForBashOutput({ - lines, - maxKeptLines: 3, - }); - - const applied = applySystem1KeepRangesToOutput({ - rawOutput, - keepRanges, - maxKeptLines: 3, - }); - - expect(applied).toBeDefined(); - expect(applied?.keptLines).toBeLessThanOrEqual(3); - expect(applied?.filteredOutput).toContain("ERROR:"); - }); - - it("treats git conflict markers as important lines", () => { - const rawOutput = [ - "start", - "src/foo.ts:1:<<<<<<< HEAD", - "src/foo.ts:2:=======", - "src/foo.ts:3:>>>>>>> main", - "end", - ].join("\n"); - - const lines = splitBashOutputLines(rawOutput); - const keepRanges = getHeuristicKeepRangesForBashOutput({ - lines, - maxKeptLines: 10, - }); - - const applied = applySystem1KeepRangesToOutput({ - rawOutput, - keepRanges, - maxKeptLines: 10, - }); - - expect(applied).toBeDefined(); - expect(applied?.filteredOutput).toContain("<<<<<<<"); - expect(applied?.filteredOutput).toContain("======="); - expect(applied?.filteredOutput).toContain(">>>>>>>"); - }); - }); - - describe("applySystem1KeepRangesToOutput", () => { - it("returns undefined when keep ranges are empty", () => { - const applied = applySystem1KeepRangesToOutput({ - rawOutput: "a\nb\nc", - keepRanges: [], - maxKeptLines: 10, - }); - expect(applied).toBeUndefined(); - }); - - it("clamps and swaps out-of-order ranges", () => { - const applied = applySystem1KeepRangesToOutput({ - rawOutput: "a\nb\nc\nd\ne", - keepRanges: [{ start: 10, end: 2 }], - maxKeptLines: 10, - }); - - expect(applied).toEqual({ - filteredOutput: "b\nc\nd\ne", - keptLines: 4, - totalLines: 5, - }); - }); - - it("merges overlapping ranges and enforces maxKeptLines", () => { - const applied = applySystem1KeepRangesToOutput({ - rawOutput: "a\nb\nc\nd\ne\nf", - keepRanges: [ - { start: 2, end: 4 }, - { start: 4, end: 6 }, - ], - maxKeptLines: 3, - }); - - expect(applied).toEqual({ - filteredOutput: "b\nc\nd", - keptLines: 3, - totalLines: 6, - }); - - // Subset-only guarantee: every kept line must exist in the original output. - const rawLines = splitBashOutputLines("a\nb\nc\nd\ne\nf"); - const keptLines = splitBashOutputLines(applied!.filteredOutput); - for (const line of keptLines) { - expect(rawLines.includes(line)).toBe(true); - } - }); - }); -}); diff --git a/src/node/services/system1/bashOutputFiltering.ts b/src/node/services/system1/bashOutputFiltering.ts deleted file mode 100644 index 1a0d47cf88..0000000000 --- a/src/node/services/system1/bashOutputFiltering.ts +++ /dev/null @@ -1,228 +0,0 @@ -import assert from "@/common/utils/assert"; -import type { z } from "zod"; -import type { System1KeepRangeSchema } from "@/common/utils/tools/toolDefinitions"; - -// Derived from the Zod schema (single source of truth) to avoid drift. -export type System1KeepRange = z.infer; - -export interface ApplySystem1KeepRangesResult { - filteredOutput: string; - keptLines: number; - totalLines: number; -} - -export function formatSystem1BashFilterNotice(params: { - keptLines: number; - totalLines: number; - trigger: string; - fullOutputPath?: string | undefined; -}): string { - assert( - Number.isInteger(params.keptLines) && params.keptLines >= 0, - "keptLines must be a non-negative integer" - ); - assert( - Number.isInteger(params.totalLines) && params.totalLines >= 0, - "totalLines must be a non-negative integer" - ); - assert(params.keptLines <= params.totalLines, "keptLines must be <= totalLines"); - assert( - typeof params.trigger === "string" && params.trigger.length > 0, - "trigger must be a string" - ); - - const notice = `Auto-filtered output: kept ${params.keptLines}/${params.totalLines} lines (trigger: ${params.trigger}).`; - - if (typeof params.fullOutputPath !== "string" || params.fullOutputPath.length === 0) { - return notice; - } - - return ( - notice + - `\n\nFull output saved to ${params.fullOutputPath}` + - "\n\nFile will be automatically cleaned up when stream ends (it may already be gone)." - ); -} - -export function splitBashOutputLines(output: string): string[] { - if (output.length === 0) { - return []; - } - - // NOTE: Preserve exact line contents (including any \r characters). - return output.split("\n"); -} - -export function formatNumberedLinesForSystem1(lines: string[]): string { - return lines.map((line, index) => `${String(index + 1).padStart(4, "0")}| ${line}`).join("\n"); -} - -const GIT_CONFLICT_MARKER_REGEX = /(<{7}|={7}|>{7}|\|{7})/; - -const HEURISTIC_IMPORTANT_LINE_REGEX = - /(^|\b)(error|failed|failure|fatal|panic|exception|traceback|warning|assertion failed|npm err!|err!|exited with code|exit code)(\b|$)/i; - -const HEURISTIC_CONTEXT_LINES = 2; -const HEURISTIC_MAX_MATCH_RANGES = 50; - -export function getHeuristicKeepRangesForBashOutput(params: { - lines: string[]; - maxKeptLines: number; -}): System1KeepRange[] { - assert(Array.isArray(params.lines), "lines must be an array"); - assert( - Number.isInteger(params.maxKeptLines) && params.maxKeptLines > 0, - "maxKeptLines must be a positive integer" - ); - - const totalLines = params.lines.length; - if (totalLines === 0) { - return []; - } - - // Keep a small head/tail slice so users can see setup and summary. - const headTailLines = Math.max(1, Math.min(5, Math.floor(params.maxKeptLines / 8))); - - const ranges: System1KeepRange[] = []; - - const headEnd = Math.min(totalLines, headTailLines); - if (headEnd > 0) { - ranges.push({ start: 1, end: headEnd, reason: "head" }); - } - - const tailStart = Math.max(1, totalLines - headTailLines + 1); - if (tailStart <= totalLines) { - ranges.push({ start: tailStart, end: totalLines, reason: "tail" }); - } - - let matchRanges = 0; - for (let idx = 0; idx < totalLines; idx += 1) { - if (matchRanges >= HEURISTIC_MAX_MATCH_RANGES) { - break; - } - - const line = params.lines[idx]; - const isConflictMarkerLine = GIT_CONFLICT_MARKER_REGEX.test(line); - const isImportantLine = HEURISTIC_IMPORTANT_LINE_REGEX.test(line); - - if (!isConflictMarkerLine && !isImportantLine) { - continue; - } - - const contextLines = isConflictMarkerLine ? 0 : HEURISTIC_CONTEXT_LINES; - - const lineNo = idx + 1; - const start = Math.max(1, lineNo - contextLines); - const end = Math.min(totalLines, lineNo + contextLines); - - ranges.push({ - start, - end, - reason: isConflictMarkerLine ? "conflict_marker" : "match", - }); - matchRanges += 1; - } - - return ranges; -} - -interface NormalizedRange { - start: number; - end: number; -} - -function normalizeKeepRanges(ranges: System1KeepRange[], maxLine: number): NormalizedRange[] { - assert(Number.isInteger(maxLine) && maxLine >= 0, "maxLine must be a non-negative integer"); - - const normalized: NormalizedRange[] = []; - for (const range of ranges) { - if (!Number.isFinite(range.start) || !Number.isFinite(range.end)) { - continue; - } - - // System 1 may return floats; clamp after rounding. - let start = Math.floor(range.start); - let end = Math.floor(range.end); - - if (start > end) { - [start, end] = [end, start]; - } - - // 1-based indexing. - start = Math.max(1, Math.min(maxLine, start)); - end = Math.max(1, Math.min(maxLine, end)); - - normalized.push({ start, end }); - } - - normalized.sort((a, b) => a.start - b.start || a.end - b.end); - - const merged: NormalizedRange[] = []; - for (const range of normalized) { - const prev = merged[merged.length - 1]; - if (!prev) { - merged.push(range); - continue; - } - - // Merge overlapping/adjacent ranges. - if (range.start <= prev.end + 1) { - prev.end = Math.max(prev.end, range.end); - continue; - } - - merged.push(range); - } - - return merged; -} - -export function applySystem1KeepRangesToOutput(params: { - rawOutput: string; - keepRanges: System1KeepRange[]; - maxKeptLines: number; -}): ApplySystem1KeepRangesResult | undefined { - assert(typeof params.rawOutput === "string", "rawOutput must be a string"); - assert(Array.isArray(params.keepRanges), "keepRanges must be an array"); - assert( - Number.isInteger(params.maxKeptLines) && params.maxKeptLines > 0, - "maxKeptLines must be a positive integer" - ); - - const lines = splitBashOutputLines(params.rawOutput); - const totalLines = lines.length; - - if (totalLines === 0) { - return { - filteredOutput: "", - keptLines: 0, - totalLines: 0, - }; - } - - const normalized = normalizeKeepRanges(params.keepRanges, totalLines); - if (normalized.length === 0) { - return undefined; - } - - const kept: string[] = []; - for (const range of normalized) { - for (let lineNo = range.start; lineNo <= range.end; lineNo += 1) { - kept.push(lines[lineNo - 1]); - - if (kept.length >= params.maxKeptLines) { - return { - filteredOutput: kept.join("\n"), - keptLines: kept.length, - totalLines, - }; - } - } - } - - return { - filteredOutput: kept.join("\n"), - keptLines: kept.length, - totalLines, - }; -} diff --git a/src/node/services/system1/system1AgentRunner.test.ts b/src/node/services/system1/system1AgentRunner.test.ts deleted file mode 100644 index 38fc0c2311..0000000000 --- a/src/node/services/system1/system1AgentRunner.test.ts +++ /dev/null @@ -1,284 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import type { LanguageModel } from "ai"; -import * as fs from "node:fs/promises"; -import * as os from "node:os"; -import * as path from "node:path"; - -import type { createRuntime as CreateRuntimeFn } from "@/node/runtime/runtimeFactory"; - -/* eslint-disable @typescript-eslint/no-require-imports, @typescript-eslint/no-unsafe-assignment */ -const { - createRuntime, -}: { createRuntime: typeof CreateRuntimeFn } = require("@/node/runtime/runtimeFactory?real=1"); -/* eslint-enable @typescript-eslint/no-require-imports, @typescript-eslint/no-unsafe-assignment */ -import { runSystem1KeepRangesForBashOutput } from "./system1AgentRunner"; - -// NOTE: These tests do not exercise a real model. -// We inject a stub generateTextImpl that simulates the model calling the tool. - -describe("system1AgentRunner", () => { - it("returns keep ranges when the model calls system1_keep_ranges", async () => { - const runtime = createRuntime({ type: "local", srcBaseDir: process.cwd() }); - - let calls = 0; - - const result = await runSystem1KeepRangesForBashOutput({ - runtime, - agentDiscoveryPath: process.cwd(), - runtimeTempDir: os.tmpdir(), - model: {} as unknown as LanguageModel, - modelString: "openai:gpt-5.1-codex-mini", - providerOptions: {}, - script: "echo hi", - numberedOutput: "0001| hi\n0002| ERROR: bad\n0003| at x", - maxKeptLines: 10, - timeoutMs: 5_000, - generateTextImpl: async (args) => { - calls += 1; - - // Tool use is mandated by the system1_bash agent prompt. - // Do not force tool_choice at the API layer (some providers reject that + thinking). - expect((args as { toolChoice?: unknown }).toolChoice).toBeUndefined(); - - const tools = (args as { tools?: unknown }).tools as Record | undefined; - expect(tools && "system1_keep_ranges" in tools).toBe(true); - - // Simulate the model calling the tool. - const keepRangesTool = tools!.system1_keep_ranges as { - execute: (input: unknown, options: unknown) => unknown; - }; - - await keepRangesTool.execute({ keep_ranges: [{ start: 2, end: 3, reason: "error" }] }, {}); - - return { finishReason: "stop" }; - }, - }); - - expect(calls).toBe(1); - expect(result).toEqual({ - keepRanges: [{ start: 2, end: 3, reason: "error" }], - finishReason: "stop", - timedOut: false, - }); - }); - - it("includes display name in the user message when provided", async () => { - const runtime = createRuntime({ type: "local", srcBaseDir: process.cwd() }); - - let calls = 0; - - const result = await runSystem1KeepRangesForBashOutput({ - runtime, - agentDiscoveryPath: process.cwd(), - runtimeTempDir: os.tmpdir(), - model: {} as unknown as LanguageModel, - modelString: "openai:gpt-5.1-codex-mini", - providerOptions: {}, - displayName: "List files", - script: "ls", - numberedOutput: "0001| a\n0002| b\n0003| c", - maxKeptLines: 10, - timeoutMs: 5_000, - generateTextImpl: async (args) => { - calls += 1; - - const messages = (args as { messages?: unknown }).messages as - | Array<{ content?: unknown }> - | undefined; - expect(Array.isArray(messages)).toBe(true); - - const firstContent = messages?.[0]?.content; - expect(typeof firstContent).toBe("string"); - expect(firstContent as string).toContain("Display name:"); - expect(firstContent as string).toContain("List files"); - - const tools = (args as { tools?: unknown }).tools as Record | undefined; - - // Simulate the model calling the tool. - const keepRangesTool = tools!.system1_keep_ranges as { - execute: (input: unknown, options: unknown) => unknown; - }; - - await keepRangesTool.execute({ keep_ranges: [{ start: 1, end: 1, reason: "first" }] }, {}); - - return { finishReason: "stop" }; - }, - }); - - expect(calls).toBe(1); - expect(result).toEqual({ - keepRanges: [{ start: 1, end: 1, reason: "first" }], - finishReason: "stop", - timedOut: false, - }); - }); - - it("ignores project overrides of the internal system1_bash agent prompt", async () => { - const runtime = createRuntime({ type: "local", srcBaseDir: process.cwd() }); - - const projectDir = await fs.mkdtemp(path.join(os.tmpdir(), "system1-runner-project-")); - try { - const agentsDir = path.join(projectDir, ".mux", "agents"); - await fs.mkdir(agentsDir, { recursive: true }); - await fs.writeFile( - path.join(agentsDir, "system1_bash.md"), - [ - "---", - "name: Override System1 Bash", - "ui:", - " hidden: true", - "subagent:", - " runnable: false", - "---", - "OVERRIDE_DO_NOT_USE", - "", - ].join("\n"), - "utf8" - ); - - const result = await runSystem1KeepRangesForBashOutput({ - runtime, - agentDiscoveryPath: projectDir, - runtimeTempDir: os.tmpdir(), - model: {} as unknown as LanguageModel, - modelString: "openai:gpt-5.1-codex-mini", - providerOptions: {}, - script: "echo hi", - numberedOutput: "0001| hi", - maxKeptLines: 10, - timeoutMs: 5_000, - generateTextImpl: async (args) => { - expect((args as { toolChoice?: unknown }).toolChoice).toBeUndefined(); - - const system = (args as { system?: unknown }).system; - expect(typeof system).toBe("string"); - expect(system).not.toContain("OVERRIDE_DO_NOT_USE"); - - const tools = (args as { tools?: unknown }).tools as Record | undefined; - expect(tools && "system1_keep_ranges" in tools).toBe(true); - - const keepRangesTool = tools!.system1_keep_ranges as { - execute: (input: unknown, options: unknown) => unknown; - }; - await keepRangesTool.execute({ keep_ranges: [{ start: 1, end: 1, reason: "hi" }] }, {}); - - return { finishReason: "stop" }; - }, - }); - - expect(result).toEqual({ - keepRanges: [{ start: 1, end: 1, reason: "hi" }], - finishReason: "stop", - timedOut: false, - }); - } finally { - await fs.rm(projectDir, { recursive: true, force: true }); - } - }); - - it("retries once with a reminder if the model does not call the tool", async () => { - const runtime = createRuntime({ type: "local", srcBaseDir: process.cwd() }); - - let calls = 0; - - const result = await runSystem1KeepRangesForBashOutput({ - runtime, - agentDiscoveryPath: process.cwd(), - runtimeTempDir: os.tmpdir(), - model: {} as unknown as LanguageModel, - modelString: "openai:gpt-5.1-codex-mini", - providerOptions: {}, - script: "echo hi", - numberedOutput: "0001| hi", - maxKeptLines: 10, - timeoutMs: 5_000, - generateTextImpl: async (args) => { - calls += 1; - - const messages = (args as { messages?: unknown }).messages as - | Array<{ content?: unknown }> - | undefined; - expect(Array.isArray(messages)).toBe(true); - - if (calls === 1) { - expect(messages!.length).toBe(1); - return { finishReason: "stop" }; - } - - expect(messages!.length).toBe(2); - expect(messages![1]?.content).toBe( - "Reminder: You MUST call `system1_keep_ranges` exactly once. Do not output any text; only the tool call." - ); - - const tools = (args as { tools?: unknown }).tools as Record | undefined; - const keepRangesTool = tools!.system1_keep_ranges as { - execute: (input: unknown, options: unknown) => unknown; - }; - - await keepRangesTool.execute({ keep_ranges: [{ start: 1, end: 1, reason: "hi" }] }, {}); - return { finishReason: "stop" }; - }, - }); - - expect(calls).toBe(2); - expect(result).toEqual({ - keepRanges: [{ start: 1, end: 1, reason: "hi" }], - finishReason: "stop", - timedOut: false, - }); - }); - - it("returns undefined when the model does not call the tool", async () => { - const runtime = createRuntime({ type: "local", srcBaseDir: process.cwd() }); - - let calls = 0; - - const result = await runSystem1KeepRangesForBashOutput({ - runtime, - agentDiscoveryPath: process.cwd(), - runtimeTempDir: os.tmpdir(), - model: {} as unknown as LanguageModel, - modelString: "openai:gpt-5.1-codex-mini", - providerOptions: {}, - script: "echo hi", - numberedOutput: "0001| hi", - maxKeptLines: 10, - timeoutMs: 5_000, - generateTextImpl: () => { - calls += 1; - return Promise.resolve({ finishReason: "stop" }); - }, - }); - - expect(calls).toBe(2); - expect(result).toBeUndefined(); - }); - - it("returns undefined on AbortError", async () => { - const runtime = createRuntime({ type: "local", srcBaseDir: process.cwd() }); - - let calls = 0; - - const result = await runSystem1KeepRangesForBashOutput({ - runtime, - agentDiscoveryPath: process.cwd(), - runtimeTempDir: os.tmpdir(), - model: {} as unknown as LanguageModel, - modelString: "openai:gpt-5.1-codex-mini", - providerOptions: {}, - script: "echo hi", - numberedOutput: "0001| hi", - maxKeptLines: 10, - timeoutMs: 5_000, - generateTextImpl: () => { - calls += 1; - const err = new Error("aborted"); - err.name = "AbortError"; - return Promise.reject(err); - }, - }); - - expect(calls).toBe(1); - expect(result).toBeUndefined(); - }); -}); diff --git a/src/node/services/system1/system1AgentRunner.ts b/src/node/services/system1/system1AgentRunner.ts deleted file mode 100644 index 919cb47857..0000000000 --- a/src/node/services/system1/system1AgentRunner.ts +++ /dev/null @@ -1,228 +0,0 @@ -import assert from "@/common/utils/assert"; - -import { generateText, type LanguageModel, type Tool } from "ai"; -import type { LanguageModelV2Usage } from "@ai-sdk/provider"; - -import type { Runtime } from "@/node/runtime/Runtime"; - -import { resolveAgentBody } from "@/node/services/agentDefinitions/agentDefinitionsService"; -import { createSystem1KeepRangesTool } from "@/node/services/tools/system1_keep_ranges"; -import type { System1KeepRange } from "@/node/services/system1/bashOutputFiltering"; -import { linkAbortSignal } from "@/node/utils/abort"; - -export type GenerateTextLike = (args: Parameters[0]) => Promise<{ - finishReason?: string; - usage?: LanguageModelV2Usage; - providerMetadata?: Record; -}>; -export interface RunSystem1KeepRangesParams { - runtime: Runtime; - agentDiscoveryPath: string; - runtimeTempDir: string; - - model: LanguageModel; - modelString: string; - providerOptions?: Record; - - // Optional short label describing what the bash command is doing (intent hint). - // This is intentionally lightweight to avoid bloating the System 1 prompt. - displayName?: string; - - script: string; - numberedOutput: string; - maxKeptLines: number; - - timeoutMs: number; - abortSignal?: AbortSignal; - onTimeout?: () => void; - - // Testing hook: allows unit tests to stub the AI SDK call. - generateTextImpl?: GenerateTextLike; -} - -export async function runSystem1KeepRangesForBashOutput( - params: RunSystem1KeepRangesParams -): Promise< - | { - keepRanges: System1KeepRange[]; - finishReason?: string; - timedOut: boolean; - usage?: LanguageModelV2Usage; - providerMetadata?: Record; - } - | undefined -> { - assert(params, "params is required"); - assert(params.runtime, "runtime is required"); - assert( - typeof params.agentDiscoveryPath === "string" && params.agentDiscoveryPath.length > 0, - "agentDiscoveryPath must be a non-empty string" - ); - assert( - typeof params.runtimeTempDir === "string" && params.runtimeTempDir.length > 0, - "runtimeTempDir must be a non-empty string" - ); - assert(params.model, "model is required"); - assert( - params.displayName === undefined || typeof params.displayName === "string", - "displayName must be a string when provided" - ); - assert( - typeof params.modelString === "string" && params.modelString.length > 0, - "modelString must be a non-empty string" - ); - assert(typeof params.script === "string", "script must be a string"); - assert( - typeof params.numberedOutput === "string" && params.numberedOutput.length > 0, - "numberedOutput must be a non-empty string" - ); - assert( - Number.isInteger(params.maxKeptLines) && params.maxKeptLines > 0, - "maxKeptLines must be a positive integer" - ); - assert( - Number.isInteger(params.timeoutMs) && params.timeoutMs > 0, - "timeoutMs must be a positive integer" - ); - - // Intentionally keep the System 1 prompt minimal to avoid consuming context budget. - // - // Use the built-in definition for this internal agent. Allowing project/global overrides - // would introduce a new footgun compared to the previously hard-coded System1 prompt. - const systemPrompt = await resolveAgentBody( - params.runtime, - params.agentDiscoveryPath, - "system1_bash", - { skipScopesAbove: "global" } - ); - - const userMessageParts = [`maxKeptLines: ${params.maxKeptLines}`, ""]; - - const displayName = - typeof params.displayName === "string" && params.displayName.trim().length > 0 - ? params.displayName.trim() - : undefined; - if (displayName) { - userMessageParts.push(`Display name:\n${displayName}`, ""); - } - - userMessageParts.push( - `Bash script:\n${params.script}`, - "", - `Numbered output:\n${params.numberedOutput}` - ); - - const userMessage = userMessageParts.join("\n"); - - const system1AbortController = new AbortController(); - const unlink = linkAbortSignal(params.abortSignal, system1AbortController); - - let timedOut = false; - const timeout = setTimeout(() => { - timedOut = true; - params.onTimeout?.(); - system1AbortController.abort(); - }, params.timeoutMs); - timeout.unref?.(); - - // Some providers (Anthropic) reject requests that force tool use while also enabling - // "thinking". Since the System 1 agent already mandates tool usage, keep requests - // provider-agnostic and retry once with a stronger reminder if needed. - const attemptMessages: Array[0]["messages"]>> = [ - [{ role: "user", content: userMessage }], - [ - { role: "user", content: userMessage }, - { - role: "user", - content: - "Reminder: You MUST call `system1_keep_ranges` exactly once. Do not output any text; only the tool call.", - }, - ], - ]; - - const generate = params.generateTextImpl ?? generateText; - let responseWithUsage: - | { - finishReason?: string; - usage: LanguageModelV2Usage; - providerMetadata?: Record; - } - | undefined; - - try { - for (const messages of attemptMessages) { - let keepRanges: System1KeepRange[] | undefined; - - const tools: Record = { - system1_keep_ranges: createSystem1KeepRangesTool( - // This tool is pure/side-effect-free; config is unused. - // Provide a minimal config object for interface compatibility. - { - cwd: params.agentDiscoveryPath, - runtime: params.runtime, - runtimeTempDir: params.runtimeTempDir, - }, - { - onKeepRanges: (ranges) => { - keepRanges = ranges; - }, - } - ), - }; - - let response: Awaited>; - try { - response = await generate({ - model: params.model, - system: systemPrompt, - messages, - tools, - abortSignal: system1AbortController.signal, - // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment - providerOptions: params.providerOptions as any, - maxOutputTokens: 300, - maxRetries: 0, - }); - } catch (error) { - const errorName = error instanceof Error ? error.name : undefined; - if (errorName === "AbortError") { - return undefined; - } - throw error; - } - - if (keepRanges && keepRanges.length > 0) { - return { - keepRanges, - finishReason: response.finishReason, - timedOut, - usage: response.usage, - providerMetadata: response.providerMetadata, - }; - } - - if (response.usage) { - responseWithUsage = { - finishReason: response.finishReason, - usage: response.usage, - providerMetadata: response.providerMetadata, - }; - } - } - - if (responseWithUsage) { - return { - keepRanges: [], - finishReason: responseWithUsage.finishReason, - timedOut, - usage: responseWithUsage.usage, - providerMetadata: responseWithUsage.providerMetadata, - }; - } - - return undefined; - } finally { - clearTimeout(timeout); - unlink(); - } -} diff --git a/src/node/services/system1ToolWrapper.ts b/src/node/services/system1ToolWrapper.ts deleted file mode 100644 index 9cafc46dae..0000000000 --- a/src/node/services/system1ToolWrapper.ts +++ /dev/null @@ -1,741 +0,0 @@ -/** - * System1 bash output compaction: wraps bash/bash_output/task_await tools so - * large outputs are automatically filtered by a lightweight "System 1" LLM - * before being returned to the main conversation. - * - * Extracted from the ~660-line IIFE that lived inside AIService.streamMessage(). - */ -import * as path from "path"; -import type { LanguageModel, Tool } from "ai"; -import { - applySystem1KeepRangesToOutput, - formatNumberedLinesForSystem1, - formatSystem1BashFilterNotice, - getHeuristicKeepRangesForBashOutput, - splitBashOutputLines, -} from "@/node/services/system1/bashOutputFiltering"; -import { decideBashOutputCompaction } from "@/node/services/system1/bashCompactionPolicy"; -import { truncateBashOutput } from "@/common/utils/truncateBashOutput"; -import { runSystem1KeepRangesForBashOutput } from "@/node/services/system1/system1AgentRunner"; -import { - formatBashOutputReport, - tryParseBashOutputReport, -} from "@/node/services/tools/bashTaskReport"; -import type { BashOutputEvent } from "@/common/types/stream"; -import type { TaskSettings } from "@/common/types/tasks"; -import { DEFAULT_TASK_SETTINGS, SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS } from "@/common/types/tasks"; -import type { ProviderName } from "@/common/constants/providers"; -import { getExplicitGatewayPrefix, normalizeToCanonical } from "@/common/utils/ai/models"; -import { buildProviderOptions } from "@/common/utils/ai/providerOptions"; -import { createDisplayUsage } from "@/common/utils/tokens/displayUsage"; -import { enforceThinkingPolicy } from "@/common/utils/thinking/policy"; -import type { ThinkingLevel } from "@/common/types/thinking"; -import type { MuxProviderOptions } from "@/common/types/providerOptions"; -import type { Runtime } from "@/node/runtime/Runtime"; -import type { Result } from "@/common/types/result"; -import type { SendMessageError } from "@/common/types/errors"; -import { cloneToolPreservingDescriptors } from "@/common/utils/tools/cloneToolPreservingDescriptors"; -import { log } from "./log"; -import type { SessionUsageService } from "./sessionUsageService"; -import { getErrorMessage } from "@/common/utils/errors"; - -// --------------------------------------------------------------------------- -// Public interface -// --------------------------------------------------------------------------- - -export interface System1WrapOptions { - tools: Record; - /** Raw system1Model string from caller (may be empty). */ - system1Model: string | undefined; - system1ThinkingLevel: ThinkingLevel | undefined; - /** The primary model string (used as fallback when system1Model is empty). */ - modelString: string; - /** Resolved primary model string after gateway resolution. */ - effectiveModelString: string; - /** Already-created primary model instance. */ - primaryModel: LanguageModel; - /** Route provider for the primary stream when System1 reuses that model. */ - routeProvider?: ProviderName; - muxProviderOptions: MuxProviderOptions; - workspaceId: string; - promptCacheScope?: string; - effectiveMode: string; - planFilePath: string; - taskSettings: TaskSettings; - runtimeTempDir: string; - runtime: Runtime; - agentDiscoveryPath: string; - /** Callbacks to break the dependency on AIService / StreamManager. */ - createModel: ( - modelString: string, - opts?: MuxProviderOptions, - createOptions?: { agentInitiated?: boolean; workspaceId?: string } - ) => Promise>; - emitBashOutput: (event: BashOutputEvent) => void; - sessionUsageService?: SessionUsageService; -} - -/** - * Wrap bash / bash_output / task_await tools with System1 output compaction. - * Returns the wrapped tool map (or the originals unchanged if bash is missing). - */ -export function wrapToolsWithSystem1(opts: System1WrapOptions): Record { - const { tools } = opts; - const baseBashTool = tools.bash; - if (!baseBashTool) return tools; - - const bashExecuteFn = getExecuteFn(baseBashTool); - if (!bashExecuteFn) return tools; - - const bashOutputExecuteFn = getExecuteFn(tools.bash_output); - const taskAwaitExecuteFn = getExecuteFn(tools.task_await); - - // Resolve System1 model configuration - const system1Ctx = buildSystem1ModelContext(opts); - - // Lazy-create and cache the System1 model for the duration of this stream. - let cachedSystem1Model: { modelString: string; model: LanguageModel } | undefined; - let cachedSystem1ModelFailed = false; - - const getSystem1Model = async (): Promise< - { modelString: string; model: LanguageModel } | undefined - > => { - if (!system1Ctx.modelString) { - return { modelString: opts.effectiveModelString, model: opts.primaryModel }; - } - if (cachedSystem1Model) return cachedSystem1Model; - if (cachedSystem1ModelFailed) return undefined; - - // createModel handles gateway routing automatically — pass the raw string. - const created = await opts.createModel(system1Ctx.modelString, opts.muxProviderOptions, { - agentInitiated: true, - workspaceId: opts.workspaceId, - }); - if (!created.success) { - cachedSystem1ModelFailed = true; - log.debug("[system1] Failed to create System 1 model", { - workspaceId: opts.workspaceId, - system1Model: system1Ctx.modelString, - error: created.error, - }); - return undefined; - } - - cachedSystem1Model = { modelString: system1Ctx.modelString, model: created.data }; - return cachedSystem1Model; - }; - - // Core filtering function shared by all three wrapped tools. - const maybeFilter = (params: FilterParams) => - maybeFilterBashOutput({ - ...params, - opts, - system1Ctx, - getSystem1Model, - }); - - // Build wrapped tool map - const wrappedTools: Record = { - ...tools, - bash: wrapBashTool(baseBashTool, bashExecuteFn, maybeFilter, opts.workspaceId), - }; - - if (tools.bash_output && bashOutputExecuteFn) { - wrappedTools.bash_output = wrapBashOutputTool( - tools.bash_output, - bashOutputExecuteFn, - maybeFilter, - opts.workspaceId - ); - } - - if (tools.task_await && taskAwaitExecuteFn) { - wrappedTools.task_await = wrapTaskAwaitTool( - tools.task_await, - taskAwaitExecuteFn, - maybeFilter, - opts.workspaceId - ); - } - - return wrappedTools; -} - -// --------------------------------------------------------------------------- -// Tool helpers (moved from module-level in aiService.ts) -// --------------------------------------------------------------------------- - -/** Concatenate an extra note onto a tool result's existing note. */ -function appendToolNote(existing: string | undefined, extra: string): string { - return existing ? `${existing}\n\n${extra}` : extra; -} - -// --------------------------------------------------------------------------- -// Internal helpers -// --------------------------------------------------------------------------- - -type ExecuteFn = (this: unknown, args: unknown, options: unknown) => Promise; - -function getExecuteFn(tool: Tool | undefined): ExecuteFn | undefined { - if (!tool) return undefined; - const record = tool as unknown as Record; - const execute = record.execute; - return typeof execute === "function" ? (execute as ExecuteFn) : undefined; -} - -interface System1ModelContext { - /** Raw model string (may include mux-gateway: prefix). Passed to createModel which resolves gateway routing internally. */ - modelString: string; - thinkingLevel: ThinkingLevel; -} - -function buildSystem1ModelContext(opts: System1WrapOptions): System1ModelContext { - const raw = typeof opts.system1Model === "string" ? opts.system1Model.trim() : ""; - // Canonical form (gateway prefix stripped) for provider checks like thinking level. - const canonical = raw ? normalizeToCanonical(raw) : ""; - const effectiveModelForThinking = canonical || opts.modelString; - const thinkingLevel = enforceThinkingPolicy( - effectiveModelForThinking, - opts.system1ThinkingLevel ?? "off" - ); - // Store the raw string so createModel can detect explicit mux-gateway: prefix. - return { modelString: raw, thinkingLevel }; -} - -// --------------------------------------------------------------------------- -// Core filtering logic -// --------------------------------------------------------------------------- - -interface FilterParams { - toolName: string; - output: string; - script: string; - displayName?: string; - toolCallId?: string; - abortSignal?: AbortSignal; -} - -interface FilterDeps { - opts: System1WrapOptions; - system1Ctx: System1ModelContext; - getSystem1Model: () => Promise<{ modelString: string; model: LanguageModel } | undefined>; -} - -async function maybeFilterBashOutput( - params: FilterParams & FilterDeps -): Promise<{ filteredOutput: string; notice: string } | undefined> { - const { opts, system1Ctx, getSystem1Model, ...filterParams } = params; - - if (typeof filterParams.output !== "string" || filterParams.output.length === 0) { - return undefined; - } - - // Hard truncation safety net — bounds output even when System1 is skipped. - const hardTruncation = truncateBashOutput(filterParams.output); - const returnHardTruncationIfNeeded = (): - | { filteredOutput: string; notice: string } - | undefined => { - if (!hardTruncation.truncated) return undefined; - return { - filteredOutput: hardTruncation.output, - notice: `Output exceeded hard limits (${hardTruncation.originalLines} lines, ${hardTruncation.originalBytes} bytes). Showing last ${hardTruncation.output.split("\n").length} lines.`, - }; - }; - - let system1TimedOut = false; - - try { - const taskSettings = opts.taskSettings; - const minLines = - taskSettings.bashOutputCompactionMinLines ?? - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.default; - const minTotalBytes = - taskSettings.bashOutputCompactionMinTotalBytes ?? - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinTotalBytes.default; - const userMaxKeptLines = - taskSettings.bashOutputCompactionMaxKeptLines ?? - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMaxKeptLines.default; - const heuristicFallbackEnabled = - taskSettings.bashOutputCompactionHeuristicFallback ?? - DEFAULT_TASK_SETTINGS.bashOutputCompactionHeuristicFallback ?? - true; - const timeoutMs = - taskSettings.bashOutputCompactionTimeoutMs ?? - SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionTimeoutMs.default; - - const lines = splitBashOutputLines(filterParams.output); - const bytes = Buffer.byteLength(filterParams.output, "utf-8"); - - const decision = decideBashOutputCompaction({ - toolName: filterParams.toolName, - script: filterParams.script, - displayName: filterParams.displayName, - planFilePath: opts.effectiveMode === "plan" ? opts.planFilePath : undefined, - totalLines: lines.length, - totalBytes: bytes, - minLines, - minTotalBytes, - maxKeptLines: userMaxKeptLines, - }); - - const { triggeredByLines, triggeredByBytes } = decision; - - if (!triggeredByLines && !triggeredByBytes) { - return returnHardTruncationIfNeeded(); - } - - if (!decision.shouldCompact) { - log.debug("[system1] Skipping bash output compaction", { - workspaceId: opts.workspaceId, - toolName: filterParams.toolName, - skipReason: decision.skipReason, - intent: decision.intent, - alreadyTargeted: decision.alreadyTargeted, - displayName: filterParams.displayName, - totalLines: lines.length, - totalBytes: bytes, - triggeredByLines, - triggeredByBytes, - minLines, - minTotalBytes, - userMaxKeptLines, - heuristicFallbackEnabled, - timeoutMs, - }); - return returnHardTruncationIfNeeded(); - } - - const maxKeptLines = decision.effectiveMaxKeptLines; - - log.debug("[system1] Bash output compaction triggered", { - workspaceId: opts.workspaceId, - toolName: filterParams.toolName, - intent: decision.intent, - alreadyTargeted: decision.alreadyTargeted, - displayName: filterParams.displayName, - totalLines: lines.length, - totalBytes: bytes, - triggeredByLines, - triggeredByBytes, - minLines, - minTotalBytes, - userMaxKeptLines, - maxKeptLines, - heuristicFallbackEnabled, - timeoutMs, - }); - - // Save full output to temp file for agent reference - let fullOutputPath: string | undefined; - try { - const fileId = Math.random().toString(16).substring(2, 10); - fullOutputPath = path.posix.join(opts.runtimeTempDir, `bash-full-${fileId}.txt`); - const writer = opts.runtime.writeFile(fullOutputPath, filterParams.abortSignal); - const writerInstance = writer.getWriter(); - await writerInstance.write(new TextEncoder().encode(filterParams.output)); - await writerInstance.close(); - } catch (error) { - log.debug("[system1] Failed to save full bash output to temp file", { - workspaceId: opts.workspaceId, - error: getErrorMessage(error), - }); - fullOutputPath = undefined; - } - - const system1 = await getSystem1Model(); - if (!system1) return undefined; - - // When System1 uses a gateway-prefixed model, keep that explicit gateway so - // buildProviderOptions uses the override's gateway namespace. Canonical - // System1 models inherit the primary stream's active route provider. - const system1RouteProvider = system1Ctx.modelString - ? (getExplicitGatewayPrefix(system1Ctx.modelString) ?? opts.routeProvider) - : opts.routeProvider; - const system1ProviderOptions = buildProviderOptions( - system1.modelString, - system1Ctx.thinkingLevel, - undefined, - undefined, - opts.muxProviderOptions, - opts.workspaceId, - undefined, - undefined, - system1RouteProvider, - opts.promptCacheScope - ) as unknown as Record; - - const numberedOutput = formatNumberedLinesForSystem1(lines); - const startTimeMs = Date.now(); - - if (typeof filterParams.toolCallId === "string" && filterParams.toolCallId.length > 0) { - opts.emitBashOutput({ - type: "bash-output", - workspaceId: opts.workspaceId, - toolCallId: filterParams.toolCallId, - phase: "filtering", - text: "", - isError: false, - timestamp: Date.now(), - } satisfies BashOutputEvent); - } - - let filterMethod: "system1" | "heuristic" = "system1"; - let keepRangesCount = 0; - let finishReason: string | undefined; - let lastErrorName: string | undefined; - let lastErrorMessage: string | undefined; - let applied: ReturnType = undefined; - - try { - const keepRangesResult = await runSystem1KeepRangesForBashOutput({ - runtime: opts.runtime, - agentDiscoveryPath: opts.agentDiscoveryPath, - runtimeTempDir: opts.runtimeTempDir, - model: system1.model, - modelString: system1.modelString, - providerOptions: system1ProviderOptions, - displayName: filterParams.displayName, - script: filterParams.script, - numberedOutput, - maxKeptLines, - timeoutMs, - abortSignal: filterParams.abortSignal, - onTimeout: () => { - system1TimedOut = true; - }, - }); - - if (keepRangesResult) { - finishReason = keepRangesResult.finishReason; - keepRangesCount = keepRangesResult.keepRanges.length; - - // Track System 1 token usage in workspace costs. - // Normalize the model string so gateway-routed models merge into the - // same cost bucket as direct calls. Pass providerMetadata so cache - // tokens and costsIncluded are honored. - if (keepRangesResult.usage && opts.sessionUsageService) { - const normalizedModel = normalizeToCanonical(system1.modelString); - const displayUsage = createDisplayUsage( - keepRangesResult.usage, - normalizedModel, - keepRangesResult.providerMetadata - ); - if (displayUsage) { - void opts.sessionUsageService.recordUsage( - opts.workspaceId, - normalizedModel, - displayUsage - ); - } - } - - applied = applySystem1KeepRangesToOutput({ - rawOutput: filterParams.output, - keepRanges: keepRangesResult.keepRanges, - maxKeptLines, - }); - } - } catch (error) { - lastErrorName = error instanceof Error ? error.name : undefined; - lastErrorMessage = getErrorMessage(error); - } - - if (!applied || applied.keptLines === 0) { - const elapsedMs = Date.now() - startTimeMs; - const upstreamAborted = filterParams.abortSignal?.aborted ?? false; - - log.debug("[system1] Failed to generate keep_ranges", { - workspaceId: opts.workspaceId, - toolName: filterParams.toolName, - system1Model: system1.modelString, - elapsedMs, - timedOut: system1TimedOut, - upstreamAborted, - keepRangesCount, - errorName: lastErrorName, - error: lastErrorMessage, - }); - - if (!heuristicFallbackEnabled || upstreamAborted) return undefined; - - const heuristicKeepRanges = getHeuristicKeepRangesForBashOutput({ lines, maxKeptLines }); - keepRangesCount = heuristicKeepRanges.length; - applied = applySystem1KeepRangesToOutput({ - rawOutput: filterParams.output, - keepRanges: heuristicKeepRanges, - maxKeptLines, - }); - filterMethod = "heuristic"; - } - - if (!applied || applied.keptLines === 0) { - log.debug("[system1] keep_ranges produced empty filtered output", { - workspaceId: opts.workspaceId, - toolName: filterParams.toolName, - filterMethod, - keepRangesCount, - maxKeptLines, - totalLines: lines.length, - }); - return undefined; - } - - const elapsedMs = Date.now() - startTimeMs; - const trigger = [triggeredByLines ? "lines" : null, triggeredByBytes ? "bytes" : null] - .filter(Boolean) - .join("+"); - - const notice = formatSystem1BashFilterNotice({ - keptLines: applied.keptLines, - totalLines: applied.totalLines, - trigger, - fullOutputPath, - }); - - log.debug("[system1] Filtered bash tool output", { - workspaceId: opts.workspaceId, - toolName: filterParams.toolName, - intent: decision.intent, - alreadyTargeted: decision.alreadyTargeted, - displayName: filterParams.displayName, - userMaxKeptLines, - maxKeptLines, - system1Model: system1.modelString, - filterMethod, - keepRangesCount, - finishReason, - elapsedMs, - keptLines: applied.keptLines, - totalLines: applied.totalLines, - totalBytes: bytes, - triggeredByLines, - triggeredByBytes, - timeoutMs, - }); - - return { filteredOutput: applied.filteredOutput, notice }; - } catch (error) { - const errorMessage = getErrorMessage(error); - const errorName = error instanceof Error ? error.name : undefined; - const upstreamAborted = filterParams.abortSignal?.aborted ?? false; - const isAbortError = errorName === "AbortError"; - - log.debug("[system1] Failed to filter bash tool output", { - workspaceId: opts.workspaceId, - toolName: filterParams.toolName, - error: errorMessage, - errorName, - timedOut: system1TimedOut, - upstreamAborted, - isAbortError, - }); - return returnHardTruncationIfNeeded(); - } -} - -// --------------------------------------------------------------------------- -// Tool wrappers -// --------------------------------------------------------------------------- - -type MaybeFilterFn = ( - params: FilterParams -) => Promise<{ filteredOutput: string; notice: string } | undefined>; - -/** - * Merge filtered output into a tool result, appending notice to the note field. - * Returns undefined if the result wasn't filtered (caller should return original). - */ -function applyFilteredResult( - result: unknown, - filtered: { filteredOutput: string; notice: string } | undefined, - outputField: "output" | "reportMarkdown" = "output" -): Record | undefined { - if (!filtered) return undefined; - const existingNote = (result as { note?: unknown } | undefined)?.note; - return { - ...(result as Record), - [outputField]: filtered.filteredOutput, - note: appendToolNote( - typeof existingNote === "string" ? existingNote : undefined, - filtered.notice - ), - }; -} - -function wrapBashTool( - baseTool: Tool, - executeFn: ExecuteFn, - maybeFilter: MaybeFilterFn, - workspaceId: string -): Tool { - const wrapped = cloneToolPreservingDescriptors(baseTool); - const record = wrapped as unknown as Record; - - record.execute = async (args: unknown, options: unknown) => { - const result: unknown = await executeFn.call(baseTool, args, options); - - try { - const runInBackground = - Boolean((args as { run_in_background?: unknown } | undefined)?.run_in_background) || - (result && typeof result === "object" && "backgroundProcessId" in result); - if (runInBackground) return result; - - const output = (result as { output?: unknown } | undefined)?.output; - if (typeof output !== "string" || output.length === 0) return result; - - const displayName = - typeof (args as { display_name?: unknown } | undefined)?.display_name === "string" - ? String((args as { display_name?: unknown }).display_name).trim() || undefined - : undefined; - const script = - typeof (args as { script?: unknown } | undefined)?.script === "string" - ? String((args as { script?: unknown }).script) - : ""; - const toolCallId = - typeof (options as { toolCallId?: unknown } | undefined)?.toolCallId === "string" - ? (options as { toolCallId?: string }).toolCallId - : undefined; - - const filtered = await maybeFilter({ - toolName: "bash", - output, - script, - displayName, - toolCallId, - abortSignal: (options as { abortSignal?: AbortSignal } | undefined)?.abortSignal, - }); - return applyFilteredResult(result, filtered) ?? result; - } catch (error) { - log.debug("[system1] Failed to filter bash tool output", { - workspaceId, - error: getErrorMessage(error), - }); - return result; - } - }; - - return wrapped; -} - -function wrapBashOutputTool( - baseTool: Tool, - executeFn: ExecuteFn, - maybeFilter: MaybeFilterFn, - workspaceId: string -): Tool { - const wrapped = cloneToolPreservingDescriptors(baseTool); - const record = wrapped as unknown as Record; - - record.execute = async (args: unknown, options: unknown) => { - const result: unknown = await executeFn.call(baseTool, args, options); - - try { - const output = (result as { output?: unknown } | undefined)?.output; - if (typeof output !== "string" || output.length === 0) return result; - - const filtered = await maybeFilter({ - toolName: "bash_output", - output, - script: "", - abortSignal: (options as { abortSignal?: AbortSignal } | undefined)?.abortSignal, - }); - return applyFilteredResult(result, filtered) ?? result; - } catch (error) { - log.debug("[system1] Failed to filter bash_output tool output", { - workspaceId, - error: getErrorMessage(error), - }); - return result; - } - }; - - return wrapped; -} - -function wrapTaskAwaitTool( - baseTool: Tool, - executeFn: ExecuteFn, - maybeFilter: MaybeFilterFn, - workspaceId: string -): Tool { - const wrapped = cloneToolPreservingDescriptors(baseTool); - const record = wrapped as unknown as Record; - - record.execute = async (args: unknown, options: unknown) => { - const result: unknown = await executeFn.call(baseTool, args, options); - - try { - const resultsValue = (result as { results?: unknown } | undefined)?.results; - if (!Array.isArray(resultsValue) || resultsValue.length === 0) return result; - - const abortSignal = (options as { abortSignal?: AbortSignal } | undefined)?.abortSignal; - - const filteredResults = await Promise.all( - resultsValue.map(async (entry: unknown) => { - if (!entry || typeof entry !== "object") return entry; - - const taskId = (entry as { taskId?: unknown }).taskId; - if (typeof taskId !== "string" || !taskId.startsWith("bash:")) return entry; - - const status = (entry as { status?: unknown }).status; - - if (status === "running") { - const output = (entry as { output?: unknown }).output; - if (typeof output !== "string" || output.length === 0) return entry; - - const filtered = await maybeFilter({ - toolName: "task_await", - output, - script: "", - abortSignal, - }); - return applyFilteredResult(entry, filtered) ?? entry; - } - - if (status === "completed") { - const reportMarkdown = (entry as { reportMarkdown?: unknown }).reportMarkdown; - if (typeof reportMarkdown !== "string" || reportMarkdown.length === 0) return entry; - - const parsed = tryParseBashOutputReport(reportMarkdown); - if (!parsed || parsed.output.length === 0) return entry; - - const filtered = await maybeFilter({ - toolName: "task_await", - output: parsed.output, - script: "", - abortSignal, - }); - if (!filtered) return entry; - - const existingNote = (entry as { note?: unknown }).note; - return { - ...(entry as Record), - reportMarkdown: formatBashOutputReport({ - processId: parsed.processId, - status: parsed.status, - exitCode: parsed.exitCode, - output: filtered.filteredOutput, - }), - note: appendToolNote( - typeof existingNote === "string" ? existingNote : undefined, - filtered.notice - ), - }; - } - - return entry; - }) - ); - - return { ...(result as Record), results: filteredResults }; - } catch (error) { - log.debug("[system1] Failed to filter task_await tool output", { - workspaceId, - error: getErrorMessage(error), - }); - return result; - } - }; - - return wrapped; -} diff --git a/src/node/services/telemetryService.featureFlags.test.ts b/src/node/services/telemetryService.featureFlags.test.ts index c16a4b4a1e..b57308b280 100644 --- a/src/node/services/telemetryService.featureFlags.test.ts +++ b/src/node/services/telemetryService.featureFlags.test.ts @@ -52,7 +52,7 @@ describe("TelemetryService feature flag properties", () => { // @ts-expect-error - Accessing private property for test telemetry.distinctId = "distinct-id"; - telemetry.setFeatureFlagVariant("system-1", "test"); + telemetry.setFeatureFlagVariant("programmatic-tool-calling", "test"); const payload: TelemetryEventPayload = { event: "message_sent", @@ -78,7 +78,7 @@ describe("TelemetryService feature flag properties", () => { | { properties?: Record } | undefined; expect(call?.properties).toBeDefined(); - expect(call?.properties?.["$feature/system-1"]).toBe("test"); + expect(call?.properties?.["$feature/programmatic-tool-calling"]).toBe("test"); } finally { // Restore all env vars for (const [key, value] of Object.entries(savedEnv)) { diff --git a/src/node/services/tools/system1_keep_ranges.ts b/src/node/services/tools/system1_keep_ranges.ts deleted file mode 100644 index c726c5177c..0000000000 --- a/src/node/services/tools/system1_keep_ranges.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { tool } from "ai"; -import type { Tool } from "ai"; - -import type { z } from "zod"; -import type { ToolConfiguration } from "@/common/utils/tools/tools"; -import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; - -import type { System1KeepRange } from "@/node/services/system1/bashOutputFiltering"; - -// Derived from the Zod schema (single source of truth) to avoid drift. -export type System1KeepRangesToolArgs = z.infer; - -export type System1KeepRangesToolResult = - | { - success: true; - } - | { - success: false; - error: string; - }; - -export function createSystem1KeepRangesTool( - _config: ToolConfiguration, - options?: { - onKeepRanges?: (keepRanges: System1KeepRange[]) => void; - } -): Tool { - let called = false; - - return tool({ - description: TOOL_DEFINITIONS.system1_keep_ranges.description, - inputSchema: TOOL_DEFINITIONS.system1_keep_ranges.schema, - execute: ({ keep_ranges }: System1KeepRangesToolArgs): System1KeepRangesToolResult => { - // Defensive: the model should only call this once, but don't error-loop if it retries. - if (called) { - return { success: true }; - } - called = true; - options?.onKeepRanges?.(keep_ranges); - return { success: true }; - }, - }); -} diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index ee672167da..e41970871e 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -26,7 +26,7 @@ import { coerceAgentStatus } from "@/node/utils/extensionMetadata"; import { readTodosForSessionDir } from "@/node/services/todos/todoStorage"; import type { TelemetryService } from "@/node/services/telemetryService"; import type { ExperimentsService } from "@/node/services/experimentsService"; -import { EXPERIMENT_IDS, EXPERIMENTS } from "@/common/constants/experiments"; +import { EXPERIMENT_IDS } from "@/common/constants/experiments"; import type { PolicyService } from "@/node/services/policyService"; import type { MCPServerManager } from "@/node/services/mcpServerManager"; import { @@ -5375,42 +5375,7 @@ export class WorkspaceService extends EventEmitter { void this.updateRecencyTimestamp(workspaceId, messageTimestamp); } - // Experiments: resolve flags respecting userOverridable setting. - // - If userOverridable && frontend provides a value (explicit override) → use frontend value - // - Else if remote evaluation enabled → use PostHog assignment - // - Else → use frontend value (dev fallback) or default - const system1Experiment = EXPERIMENTS[EXPERIMENT_IDS.SYSTEM_1]; - const system1FrontendValue = options?.experiments?.system1; - - let system1Enabled: boolean | undefined; - if (system1Experiment.userOverridable && system1FrontendValue !== undefined) { - // User-overridable: trust frontend value (user's explicit choice) - system1Enabled = system1FrontendValue; - } else if (this.experimentsService?.isRemoteEvaluationEnabled() === true) { - // Remote evaluation: use PostHog assignment - system1Enabled = this.experimentsService.isExperimentEnabled(EXPERIMENT_IDS.SYSTEM_1); - } else { - // Fallback to frontend value (dev mode or telemetry disabled) - system1Enabled = system1FrontendValue; - } - - const resolvedExperiments: Record = {}; - if (system1Enabled !== undefined) { - resolvedExperiments.system1 = system1Enabled; - } - - const resolvedOptions = - Object.keys(resolvedExperiments).length === 0 - ? options - : { - ...options, - experiments: { - ...(options.experiments ?? {}), - ...resolvedExperiments, - }, - }; - - const normalizedOptions = this.normalizeSendMessageAgentId(resolvedOptions); + const normalizedOptions = this.normalizeSendMessageAgentId(options); // Persist last-used model + thinking level for cross-device consistency. await this.maybePersistAISettingsFromOptions(workspaceId, normalizedOptions, "send"); diff --git a/tests/ipc/streaming/system1BashCompaction.matrix.test.ts b/tests/ipc/streaming/system1BashCompaction.matrix.test.ts deleted file mode 100644 index 30cdf84105..0000000000 --- a/tests/ipc/streaming/system1BashCompaction.matrix.test.ts +++ /dev/null @@ -1,221 +0,0 @@ -import * as os from "node:os"; - -import { buildProviderOptions } from "../../../src/common/utils/ai/providerOptions"; -import { getThinkingPolicyForModel } from "../../../src/common/utils/thinking/policy"; -import { THINKING_LEVELS, type ThinkingLevel } from "../../../src/common/types/thinking"; -import { - applySystem1KeepRangesToOutput, - formatNumberedLinesForSystem1, - splitBashOutputLines, -} from "../../../src/node/services/system1/bashOutputFiltering"; -import { runSystem1KeepRangesForBashOutput } from "../../../src/node/services/system1/system1AgentRunner"; -import { createRuntime } from "../../../src/node/runtime/runtimeFactory"; - -import { - cleanupTestEnvironment, - createTestEnvironment, - preloadTestModules, - setupProviders, - shouldRunIntegrationTests, - type TestEnvironment, -} from "../setup"; - -function parseModelString(modelString: string): { provider: string; modelId: string } | null { - const [provider, modelId] = modelString.split(":", 2); - if (!provider || !modelId) { - return null; - } - return { provider, modelId }; -} - -function pickThinkingLevels(levels: readonly ThinkingLevel[]): ThinkingLevel[] { - const normalized = [...levels]; - - if (normalized.length <= 2) { - return normalized; - } - - const min = normalized[0]; - const mid = normalized[Math.floor(normalized.length / 2)]; - const max = normalized[normalized.length - 1]; - - const picked: ThinkingLevel[] = []; - for (const level of [min, mid, max]) { - if (!picked.includes(level)) { - picked.push(level); - } - } - return picked; -} - -function resolveApiKeyForProvider(provider: string): string | null { - if (provider === "anthropic") { - return process.env.ANTHROPIC_API_KEY ?? null; - } - if (provider === "openai") { - return process.env.OPENAI_API_KEY ?? null; - } - if (provider === "google") { - return process.env.GOOGLE_GENERATIVE_AI_API_KEY ?? process.env.GOOGLE_API_KEY ?? null; - } - return null; -} - -const DEFAULT_MODELS = [ - "google:gemini-3-flash-preview", - "anthropic:claude-haiku-4-5", - "openai:gpt-5.2", -]; - -const requestedModels = ( - process.env.SYSTEM1_BASH_COMPACTION_TEST_MODELS - ? process.env.SYSTEM1_BASH_COMPACTION_TEST_MODELS.split(",") - : DEFAULT_MODELS -) - .map((m) => m.trim()) - .filter((m) => m.length > 0); - -const configuredModels = requestedModels.filter((modelString) => { - const parsed = parseModelString(modelString); - if (!parsed) { - return false; - } - - const apiKey = resolveApiKeyForProvider(parsed.provider); - if (!apiKey) { - // eslint-disable-next-line no-console - console.warn( - `Skipping System1 bash compaction integration test for ${modelString}: missing API key env vars for provider ${parsed.provider}` - ); - return false; - } - - return true; -}); - -const shouldRunSuite = shouldRunIntegrationTests() && configuredModels.length > 0; -const describeIntegration = shouldRunSuite ? describe : describe.skip; - -if (shouldRunIntegrationTests() && !shouldRunSuite) { - // eslint-disable-next-line no-console - console.warn( - "Skipping System1 bash compaction integration tests: no configured models (missing API keys)" - ); -} - -if (!shouldRunSuite) { - // Jest still errors on an entirely empty file even when the matrix suite is skipped, - // so keep one placeholder skip when no configured models survive the env filtering. - test.skip("skips when no integration models are configured", () => {}); -} - -const TEST_TIMEOUT_MS = 60_000; - -const ERROR_MARKER = "MUX_SYSTEM1_KEEP_RANGES_TEST_ERROR_MARKER"; -const RAW_OUTPUT = [ - "running...", - "some noise line 1", - "some noise line 2", - `ERROR: ${ERROR_MARKER}`, - " at path/to/file.ts:12:3", - "exited with code 1", -].join("\n"); - -const SCRIPT = "bun test"; -const MAX_KEPT_LINES = 40; - -// This test calls real providers via runSystem1KeepRangesForBashOutput() and validates that we can -// reliably obtain usable keep_ranges for bash output filtering across a model + thinking-level matrix. -describeIntegration("System1 bash output compaction (keep_ranges matrix)", () => { - let env: TestEnvironment; - - beforeAll(async () => { - await preloadTestModules(); - env = await createTestEnvironment(); - - const providers: Record = {}; - for (const modelString of configuredModels) { - const parsed = parseModelString(modelString); - if (!parsed) continue; - if (providers[parsed.provider]) continue; - - const apiKey = resolveApiKeyForProvider(parsed.provider); - if (!apiKey) continue; - - providers[parsed.provider] = { apiKey }; - } - - await setupProviders(env, providers); - }, 30_000); - - afterAll(async () => { - if (env) { - await cleanupTestEnvironment(env); - } - }); - - for (const modelString of configuredModels) { - test( - `should generate keep_ranges for ${modelString}`, - async () => { - const modelResult = await env.services.aiService.createModel(modelString); - expect(modelResult.success).toBe(true); - if (!modelResult.success) { - throw new Error(`Failed to create model ${modelString}: ${modelResult.error}`); - } - - const runtime = createRuntime({ type: "local", srcBaseDir: process.cwd() }); - const agentDiscoveryPath = process.cwd(); - const runtimeTempDir = os.tmpdir(); - - const lines = splitBashOutputLines(RAW_OUTPUT); - const numberedOutput = formatNumberedLinesForSystem1(lines); - - const policy = getThinkingPolicyForModel(modelString); - const allowedThinkingLevels = policy.length > 0 ? policy : THINKING_LEVELS; - const thinkingLevels = pickThinkingLevels(allowedThinkingLevels); - - for (const thinkingLevel of thinkingLevels) { - const providerOptions = buildProviderOptions( - modelString, - thinkingLevel, - undefined, - undefined, - undefined, - "system1-test" - ) as unknown as Record; - - const keepRangesResult = await runSystem1KeepRangesForBashOutput({ - runtime, - agentDiscoveryPath, - runtimeTempDir, - model: modelResult.data, - modelString, - providerOptions, - script: SCRIPT, - numberedOutput, - maxKeptLines: MAX_KEPT_LINES, - timeoutMs: 30_000, - }); - - if (!keepRangesResult) { - throw new Error(`Failed to obtain keep_ranges from ${modelString} (${thinkingLevel})`); - } - - const keepRanges = keepRangesResult.keepRanges; - - const applied = applySystem1KeepRangesToOutput({ - rawOutput: RAW_OUTPUT, - keepRanges, - maxKeptLines: MAX_KEPT_LINES, - }); - - expect(applied).toBeDefined(); - expect(applied?.keptLines).toBeLessThanOrEqual(MAX_KEPT_LINES); - expect(applied?.filteredOutput).toContain(ERROR_MARKER); - } - }, - TEST_TIMEOUT_MS - ); - } -}); diff --git a/tests/ui/agents/thinkingPolicy.test.ts b/tests/ui/agents/thinkingPolicy.test.ts deleted file mode 100644 index ae6ae25aa1..0000000000 --- a/tests/ui/agents/thinkingPolicy.test.ts +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Integration test: System 1 settings should only expose thinking levels - * supported by the selected System 1 model. - */ - -import "../dom"; -import { fireEvent, waitFor, within } from "@testing-library/react"; -import userEvent from "@testing-library/user-event"; - -import { updatePersistedState } from "@/browser/hooks/usePersistedState"; -import { EXPERIMENT_IDS, getExperimentKey } from "@/common/constants/experiments"; -import { - PREFERRED_SYSTEM_1_MODEL_KEY, - PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, -} from "@/common/constants/storage"; - -import { shouldRunIntegrationTests } from "../../testUtils"; -import { createAppHarness } from "../harness"; - -const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip; - -const GEMINI_FLASH_PREVIEW = "google:gemini-3-flash-preview"; - -/** - * Regression for: the System 1 Reasoning dropdown showing unsupported options. - * - * Example: - * - Model: gemini-3-flash-preview - * - Stored level: xhigh (unsupported) - * - * Expected: - * - UI clamps display to "high" - * - Dropdown does not include "xhigh" - */ -describeIntegration("System 1 reasoning policy", () => { - test("clamps and filters unsupported thinking levels for the selected model", async () => { - const harness = await createAppHarness({ - branchPrefix: "system1", - beforeRender() { - updatePersistedState(getExperimentKey(EXPERIMENT_IDS.SYSTEM_1), true); - updatePersistedState(PREFERRED_SYSTEM_1_MODEL_KEY, GEMINI_FLASH_PREVIEW); - updatePersistedState(PREFERRED_SYSTEM_1_THINKING_LEVEL_KEY, "xhigh"); - }, - }); - - try { - const doc = harness.view.container.ownerDocument; - const user = userEvent.setup({ document: doc }); - - const canvas = within(harness.view.container); - const settingsButton = await canvas.findByTestId("settings-button", {}, { timeout: 10_000 }); - await user.click(settingsButton); - - // Settings now render as a route page in the main pane (not a modal dialog). - const settingsCanvas = within(harness.view.container); - const body = within(harness.view.container.ownerDocument.body); - - const system1TabButtons = await settingsCanvas.findAllByRole( - "button", - { - name: /system 1/i, - }, - { timeout: 10_000 } - ); - const system1TabButton = system1TabButtons[0]; - if (!system1TabButton) { - throw new Error("System 1 tab button not found"); - } - await user.click(system1TabButton); - - await settingsCanvas.findByText(/System 1 Reasoning/i); - - const reasoningSelect = await waitFor(() => { - const el = harness.view.container.querySelector( - 'button[role="combobox"]' - ) as HTMLButtonElement | null; - if (!el) { - throw new Error("System 1 Reasoning select not found"); - } - return el; - }); - - await waitFor(() => { - const value = reasoningSelect.textContent?.trim(); - if (value !== "high") { - throw new Error(`Expected reasoning value "high" but got ${JSON.stringify(value)}`); - } - }); - - // Radix Select opens on keyboard interactions (ArrowDown/Enter) reliably in tests. - fireEvent.keyDown(reasoningSelect, { key: "ArrowDown" }); - - await body.findByRole("option", { name: "high" }); - - const xhighOption = body.queryByRole("option", { name: "xhigh" }); - if (xhighOption) { - throw new Error( - "Expected System 1 Reasoning dropdown to hide xhigh for gemini-3-flash-preview" - ); - } - } finally { - await harness.dispose(); - } - }, 90_000); -});