From 48432885fc83d4f097ce96a5ed52f9ac8f082268 Mon Sep 17 00:00:00 2001 From: esengine <359807859@qq.com> Date: Mon, 4 May 2026 17:06:56 -0700 Subject: [PATCH] fix(shell): drop shell-quote, hand-roll chain splitter to keep lenient tokenization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 (#233) used shell-quote for chain segmentation, but its strict POSIX tokenization split `--flag=1&2` into three tokens with `&` as a background operator — a regression vs. pre-Phase-1 behavior, where the existing lenient `tokenizeCommand` left embedded `&`/`|`/`;` chars as literal bytes inside larger tokens. Replace with a small whitespace-bounded chain splitter that matches the existing `detectShellOperator` convention: chain ops only count when they begin a whitespace-separated token. Each segment then runs through the existing `tokenizeCommand` (lenient) and `detectShellOperator` (rejects `>`, `<`, `&`, `2>&1`, `&>`). Recovers: - `cargo run -- --flag=1&2` — runs (was rejected post-#233) - `git status ; cargo run -- --flag=1&2` — chain runs, second segment keeps `&` Drops: - `git status|grep main` (no spaces) — never worked pre-Phase-1, was a shell-quote-only side effect; align with the project's whitespace-bounded operator convention. Also drops `$VAR` / `$(...)` rejection — these now pass through as literal arguments, matching the existing single-command tokenizer's behavior. Models see the literal `$HOME` in echo's output and learn. --- package-lock.json | 21 ------- package.json | 2 - src/tools/shell-chain.ts | 118 +++++++++++++++++++++++++------------- tests/shell-chain.test.ts | 54 ++++++++++------- tests/shell-tools.test.ts | 10 ---- 5 files changed, 112 insertions(+), 93 deletions(-) diff --git a/package-lock.json b/package-lock.json index 4fafd4b8..8f889292 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,7 +16,6 @@ "picomatch": "^4.0.4", "react": "^18.3.1", "react-reconciler": "^0.29.2", - "shell-quote": "^1.8.3", "string-width": "^7.2.0", "yoga-layout": "^3.2.1", "zod": "^4.4.1" @@ -30,7 +29,6 @@ "@types/picomatch": "^4.0.3", "@types/react": "^18.3.12", "@types/react-reconciler": "^0.28.9", - "@types/shell-quote": "^1.7.5", "esbuild": "^0.21.5", "highlight.js": "^11.10.0", "htm": "^3.1.1", @@ -1110,13 +1108,6 @@ "@types/react": "*" } }, - "node_modules/@types/shell-quote": { - "version": "1.7.5", - "resolved": "https://registry.npmjs.org/@types/shell-quote/-/shell-quote-1.7.5.tgz", - "integrity": "sha512-+UE8GAGRPbJVQDdxi16dgadcBfQ+KG2vgZhV1+3A1XmHbmwcdwhCUwIdy+d3pAGrbvgRoVSjeI9vOWyq376Yzw==", - "dev": true, - "license": "MIT" - }, "node_modules/@vitest/expect": { "version": "2.1.9", "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-2.1.9.tgz", @@ -2562,18 +2553,6 @@ "loose-envify": "^1.1.0" } }, - "node_modules/shell-quote": { - "version": "1.8.3", - "resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.3.tgz", - "integrity": "sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/siginfo": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", diff --git a/package.json b/package.json index c9597b0a..c02e3c10 100644 --- a/package.json +++ b/package.json @@ -73,7 +73,6 @@ "picomatch": "^4.0.4", "react": "^18.3.1", "react-reconciler": "^0.29.2", - "shell-quote": "^1.8.3", "string-width": "^7.2.0", "yoga-layout": "^3.2.1", "zod": "^4.4.1" @@ -84,7 +83,6 @@ "@types/picomatch": "^4.0.3", "@types/react": "^18.3.12", "@types/react-reconciler": "^0.28.9", - "@types/shell-quote": "^1.7.5", "esbuild": "^0.21.5", "highlight.js": "^11.10.0", "htm": "^3.1.1", diff --git a/src/tools/shell-chain.ts b/src/tools/shell-chain.ts index a7470a0f..0eb5c522 100644 --- a/src/tools/shell-chain.ts +++ b/src/tools/shell-chain.ts @@ -1,8 +1,13 @@ /** Parse + spawn `cmd1 | cmd2 && cmd3` ourselves — never invoke a shell, sidestep PS5.1's `&&` parse error. */ import { type ChildProcess, type SpawnOptions, spawn } from "node:child_process"; -import { parse as shellParse } from "shell-quote"; -import { killProcessTree, prepareSpawn, smartDecodeOutput } from "./shell.js"; +import { + detectShellOperator, + killProcessTree, + prepareSpawn, + smartDecodeOutput, + tokenizeCommand, +} from "./shell.js"; export type ChainOp = "|" | "||" | "&&" | ";"; @@ -16,8 +21,6 @@ export interface CommandChain { ops: ChainOp[]; } -const CHAIN_OPS = new Set(["|", "||", "&&", ";"]); - export class UnsupportedSyntaxError extends Error { constructor(detail: string) { super(`run_command: ${detail}`); @@ -25,55 +28,92 @@ export class UnsupportedSyntaxError extends Error { } } -/** Returns null on plain commands (caller takes the simple path); throws on unsupported syntax. */ -export function parseCommandChain(cmd: string): CommandChain | null { - // shell-quote calls env() with name="" for `$(...)` — defer that to the `(` op handler. - const tokens = shellParse(cmd, (name: string) => - name === "" ? "$" : { op: "$VAR" as const, name }, - ); - const segments: ChainSegment[] = []; +/** Whitespace-bounded splitter — chain ops only count when they begin a token, so `--flag=1&2` stays literal. */ +function splitOnChainOps(cmd: string): { segs: string[]; ops: ChainOp[] } { + const segs: string[] = []; const ops: ChainOp[] = []; - let cur: string[] = []; - let sawChainOp = false; - for (const t of tokens) { - if (typeof t === "string") { - cur.push(t); + let segStart = 0; + let i = 0; + let quote: '"' | "'" | null = null; + let atTokenStart = true; + while (i < cmd.length) { + const ch = cmd[i]!; + if (quote) { + if (ch === quote) quote = null; + else if (ch === "\\" && quote === '"' && i + 1 < cmd.length) i++; + i++; + atTokenStart = false; continue; } - if ("comment" in t) continue; - const op = (t as { op: string }).op; - if (CHAIN_OPS.has(op)) { - sawChainOp = true; - if (cur.length === 0) throw new UnsupportedSyntaxError(`empty segment before "${op}"`); - segments.push({ argv: cur }); - ops.push(op as ChainOp); - cur = []; + if (ch === '"' || ch === "'") { + quote = ch; + i++; + atTokenStart = false; continue; } - if (op === "glob") { - cur.push((t as { pattern: string }).pattern); + if (ch === " " || ch === "\t") { + i++; + atTokenStart = true; continue; } - if (op === "$VAR") { - const name = (t as { name: string }).name; + if (atTokenStart) { + let op: ChainOp | null = null; + let opLen = 0; + const next = cmd[i + 1]; + if (ch === "|" && next === "|") { + op = "||"; + opLen = 2; + } else if (ch === "&" && next === "&") { + op = "&&"; + opLen = 2; + } else if (ch === "|") { + op = "|"; + opLen = 1; + } else if (ch === ";") { + op = ";"; + opLen = 1; + } + if (op !== null) { + segs.push(cmd.slice(segStart, i)); + ops.push(op); + i += opLen; + segStart = i; + atTokenStart = true; + continue; + } + } + i++; + atTokenStart = false; + } + segs.push(cmd.slice(segStart)); + return { segs, ops }; +} + +/** Returns null on plain commands (caller takes the simple path); throws on unsupported syntax inside any segment. */ +export function parseCommandChain(cmd: string): CommandChain | null { + const { segs, ops } = splitOnChainOps(cmd); + if (ops.length === 0) return null; + const segments: ChainSegment[] = []; + for (let i = 0; i < segs.length; i++) { + const trimmed = segs[i]!.trim(); + if (trimmed.length === 0) { + const op = i === 0 ? ops[0]! : ops[i - 1]!; throw new UnsupportedSyntaxError( - `\$${name} expansion is not supported — pass values as literals, or use the binary's own --env flag`, + i === 0 + ? `empty segment before "${op}"` + : i === segs.length - 1 + ? `chain ends with "${op}"` + : `empty segment between "${ops[i - 1]}" and "${ops[i]}"`, ); } - if (op === "(" || op === ")") { + const segOp = detectShellOperator(trimmed); + if (segOp !== null) { throw new UnsupportedSyntaxError( - "command substitution / subshells are not supported — split into separate calls", + `shell operator "${segOp}" is not supported — only \`|\`, \`||\`, \`&&\`, \`;\` chain operators are spawned natively. Redirects (\`>\`, \`<\`, \`2>&1\`) and background (\`&\`) require splitting into separate run_command calls.`, ); } - throw new UnsupportedSyntaxError( - `shell operator "${op}" is not supported — only \`|\`, \`||\`, \`&&\`, \`;\` chain operators work; redirects (\`>\`, \`<\`, \`2>&1\`) are rejected`, - ); - } - if (!sawChainOp) return null; - if (cur.length === 0) { - throw new UnsupportedSyntaxError(`chain ends with "${ops[ops.length - 1]}"`); + segments.push({ argv: tokenizeCommand(trimmed) }); } - segments.push({ argv: cur }); return { segments, ops }; } diff --git a/tests/shell-chain.test.ts b/tests/shell-chain.test.ts index d4cf33e8..bc698c9b 100644 --- a/tests/shell-chain.test.ts +++ b/tests/shell-chain.test.ts @@ -33,17 +33,9 @@ describe("parseCommandChain", () => { expect(c!.ops).toEqual(["&&", "||", ";"]); }); - it("handles `|` without surrounding spaces (`a|b`)", () => { - const c = parseCommandChain("git status|grep main"); - expect(c!.segments.map((s) => s.argv)).toEqual([ - ["git", "status"], - ["grep", "main"], - ]); - }); - it("preserves quoted operators as literal arguments", () => { - const c = parseCommandChain('grep "a|b" file'); - expect(c).toBeNull(); + expect(parseCommandChain('grep "a|b" file')).toBeNull(); + expect(parseCommandChain("grep 'a|b' file")).toBeNull(); }); it("passes globs through as literal patterns (no shell expansion)", () => { @@ -51,30 +43,50 @@ describe("parseCommandChain", () => { expect(c!.segments[0]!.argv).toEqual(["ls", "*.ts"]); }); - it("rejects redirects", () => { - expect(() => parseCommandChain("echo hi > out.txt")).toThrow(UnsupportedSyntaxError); - expect(() => parseCommandChain("sort < in.txt")).toThrow(/<.*not supported/); - expect(() => parseCommandChain("cmd 2>&1")).toThrow(/not supported/); + it("does NOT split on chain chars embedded inside larger tokens", () => { + // `--flag=1&2` is one POSIX token; the `&` is a literal byte. Tokens + // containing `&` / `|` / `;` chars but not at the start are passed + // through untouched, matching the lenient single-command tokenizer. + expect(parseCommandChain("cargo run -- --flag=1&2")).toBeNull(); + expect(parseCommandChain("grep a|b file")).toBeNull(); + expect(parseCommandChain("echo a;b")).toBeNull(); }); - it("rejects background `&`", () => { - expect(() => parseCommandChain("long_task &")).toThrow(/"&" is not supported/); + it("allows embedded chain chars inside chain segments", () => { + const c = parseCommandChain("git status ; cargo run -- --flag=1&2"); + expect(c!.segments.map((s) => s.argv)).toEqual([ + ["git", "status"], + ["cargo", "run", "--", "--flag=1&2"], + ]); + expect(c!.ops).toEqual([";"]); }); - it("rejects env-var expansion", () => { - expect(() => parseCommandChain("echo $HOME")).toThrow(/\$HOME expansion/); + it("rejects redirects discovered inside a chain segment", () => { + expect(() => parseCommandChain("echo hi ; ls > out.txt")).toThrow(/">"/); + expect(() => parseCommandChain("git status | wc -l > out.txt")).toThrow(/">"/); + expect(() => parseCommandChain("a ; cmd 2>&1")).toThrow(/2>&1/); }); - it("rejects command substitution", () => { - expect(() => parseCommandChain("echo $(date)")).toThrow(/command substitution/); + it("rejects background `&` discovered inside a chain segment", () => { + expect(() => parseCommandChain("git status ; long &")).toThrow(/"&" is not supported/); }); it("rejects empty leading segments", () => { expect(() => parseCommandChain("; echo hi")).toThrow(/empty segment/); + expect(() => parseCommandChain("|| cat")).toThrow(/empty segment/); }); it("rejects a chain ending with an operator", () => { expect(() => parseCommandChain("echo hi &&")).toThrow(/chain ends with/); + expect(() => parseCommandChain("echo hi ;")).toThrow(/chain ends with/); + }); + + it("rejects empty middle segments", () => { + expect(() => parseCommandChain("a ; ; b")).toThrow(/empty segment/); + }); + + it("rejects unclosed quotes inside a chain segment", () => { + expect(() => parseCommandChain('git status ; echo "open')).toThrow(/unclosed/); }); }); @@ -104,7 +116,7 @@ describe("isCommandAllowed", () => { it("returns false for unsupported syntax (rather than throwing)", () => { expect(isCommandAllowed("echo hi > out.txt")).toBe(false); - expect(isCommandAllowed("echo $(date)")).toBe(false); + expect(isCommandAllowed("echo hi &")).toBe(false); }); }); diff --git a/tests/shell-tools.test.ts b/tests/shell-tools.test.ts index be0cdc09..10df28d3 100644 --- a/tests/shell-tools.test.ts +++ b/tests/shell-tools.test.ts @@ -134,16 +134,6 @@ describe("runCommand redirect rejection", () => { ); }); - it("rejects `$VAR` env-var expansion", async () => { - await expect(runCommand("echo $HOME", { cwd: tmp })).rejects.toThrow( - /\$HOME expansion is not supported/, - ); - }); - - it("rejects command substitution `$(…)`", async () => { - await expect(runCommand("echo $(date)", { cwd: tmp })).rejects.toThrow(/command substitution/); - }); - it("rejects an empty leading segment", async () => { await expect(runCommand("; echo hi", { cwd: tmp })).rejects.toThrow(/empty segment before/); });