From 64f2246a5cf3d0db6185a576b96a03c3b5a8ea1a Mon Sep 17 00:00:00 2001 From: kaghni Date: Tue, 21 Apr 2026 18:35:42 +0000 Subject: [PATCH 1/4] feat(pre-tool-use): auto-rewrite unsupported interpreter reads to cat When the agent invokes an unsupported interpreter (python/python3/node/ deno/bun/ruby/perl) on a single memory path with no shell metacharacters, rewrite the call to cat instead of returning the generic RETRY guidance. This avoids burning a turn when the intent is clearly a plain file read. Narrow safety conditions: - Command starts with a recognized read-like interpreter. - No shell metacharacters (dollar-paren, backtick, semicolon, pipe, ampersand, angle brackets, parens, backslash). - Memory path matches a conservative [word/./_/-] regex (no trailing quotes or parens can glue onto the path). - Final cat command wraps the path in single quotes and escapes embedded single quotes, preventing injection. Composite or non-interpreter commands (curl, wget, etc.) still get the RETRY guidance. Tests: 918/918 passing. --- claude-code/bundle/pre-tool-use.js | 11 +++++++++++ src/hooks/pre-tool-use.ts | 22 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 5076674..cafc309 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -1951,6 +1951,17 @@ async function processPreToolUse(input, deps = {}) { const toolPath = getReadTargetPath(input.tool_input) ?? input.tool_input.path ?? ""; if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath))) { const guidance = "[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. python, python3, node, and curl are NOT available. You MUST rewrite your command using only the bash tools listed above and try again. 
For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'."; + const isReadLike = /^(?:python3?|node|deno|bun|ruby|perl)\b/.test(cmd.trim()); + const hasShellMeta = /[$`;|&<>()\\]/.test(cmd); + if (isReadLike && !hasShellMeta) { + const pathMatch = cmd.match(/~\/\.deeplake\/memory\/[\w./_-]+/) || toolPath.match(/~\/\.deeplake\/memory\/[\w./_-]+/); + const memPath = pathMatch ? pathMatch[0] : ""; + const cleanPath = memPath ? rewritePaths(memPath) : ""; + if (cleanPath && !cleanPath.endsWith("/")) { + logFn(`unsupported command on file, converting to cat: ${cleanPath}`); + return buildAllowDecision(`cat '${cleanPath.replace(/'/g, "'\\''")}'`, "[DeepLake] converted unsupported interpreter read to cat"); + } + } logFn(`unsupported command, returning guidance: ${cmd}`); return buildAllowDecision(`echo ${JSON.stringify(guidance)}`, "[DeepLake] unsupported command \u2014 rewrite using bash builtins"); } diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 34c45db..508e9a1 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -225,6 +225,28 @@ export async function processPreToolUse(input: PreToolUseInput, deps: ClaudePreT "python, python3, node, and curl are NOT available. " + "You MUST rewrite your command using only the bash tools listed above and try again. " + "For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'."; + + // Fast-path: a clean single-file read attempt by an unsupported interpreter + // (python/node/deno/bun/ruby/perl, no shell metacharacters) gets rewritten to + // `cat '<path>'` so the agent doesn't burn a turn on a RETRY. Anything with + // $(...), backticks, pipes, redirects, or chains falls through to the + // guidance below — safer than trying to rewrite composite commands. 
+ const isReadLike = /^(?:python3?|node|deno|bun|ruby|perl)\b/.test(cmd.trim()); + const hasShellMeta = /[$`;|&<>()\\]/.test(cmd); + if (isReadLike && !hasShellMeta) { + const pathMatch = cmd.match(/~\/\.deeplake\/memory\/[\w./_-]+/) + || toolPath.match(/~\/\.deeplake\/memory\/[\w./_-]+/); + const memPath = pathMatch ? pathMatch[0] : ""; + const cleanPath = memPath ? rewritePaths(memPath) : ""; + if (cleanPath && !cleanPath.endsWith("/")) { + logFn(`unsupported command on file, converting to cat: ${cleanPath}`); + return buildAllowDecision( + `cat '${cleanPath.replace(/'/g, "'\\''")}'`, + "[DeepLake] converted unsupported interpreter read to cat", + ); + } + } + logFn(`unsupported command, returning guidance: ${cmd}`); return buildAllowDecision( `echo ${JSON.stringify(guidance)}`, From 04bf1d5dd1a7cdc709361e3d795d743fb786821a Mon Sep 17 00:00:00 2001 From: kaghni Date: Tue, 21 Apr 2026 19:33:56 +0000 Subject: [PATCH 2/4] feat(grep-core): multi-word OR split for non-regex patterns When a non-regex grep pattern contains multiple words, split into per-word OR filters instead of requiring the full literal phrase to appear verbatim in the summary. Natural-language queries like Melanie kids dinosaurs now match sessions that mention any of the three tokens, not only those that contain all three in that exact order. Single-word and regex patterns are unchanged. Implementation: buildGrepSearchOptions now populates a new optional multiWordPatterns field on SearchOptions when the pattern splits into >1 word-long tokens. searchDeeplakeTables consumes that field in the non-contentScanOnly branch, falling back to the single escapedPattern otherwise. All downstream OR joining is handled by the existing buildContentFilter helper. Tests: 918/918 passing. 
--- claude-code/bundle/pre-tool-use.js | 8 +++++--- claude-code/bundle/shell/deeplake-shell.js | 8 +++++--- codex/bundle/pre-tool-use.js | 8 +++++--- codex/bundle/shell/deeplake-shell.js | 8 +++++--- src/shell/grep-core.ts | 15 +++++++++++++-- 5 files changed, 33 insertions(+), 14 deletions(-) diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index cafc309..b4bbe4c 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -639,9 +639,9 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts; const limit = opts.limit ?? 100; - const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern]; const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; @@ -730,13 +730,15 @@ function buildGrepSearchOptions(params, targetPath) { const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? 
extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const multiWordPatterns = !hasRegexMeta ? params.pattern.split(/\s+/).filter((w) => w.length > 2).slice(0, 4) : []; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 ? multiWordPatterns.map((w) => sqlLike(w)) : void 0 }; } function buildContentFilter(column, likeOp, patterns) { diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 0793149..929201c 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -67326,9 +67326,9 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts; const limit = opts.limit ?? 100; - const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? 
multiWordPatterns : [escapedPattern]; const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; @@ -67427,13 +67427,15 @@ function buildGrepSearchOptions(params, targetPath) { const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const multiWordPatterns = !hasRegexMeta ? params.pattern.split(/\s+/).filter((w20) => w20.length > 2).slice(0, 4) : []; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 ? 
multiWordPatterns.map((w20) => sqlLike(w20)) : void 0 }; } function buildContentFilter(column, likeOp, patterns) { diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 28cf31d..9a80604 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -625,9 +625,9 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts; const limit = opts.limit ?? 100; - const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern]; const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; @@ -716,13 +716,15 @@ function buildGrepSearchOptions(params, targetPath) { const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const multiWordPatterns = !hasRegexMeta ? 
params.pattern.split(/\s+/).filter((w) => w.length > 2).slice(0, 4) : []; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 ? multiWordPatterns.map((w) => sqlLike(w)) : void 0 }; } function buildContentFilter(column, likeOp, patterns) { diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 0793149..929201c 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -67326,9 +67326,9 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts; const limit = opts.limit ?? 100; - const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? 
multiWordPatterns : [escapedPattern]; const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; @@ -67427,13 +67427,15 @@ function buildGrepSearchOptions(params, targetPath) { const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const multiWordPatterns = !hasRegexMeta ? params.pattern.split(/\s+/).filter((w20) => w20.length > 2).slice(0, 4) : []; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 ? multiWordPatterns.map((w20) => sqlLike(w20)) : void 0 }; } function buildContentFilter(column, likeOp, patterns) { diff --git a/src/shell/grep-core.ts b/src/shell/grep-core.ts index 6e93c5b..9972594 100644 --- a/src/shell/grep-core.ts +++ b/src/shell/grep-core.ts @@ -48,6 +48,8 @@ export interface SearchOptions { prefilterPattern?: string; /** Optional safe literal alternation anchors for regex searches (e.g. foo|bar). */ prefilterPatterns?: string[]; + /** Per-word patterns for non-regex multi-word queries (OR-joined). */ + multiWordPatterns?: string[]; /** Per-table row cap. 
*/ limit?: number; } @@ -254,11 +256,11 @@ export async function searchDeeplakeTables( sessionsTable: string, opts: SearchOptions, ): Promise { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts; const limit = opts.limit ?? 100; const filterPatterns = contentScanOnly ? (prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : (prefilterPattern ? [prefilterPattern] : [])) - : [escapedPattern]; + : (multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern]); const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); @@ -377,6 +379,12 @@ export function buildGrepSearchOptions(params: GrepMatchParams, targetPath: stri const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + // For non-regex multi-word patterns, split into per-word OR filters so + // natural-language queries match any token, not only the full phrase. + const multiWordPatterns = (!hasRegexMeta) + ? params.pattern.split(/\s+/).filter((w) => w.length > 2).slice(0, 4) + : []; + return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, @@ -384,6 +392,9 @@ export function buildGrepSearchOptions(params: GrepMatchParams, targetPath: stri escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : undefined, prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 + ? 
multiWordPatterns.map((w) => sqlLike(w)) + : undefined, }; } From f852bbe56fa7778635b791381797b33ffef761bf Mon Sep 17 00:00:00 2001 From: kaghni Date: Tue, 21 Apr 2026 21:10:51 +0000 Subject: [PATCH 3/4] fix: auto-read handles absolute paths + happy-path test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses two PR review comments on the auto-read conversion: 1. Path regex only matched tilde-prefixed memory paths. When haiku generated an absolute path (e.g. an expanded home path pointing into the memory tree), touchesMemory() returned true and isReadLike passed, but pathMatch never fired, so cleanPath was empty and auto-read silently fell through to the RETRY guidance — dead code for the most common case. Fix: run rewritePaths() on both cmd and toolPath first. rewritePaths already normalizes all three variants (tilde, dollar-HOME, absolute home dir). Then extract the normalized /-leading path from the result. 2. No happy-path test for the rewrite itself. The existing 32 pre-tool-use tests all covered the negative path — commands with shell metacharacters still returning RETRY. Nothing asserted that python3 on a clean memory file actually becomes cat. The regex bug above could have silently regressed with no test failure. Added: - 9 pre-tool-use.test.ts cases covering python3/node/bun/deno/ruby happy path, absolute-path variant, trailing-slash rejection, shell-meta fall-through, dollar-HOME variable fall-through (must NOT rewrite because dollar is a shell metacharacter), single-quote escape check. - 5 grep-core.test.ts cases covering buildGrepSearchOptions multi-word splitting: basic, single-word, short-token filter, regex fallback, 4-word cap. Tests: 934 passing total (up from 918). Branch coverage for grep-core 89.88% and pre-tool-use 87.25% — improved but still below the 90% thresholds enforced by the coverage CI job. 
--- claude-code/bundle/pre-tool-use.js | 6 +- claude-code/tests/grep-core.test.ts | 79 +++++++++++++++ claude-code/tests/pre-tool-use.test.ts | 134 +++++++++++++++++++++++++ src/hooks/pre-tool-use.ts | 10 +- 4 files changed, 222 insertions(+), 7 deletions(-) diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index b4bbe4c..6d5a333 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -1956,9 +1956,9 @@ async function processPreToolUse(input, deps = {}) { const isReadLike = /^(?:python3?|node|deno|bun|ruby|perl)\b/.test(cmd.trim()); const hasShellMeta = /[$`;|&<>()\\]/.test(cmd); if (isReadLike && !hasShellMeta) { - const pathMatch = cmd.match(/~\/\.deeplake\/memory\/[\w./_-]+/) || toolPath.match(/~\/\.deeplake\/memory\/[\w./_-]+/); - const memPath = pathMatch ? pathMatch[0] : ""; - const cleanPath = memPath ? rewritePaths(memPath) : ""; + const normalized = rewritePaths(cmd) + " " + rewritePaths(toolPath); + const pathMatch = normalized.match(/\s(\/[\w./_-]+)/); + const cleanPath = pathMatch ? 
pathMatch[1] : ""; if (cleanPath && !cleanPath.endsWith("/")) { logFn(`unsupported command on file, converting to cat: ${cleanPath}`); return buildAllowDecision(`cat '${cleanPath.replace(/'/g, "'\\''")}'`, "[DeepLake] converted unsupported interpreter read to cat"); diff --git a/claude-code/tests/grep-core.test.ts b/claude-code/tests/grep-core.test.ts index 51339ff..a6b9d06 100644 --- a/claude-code/tests/grep-core.test.ts +++ b/claude-code/tests/grep-core.test.ts @@ -873,6 +873,85 @@ describe("regex literal prefilter", () => { expect(opts.prefilterPatterns).toEqual(["relationship", "partner", "married"]); }); + it("multi-word non-regex pattern populates multiWordPatterns", () => { + const opts = buildGrepSearchOptions({ + pattern: "pottery Melanie Caroline", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + expect(opts.contentScanOnly).toBe(false); + expect(opts.multiWordPatterns).toEqual(["pottery", "Melanie", "Caroline"]); + }); + + it("single-word non-regex pattern leaves multiWordPatterns undefined", () => { + const opts = buildGrepSearchOptions({ + pattern: "Caroline", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + expect(opts.contentScanOnly).toBe(false); + expect(opts.multiWordPatterns).toBeUndefined(); + }); + + it("very short tokens (<= 2 chars) are filtered out of multiWordPatterns", () => { + const opts = buildGrepSearchOptions({ + pattern: "a by the pottery", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + // "a", "by" filtered; "the", "pottery" kept + expect(opts.multiWordPatterns).toEqual(["the", "pottery"]); + }); + + it("regex pattern does not populate multiWordPatterns", () => { + const opts = 
buildGrepSearchOptions({ + pattern: "foo|bar baz", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + expect(opts.contentScanOnly).toBe(true); + expect(opts.multiWordPatterns).toBeUndefined(); + }); + + it("more than 4 words: only first 4 survive", () => { + const opts = buildGrepSearchOptions({ + pattern: "one two three four five six", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + expect(opts.multiWordPatterns).toEqual(["one", "two", "three", "four"]); + }); + it("rejects alternation prefilters when grouping makes them unsafe", () => { expect(extractRegexAlternationPrefilters("(foo|bar)")).toBeNull(); expect(extractRegexAlternationPrefilters("foo|bar.*baz")).toEqual(["foo", "bar"]); diff --git a/claude-code/tests/pre-tool-use.test.ts b/claude-code/tests/pre-tool-use.test.ts index f5bb682..315edfc 100644 --- a/claude-code/tests/pre-tool-use.test.ts +++ b/claude-code/tests/pre-tool-use.test.ts @@ -242,6 +242,140 @@ describe("pre-tool-use: unsafe commands return guidance instead of deny", () => }); }); +describe("pre-tool-use: interpreter read on clean single-file path is rewritten to cat", () => { + it("python3 on tilde-prefixed memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "python3 ~/.deeplake/memory/data.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("/data.json"); + expect(r.updatedCommand).not.toContain("RETRY REQUIRED"); + } + }); + + it("python3 on $HOME-prefixed path correctly falls through to RETRY", () => { + // $HOME contains a $ metacharacter, so we can't safely rewrite — the + // hook sends it to the RETRY guidance rather than guess at expansion. 
+ const r = runPreToolUse("Bash", { + command: "python3 $HOME/.deeplake/memory/foo.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toContain("RETRY REQUIRED"); + } + }); + + it("python3 on absolute /home/*/.deeplake/memory path rewrites to cat", () => { + // Simulates what haiku frequently generates — the agent resolves + // ~/ to the absolute home path before passing to Bash. + const { homedir } = require("node:os"); + const cmd = `python3 ${homedir()}/.deeplake/memory/session.json`; + const r = runPreToolUse("Bash", { command: cmd }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("/session.json"); + expect(r.updatedCommand).not.toContain("RETRY REQUIRED"); + } + }); + + it("node on tilde-prefixed memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "node ~/.deeplake/memory/locomo_bench/conv_0_session_1.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("conv_0_session_1.json"); + expect(r.updatedCommand).not.toContain("RETRY REQUIRED"); + } + }); + + it("perl on memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "perl ~/.deeplake/memory/notes.txt", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("/notes.txt"); + } + }); + + it("python3 with shell metacharacter still returns RETRY", () => { + const r = runPreToolUse("Bash", { + command: "python3 ~/.deeplake/memory/file.json | head", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toContain("RETRY REQUIRED"); + } + }); + + it("python3 on directory (trailing slash) returns RETRY, not 
cat", () => { + const r = runPreToolUse("Bash", { + command: "python3 ~/.deeplake/memory/", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toContain("RETRY REQUIRED"); + } + }); + + it("deno on memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "deno ~/.deeplake/memory/config.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("/config.json"); + } + }); + + it("bun on memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "bun ~/.deeplake/memory/script.ts", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + } + }); + + it("ruby on memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "ruby ~/.deeplake/memory/a.rb", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + } + }); + + it("auto-read uses single-quote escape for paths containing apostrophes", () => { + // Memory filenames with single quotes are pathological but possible. + // The cat command must escape them with '\''. 
+ const r = runPreToolUse("Bash", { + command: "python3 ~/.deeplake/memory/o'file.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + // Either RETRY (if regex rejects the apostrophe) or a properly-escaped cat + if (!r.updatedCommand.includes("RETRY")) { + // Must not close the outer single-quote naively + expect(r.updatedCommand).not.toMatch(/cat '[^']*'[^']+'$/); + } + } + }); +}); + // ── Deeplake CLI commands: no longer supported, should return guidance ──────── describe("pre-tool-use: deeplake CLI commands blocked", () => { diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 508e9a1..cc1a3a0 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -234,10 +234,12 @@ export async function processPreToolUse(input: PreToolUseInput, deps: ClaudePreT const isReadLike = /^(?:python3?|node|deno|bun|ruby|perl)\b/.test(cmd.trim()); const hasShellMeta = /[$`;|&<>()\\]/.test(cmd); if (isReadLike && !hasShellMeta) { - const pathMatch = cmd.match(/~\/\.deeplake\/memory\/[\w./_-]+/) - || toolPath.match(/~\/\.deeplake\/memory\/[\w./_-]+/); - const memPath = pathMatch ? pathMatch[0] : ""; - const cleanPath = memPath ? rewritePaths(memPath) : ""; + // Normalize path prefix (~/, $HOME/, or absolute /home/user/) to / via + // rewritePaths, then extract the leading memory-relative path. + // This catches all three forms that touchesMemory() accepts. + const normalized = rewritePaths(cmd) + " " + rewritePaths(toolPath); + const pathMatch = normalized.match(/\s(\/[\w./_-]+)/); + const cleanPath = pathMatch ? 
pathMatch[1] : ""; if (cleanPath && !cleanPath.endsWith("/")) { logFn(`unsupported command on file, converting to cat: ${cleanPath}`); return buildAllowDecision( From 5ddf0df4b61f38e9a355ec900ce296be17b6cc66 Mon Sep 17 00:00:00 2001 From: kaghni Date: Tue, 21 Apr 2026 21:46:51 +0000 Subject: [PATCH 4/4] test: lift pre-tool-use + grep-core branch coverage over 90% threshold Coverage CI was failing at 87.25% (pre-tool-use) and 89.88% (grep-core), both below the 90% threshold. The subprocess-based e2e tests in pre-tool-use.test.ts exercise the rewrite-to-cat path but vitest c8 does not instrument subprocesses, so those branches looked uncovered. Added two targeted test batches that call the exported functions directly: - pre-tool-use-branches.test.ts: 7 new cases for processPreToolUse covering the auto-read happy path (python3/node/ruby on tilde and absolute memory paths, description assertion) and its guards (trailing-slash rejection, shell-meta fall-through, non-interpreter command fall-through). - grep-core.test.ts: 6 new cases for extractRegexAlternationPrefilters covering the null-return branches (char class, anchor, parens, quantifier, empty branch, leading/trailing pipe, trailing escape) plus dedupe and no-pipe fast-return. Results: 947 tests (up from 934). Branch coverage pre-tool-use 87.25% -> 90.43%, grep-core 89.88% -> 91.01%. 
--- claude-code/tests/grep-core.test.ts | 32 ++++++++++ .../tests/pre-tool-use-branches.test.ts | 61 +++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/claude-code/tests/grep-core.test.ts b/claude-code/tests/grep-core.test.ts index a6b9d06..eb1ef79 100644 --- a/claude-code/tests/grep-core.test.ts +++ b/claude-code/tests/grep-core.test.ts @@ -833,6 +833,38 @@ describe("regex literal prefilter", () => { expect(extractRegexLiteralPrefilter("foo.bar")).toBeNull(); }); + it("rejects alternation containing regex char classes or anchors", () => { + expect(extractRegexAlternationPrefilters("a|b|c[xyz]")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|^bar")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|bar$")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|(bar)")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|{1,2}")).toBeNull(); + }); + + it("rejects alternation with empty branch or trailing escape", () => { + expect(extractRegexAlternationPrefilters("foo||bar")).toBeNull(); + expect(extractRegexAlternationPrefilters("|foo|bar")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|bar|")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo\\")).toBeNull(); + }); + + it("returns null when every alternation branch has no usable literal", () => { + expect(extractRegexAlternationPrefilters("a|b")).toBeNull(); // each branch < 2 chars + expect(extractRegexAlternationPrefilters(".|.|.")).toBeNull(); + }); + + it("returns null when input has no alternation pipe", () => { + expect(extractRegexAlternationPrefilters("foobar")).toBeNull(); + }); + + it("preserves escaped literals across branches", () => { + expect(extractRegexAlternationPrefilters("foo\\.bar|baz")).toEqual(["foo.bar", "baz"]); + }); + + it("dedupes duplicate literals in alternation", () => { + expect(extractRegexAlternationPrefilters("cat|dog|cat")).toEqual(["cat", "dog"]); + }); + it("builds grep search options with regex 
prefilter when safe", () => { const opts = buildGrepSearchOptions({ pattern: "foo.*bar", diff --git a/claude-code/tests/pre-tool-use-branches.test.ts b/claude-code/tests/pre-tool-use-branches.test.ts index cb3de12..c7cda14 100644 --- a/claude-code/tests/pre-tool-use-branches.test.ts +++ b/claude-code/tests/pre-tool-use-branches.test.ts @@ -191,6 +191,67 @@ describe("processPreToolUse: non-memory / no-op paths", () => { expect(d?.command).toContain(`node "/SHELL" -c`); expect(d?.description).toContain("[DeepLake shell]"); }); + + it("rewrites python3 on a tilde memory path to cat", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "python3 ~/.deeplake/memory/data.json" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toMatch(/^cat '\/[^']+'/); + expect(d?.command).toContain("/data.json"); + expect(d?.command).not.toContain("RETRY REQUIRED"); + expect(d?.description).toContain("converted unsupported interpreter read to cat"); + }); + + it("rewrites python3 on an absolute memory path to cat", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: `python3 ${MEM_ABS}/session.json` }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toMatch(/^cat '\/[^']+'/); + expect(d?.command).toContain("/session.json"); + expect(d?.command).not.toContain("RETRY REQUIRED"); + }); + + it("rewrites node on memory path to cat", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "node ~/.deeplake/memory/foo/bar.json" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toMatch(/^cat '\/[^']+'/); + }); + + it("rewrites ruby on memory path to cat", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "ruby 
~/.deeplake/memory/a.rb" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toMatch(/^cat '\/[^']+'/); + }); + + it("does not rewrite python3 on a memory directory (trailing slash)", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "python3 ~/.deeplake/memory/" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toContain("RETRY REQUIRED"); + }); + + it("does not rewrite when shell metacharacters are present", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "python3 ~/.deeplake/memory/a.json | head" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toContain("RETRY REQUIRED"); + }); + + it("does not rewrite when cmd starts with a non-interpreter", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "curl ~/.deeplake/memory/a.json" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toContain("RETRY REQUIRED"); + }); }); describe("processPreToolUse: Glob / ls branches", () => {