diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 5076674..6d5a333 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -639,9 +639,9 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts; const limit = opts.limit ?? 100; - const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern]; const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; @@ -730,13 +730,15 @@ function buildGrepSearchOptions(params, targetPath) { const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const multiWordPatterns = !hasRegexMeta ? 
params.pattern.split(/\s+/).filter((w) => w.length > 2).slice(0, 4) : []; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 ? multiWordPatterns.map((w) => sqlLike(w)) : void 0 }; } function buildContentFilter(column, likeOp, patterns) { @@ -1951,6 +1953,17 @@ async function processPreToolUse(input, deps = {}) { const toolPath = getReadTargetPath(input.tool_input) ?? input.tool_input.path ?? ""; if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath))) { const guidance = "[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. python, python3, node, and curl are NOT available. You MUST rewrite your command using only the bash tools listed above and try again. For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'."; + const isReadLike = /^(?:python3?|node|deno|bun|ruby|perl)\b/.test(cmd.trim()); + const hasShellMeta = /[$`;|&<>()\\]/.test(cmd); + if (isReadLike && !hasShellMeta) { + const normalized = rewritePaths(cmd) + " " + rewritePaths(toolPath); + const pathMatch = normalized.match(/\s(\/[\w./_-]+)/); + const cleanPath = pathMatch ? 
pathMatch[1] : ""; + if (cleanPath && !cleanPath.endsWith("/")) { + logFn(`unsupported command on file, converting to cat: ${cleanPath}`); + return buildAllowDecision(`cat '${cleanPath.replace(/'/g, "'\\''")}'`, "[DeepLake] converted unsupported interpreter read to cat"); + } + } logFn(`unsupported command, returning guidance: ${cmd}`); return buildAllowDecision(`echo ${JSON.stringify(guidance)}`, "[DeepLake] unsupported command \u2014 rewrite using bash builtins"); } diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 0793149..929201c 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -67326,9 +67326,9 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts; const limit = opts.limit ?? 100; - const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? 
multiWordPatterns : [escapedPattern]; const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; @@ -67427,13 +67427,15 @@ function buildGrepSearchOptions(params, targetPath) { const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const multiWordPatterns = !hasRegexMeta ? params.pattern.split(/\s+/).filter((w20) => w20.length > 2).slice(0, 4) : []; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 ? 
multiWordPatterns.map((w20) => sqlLike(w20)) : void 0 }; } function buildContentFilter(column, likeOp, patterns) { diff --git a/claude-code/tests/grep-core.test.ts b/claude-code/tests/grep-core.test.ts index 51339ff..eb1ef79 100644 --- a/claude-code/tests/grep-core.test.ts +++ b/claude-code/tests/grep-core.test.ts @@ -833,6 +833,38 @@ describe("regex literal prefilter", () => { expect(extractRegexLiteralPrefilter("foo.bar")).toBeNull(); }); + it("rejects alternation containing regex char classes or anchors", () => { + expect(extractRegexAlternationPrefilters("a|b|c[xyz]")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|^bar")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|bar$")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|(bar)")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|{1,2}")).toBeNull(); + }); + + it("rejects alternation with empty branch or trailing escape", () => { + expect(extractRegexAlternationPrefilters("foo||bar")).toBeNull(); + expect(extractRegexAlternationPrefilters("|foo|bar")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|bar|")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo\\")).toBeNull(); + }); + + it("returns null when every alternation branch has no usable literal", () => { + expect(extractRegexAlternationPrefilters("a|b")).toBeNull(); // each branch < 2 chars + expect(extractRegexAlternationPrefilters(".|.|.")).toBeNull(); + }); + + it("returns null when input has no alternation pipe", () => { + expect(extractRegexAlternationPrefilters("foobar")).toBeNull(); + }); + + it("preserves escaped literals across branches", () => { + expect(extractRegexAlternationPrefilters("foo\\.bar|baz")).toEqual(["foo.bar", "baz"]); + }); + + it("dedupes duplicate literals in alternation", () => { + expect(extractRegexAlternationPrefilters("cat|dog|cat")).toEqual(["cat", "dog"]); + }); + it("builds grep search options with regex prefilter when safe", () => { const opts = 
buildGrepSearchOptions({ pattern: "foo.*bar", @@ -873,6 +905,85 @@ describe("regex literal prefilter", () => { expect(opts.prefilterPatterns).toEqual(["relationship", "partner", "married"]); }); + it("multi-word non-regex pattern populates multiWordPatterns", () => { + const opts = buildGrepSearchOptions({ + pattern: "pottery Melanie Caroline", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + expect(opts.contentScanOnly).toBe(false); + expect(opts.multiWordPatterns).toEqual(["pottery", "Melanie", "Caroline"]); + }); + + it("single-word non-regex pattern leaves multiWordPatterns undefined", () => { + const opts = buildGrepSearchOptions({ + pattern: "Caroline", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + expect(opts.contentScanOnly).toBe(false); + expect(opts.multiWordPatterns).toBeUndefined(); + }); + + it("very short tokens (<= 2 chars) are filtered out of multiWordPatterns", () => { + const opts = buildGrepSearchOptions({ + pattern: "a by the pottery", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + // "a", "by" filtered; "the", "pottery" kept + expect(opts.multiWordPatterns).toEqual(["the", "pottery"]); + }); + + it("regex pattern does not populate multiWordPatterns", () => { + const opts = buildGrepSearchOptions({ + pattern: "foo|bar baz", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + expect(opts.contentScanOnly).toBe(true); + expect(opts.multiWordPatterns).toBeUndefined(); + }); + + it("more than 4 words: only first 4 survive", () => { + const opts = buildGrepSearchOptions({ + pattern: "one two three four five 
six", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + + expect(opts.multiWordPatterns).toEqual(["one", "two", "three", "four"]); + }); + it("rejects alternation prefilters when grouping makes them unsafe", () => { expect(extractRegexAlternationPrefilters("(foo|bar)")).toBeNull(); expect(extractRegexAlternationPrefilters("foo|bar.*baz")).toEqual(["foo", "bar"]); diff --git a/claude-code/tests/pre-tool-use-branches.test.ts b/claude-code/tests/pre-tool-use-branches.test.ts index cb3de12..c7cda14 100644 --- a/claude-code/tests/pre-tool-use-branches.test.ts +++ b/claude-code/tests/pre-tool-use-branches.test.ts @@ -191,6 +191,67 @@ describe("processPreToolUse: non-memory / no-op paths", () => { expect(d?.command).toContain(`node "/SHELL" -c`); expect(d?.description).toContain("[DeepLake shell]"); }); + + it("rewrites python3 on a tilde memory path to cat", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "python3 ~/.deeplake/memory/data.json" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toMatch(/^cat '\/[^']+'/); + expect(d?.command).toContain("/data.json"); + expect(d?.command).not.toContain("RETRY REQUIRED"); + expect(d?.description).toContain("converted unsupported interpreter read to cat"); + }); + + it("rewrites python3 on an absolute memory path to cat", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: `python3 ${MEM_ABS}/session.json` }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toMatch(/^cat '\/[^']+'/); + expect(d?.command).toContain("/session.json"); + expect(d?.command).not.toContain("RETRY REQUIRED"); + }); + + it("rewrites node on memory path to cat", async () => { + const d = await processPreToolUse( + { 
session_id: "s", tool_name: "Bash", tool_input: { command: "node ~/.deeplake/memory/foo/bar.json" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toMatch(/^cat '\/[^']+'/); + }); + + it("rewrites ruby on memory path to cat", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "ruby ~/.deeplake/memory/a.rb" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toMatch(/^cat '\/[^']+'/); + }); + + it("does not rewrite python3 on a memory directory (trailing slash)", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "python3 ~/.deeplake/memory/" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toContain("RETRY REQUIRED"); + }); + + it("does not rewrite when shell metacharacters are present", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "python3 ~/.deeplake/memory/a.json | head" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toContain("RETRY REQUIRED"); + }); + + it("does not rewrite when cmd starts with a non-interpreter", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "curl ~/.deeplake/memory/a.json" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toContain("RETRY REQUIRED"); + }); }); describe("processPreToolUse: Glob / ls branches", () => { diff --git a/claude-code/tests/pre-tool-use.test.ts b/claude-code/tests/pre-tool-use.test.ts index f5bb682..315edfc 100644 --- a/claude-code/tests/pre-tool-use.test.ts +++ b/claude-code/tests/pre-tool-use.test.ts @@ -242,6 +242,140 @@ describe("pre-tool-use: unsafe commands return guidance instead of deny", () => }); }); 
+describe("pre-tool-use: interpreter read on clean single-file path is rewritten to cat", () => { + it("python3 on tilde-prefixed memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "python3 ~/.deeplake/memory/data.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("/data.json"); + expect(r.updatedCommand).not.toContain("RETRY REQUIRED"); + } + }); + + it("python3 on $HOME-prefixed path correctly falls through to RETRY", () => { + // $HOME contains a $ metacharacter, so we can't safely rewrite — the + // hook sends it to the RETRY guidance rather than guess at expansion. + const r = runPreToolUse("Bash", { + command: "python3 $HOME/.deeplake/memory/foo.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toContain("RETRY REQUIRED"); + } + }); + + it("python3 on absolute /home/*/.deeplake/memory path rewrites to cat", () => { + // Simulates what haiku frequently generates — the agent resolves + // ~/ to the absolute home path before passing to Bash. 
+ const { homedir } = require("node:os"); + const cmd = `python3 ${homedir()}/.deeplake/memory/session.json`; + const r = runPreToolUse("Bash", { command: cmd }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("/session.json"); + expect(r.updatedCommand).not.toContain("RETRY REQUIRED"); + } + }); + + it("node on tilde-prefixed memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "node ~/.deeplake/memory/locomo_bench/conv_0_session_1.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("conv_0_session_1.json"); + expect(r.updatedCommand).not.toContain("RETRY REQUIRED"); + } + }); + + it("perl on memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "perl ~/.deeplake/memory/notes.txt", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("/notes.txt"); + } + }); + + it("python3 with shell metacharacter still returns RETRY", () => { + const r = runPreToolUse("Bash", { + command: "python3 ~/.deeplake/memory/file.json | head", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toContain("RETRY REQUIRED"); + } + }); + + it("python3 on directory (trailing slash) returns RETRY, not cat", () => { + const r = runPreToolUse("Bash", { + command: "python3 ~/.deeplake/memory/", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toContain("RETRY REQUIRED"); + } + }); + + it("deno on memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "deno ~/.deeplake/memory/config.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + 
expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + expect(r.updatedCommand).toContain("/config.json"); + } + }); + + it("bun on memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "bun ~/.deeplake/memory/script.ts", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + } + }); + + it("ruby on memory file rewrites to cat", () => { + const r = runPreToolUse("Bash", { + command: "ruby ~/.deeplake/memory/a.rb", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.updatedCommand).toMatch(/^cat '\/[^']+'/); + } + }); + + it("auto-read uses single-quote escape for paths containing apostrophes", () => { + // Memory filenames with single quotes are pathological but possible. + // The cat command must escape them with '\''. + const r = runPreToolUse("Bash", { + command: "python3 ~/.deeplake/memory/o'file.json", + }); + expect(r.empty).toBe(false); + if (!r.empty) { + // Either RETRY (if regex rejects the apostrophe) or a properly-escaped cat + if (!r.updatedCommand.includes("RETRY")) { + // Must not close the outer single-quote naively + expect(r.updatedCommand).not.toMatch(/cat '[^']*'[^']+'$/); + } + } + }); +}); + // ── Deeplake CLI commands: no longer supported, should return guidance ──────── describe("pre-tool-use: deeplake CLI commands blocked", () => { diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 28cf31d..9a80604 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -625,9 +625,9 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, 
multiWordPatterns } = opts; const limit = opts.limit ?? 100; - const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern]; const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; @@ -716,13 +716,15 @@ function buildGrepSearchOptions(params, targetPath) { const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const multiWordPatterns = !hasRegexMeta ? params.pattern.split(/\s+/).filter((w) => w.length > 2).slice(0, 4) : []; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 ? 
multiWordPatterns.map((w) => sqlLike(w)) : void 0 }; } function buildContentFilter(column, likeOp, patterns) { diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 0793149..929201c 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -67326,9 +67326,9 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts; const limit = opts.limit ?? 100; - const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern]; const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; @@ -67427,13 +67427,15 @@ function buildGrepSearchOptions(params, targetPath) { const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const multiWordPatterns = !hasRegexMeta ? 
params.pattern.split(/\s+/).filter((w20) => w20.length > 2).slice(0, 4) : []; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 ? multiWordPatterns.map((w20) => sqlLike(w20)) : void 0 }; } function buildContentFilter(column, likeOp, patterns) { diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 34c45db..cc1a3a0 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -225,6 +225,30 @@ export async function processPreToolUse(input: PreToolUseInput, deps: ClaudePreT "python, python3, node, and curl are NOT available. " + "You MUST rewrite your command using only the bash tools listed above and try again. " + "For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'."; + + // Fast-path: a clean single-file read attempt by an unsupported interpreter + // (python/node/deno/bun/ruby/perl, no shell metacharacters) gets rewritten to + // `cat '<path>'` so the agent doesn't burn a turn on a RETRY. Anything with + // $(...), backticks, pipes, redirects, or chains falls through to the + // guidance below — safer than trying to rewrite composite commands. + const isReadLike = /^(?:python3?|node|deno|bun|ruby|perl)\b/.test(cmd.trim()); + const hasShellMeta = /[$`;|&<>()\\]/.test(cmd); + if (isReadLike && !hasShellMeta) { + // Normalize the memory path prefix (~/ or absolute /home/user/) to / via + // rewritePaths, then extract the first /-rooted path after whitespace. + // touchesMemory() also accepts $HOME/, but `$` trips hasShellMeta above.
+ const normalized = rewritePaths(cmd) + " " + rewritePaths(toolPath); + const pathMatch = normalized.match(/\s(\/[\w./_-]+)/); + const cleanPath = pathMatch ? pathMatch[1] : ""; + if (cleanPath && !cleanPath.endsWith("/")) { + logFn(`unsupported command on file, converting to cat: ${cleanPath}`); + return buildAllowDecision( + `cat '${cleanPath.replace(/'/g, "'\\''")}'`, + "[DeepLake] converted unsupported interpreter read to cat", + ); + } + } + logFn(`unsupported command, returning guidance: ${cmd}`); return buildAllowDecision( `echo ${JSON.stringify(guidance)}`, diff --git a/src/shell/grep-core.ts b/src/shell/grep-core.ts index 6e93c5b..9972594 100644 --- a/src/shell/grep-core.ts +++ b/src/shell/grep-core.ts @@ -48,6 +48,8 @@ export interface SearchOptions { prefilterPattern?: string; /** Optional safe literal alternation anchors for regex searches (e.g. foo|bar). */ prefilterPatterns?: string[]; + /** Per-word patterns for non-regex multi-word queries (OR-joined). */ + multiWordPatterns?: string[]; /** Per-table row cap. */ limit?: number; } @@ -254,11 +256,11 @@ export async function searchDeeplakeTables( sessionsTable: string, opts: SearchOptions, ): Promise { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts; const limit = opts.limit ?? 100; const filterPatterns = contentScanOnly ? (prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : (prefilterPattern ? [prefilterPattern] : [])) - : [escapedPattern]; + : (multiWordPatterns && multiWordPatterns.length > 1 ? 
multiWordPatterns : [escapedPattern]); const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); @@ -377,6 +379,12 @@ export function buildGrepSearchOptions(params: GrepMatchParams, targetPath: stri const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + // For non-regex multi-word patterns, split into per-word OR filters so + // natural-language queries match any token, not only the full phrase. + const multiWordPatterns = (!hasRegexMeta) + ? params.pattern.split(/\s+/).filter((w) => w.length > 2).slice(0, 4) + : []; + return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, @@ -384,6 +392,9 @@ export function buildGrepSearchOptions(params: GrepMatchParams, targetPath: stri escapedPattern: sqlLike(params.pattern), prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : undefined, prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + multiWordPatterns: multiWordPatterns.length > 1 + ? multiWordPatterns.map((w) => sqlLike(w)) + : undefined, }; }