activeloopai · kaghni · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026
diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js
@@ -639,9 +639,9 @@ function buildPathCondition(targetPath) {
   return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`;
 }
 async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
-  const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts;
+  const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts;
   const limit = opts.limit ?? 100;
-  const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern];
+  const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern];
   const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns);
   const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns);
   const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`;
@@ -730,13 +730,15 @@ function buildGrepSearchOptions(params, targetPath) {
   const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern);
   const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null;
   const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null;
+  const multiWordPatterns = !hasRegexMeta ? params.pattern.split(/\s+/).filter((w) => w.length > 2).slice(0, 4) : [];
   return {
     pathFilter: buildPathFilter(targetPath),
     contentScanOnly: hasRegexMeta,
     likeOp: params.ignoreCase ? "ILIKE" : "LIKE",
     escapedPattern: sqlLike(params.pattern),
     prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0,
-    prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal))
+    prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)),
+    multiWordPatterns: multiWordPatterns.length > 1 ? multiWordPatterns.map((w) => sqlLike(w)) : void 0
   };
 }
 function buildContentFilter(column, likeOp, patterns) {
@@ -1951,6 +1953,17 @@ async function processPreToolUse(input, deps = {}) {
   const toolPath = getReadTargetPath(input.tool_input) ?? input.tool_input.path ?? "";
   if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath))) {
     const guidance = "[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. python, python3, node, and curl are NOT available. You MUST rewrite your command using only the bash tools listed above and try again. For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'.";
+    const isReadLike = /^(?:python3?|node|deno|bun|ruby|perl)\b/.test(cmd.trim());
+    const hasShellMeta = /[$`;|&<>()\\]/.test(cmd);
+    if (isReadLike && !hasShellMeta) {
+      const normalized = rewritePaths(cmd) + " " + rewritePaths(toolPath);
+      const pathMatch = normalized.match(/\s(\/[\w./_-]+)/);
+      const cleanPath = pathMatch ? pathMatch[1] : "";
+      if (cleanPath && !cleanPath.endsWith("/")) {
+        logFn(`unsupported command on file, converting to cat: ${cleanPath}`);
+        return buildAllowDecision(`cat '${cleanPath.replace(/'/g, "'\\''")}'`, "[DeepLake] converted unsupported interpreter read to cat");
+      }
+    }
     logFn(`unsupported command, returning guidance: ${cmd}`);
     return buildAllowDecision(`echo ${JSON.stringify(guidance)}`, "[DeepLake] unsupported command \u2014 rewrite using bash builtins");
   }

diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js
@@ -67326,9 +67326,9 @@ function buildPathCondition(targetPath) {
   return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`;
 }
 async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
-  const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts;
+  const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts;
   const limit = opts.limit ?? 100;
-  const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern];
+  const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern];
   const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns);
   const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns);
   const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`;
@@ -67427,13 +67427,15 @@ function buildGrepSearchOptions(params, targetPath) {
   const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern);
   const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null;
   const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null;
+  const multiWordPatterns = !hasRegexMeta ? params.pattern.split(/\s+/).filter((w20) => w20.length > 2).slice(0, 4) : [];
   return {
     pathFilter: buildPathFilter(targetPath),
     contentScanOnly: hasRegexMeta,
     likeOp: params.ignoreCase ? "ILIKE" : "LIKE",
     escapedPattern: sqlLike(params.pattern),
     prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0,
-    prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal))
+    prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)),
+    multiWordPatterns: multiWordPatterns.length > 1 ? multiWordPatterns.map((w20) => sqlLike(w20)) : void 0
   };
 }
 function buildContentFilter(column, likeOp, patterns) {

diff --git a/claude-code/tests/grep-core.test.ts b/claude-code/tests/grep-core.test.ts
@@ -833,6 +833,38 @@ describe("regex literal prefilter", () => {
     expect(extractRegexLiteralPrefilter("foo.bar")).toBeNull();
   });
 
+  it("rejects alternation containing regex char classes or anchors", () => {
+    expect(extractRegexAlternationPrefilters("a|b|c[xyz]")).toBeNull();
+    expect(extractRegexAlternationPrefilters("foo|^bar")).toBeNull();
+    expect(extractRegexAlternationPrefilters("foo|bar$")).toBeNull();
+    expect(extractRegexAlternationPrefilters("foo|(bar)")).toBeNull();
+    expect(extractRegexAlternationPrefilters("foo|{1,2}")).toBeNull();
+  });
+
+  it("rejects alternation with empty branch or trailing escape", () => {
+    expect(extractRegexAlternationPrefilters("foo||bar")).toBeNull();
+    expect(extractRegexAlternationPrefilters("|foo|bar")).toBeNull();
+    expect(extractRegexAlternationPrefilters("foo|bar|")).toBeNull();
+    expect(extractRegexAlternationPrefilters("foo\\")).toBeNull();
+  });
+
+  it("returns null when every alternation branch has no usable literal", () => {
+    expect(extractRegexAlternationPrefilters("a|b")).toBeNull(); // each branch < 2 chars
+    expect(extractRegexAlternationPrefilters(".|.|.")).toBeNull();
+  });
+
+  it("returns null when input has no alternation pipe", () => {
+    expect(extractRegexAlternationPrefilters("foobar")).toBeNull();
+  });
+
+  it("preserves escaped literals across branches", () => {
+    expect(extractRegexAlternationPrefilters("foo\\.bar|baz")).toEqual(["foo.bar", "baz"]);
+  });
+
+  it("dedupes duplicate literals in alternation", () => {
+    expect(extractRegexAlternationPrefilters("cat|dog|cat")).toEqual(["cat", "dog"]);
+  });
+
   it("builds grep search options with regex prefilter when safe", () => {
     const opts = buildGrepSearchOptions({
       pattern: "foo.*bar",
@@ -873,6 +905,85 @@ describe("regex literal prefilter", () => {
     expect(opts.prefilterPatterns).toEqual(["relationship", "partner", "married"]);
   });
 
+  it("multi-word non-regex pattern populates multiWordPatterns", () => {
+    const opts = buildGrepSearchOptions({
+      pattern: "pottery Melanie Caroline",
+      ignoreCase: false,
+      wordMatch: false,
+      filesOnly: false,
+      countOnly: false,
+      lineNumber: false,
+      invertMatch: false,
+      fixedString: false,
+    }, "/");
+
+    expect(opts.contentScanOnly).toBe(false);
+    expect(opts.multiWordPatterns).toEqual(["pottery", "Melanie", "Caroline"]);
+  });
+
+  it("single-word non-regex pattern leaves multiWordPatterns undefined", () => {
+    const opts = buildGrepSearchOptions({
+      pattern: "Caroline",
+      ignoreCase: false,
+      wordMatch: false,
+      filesOnly: false,
+      countOnly: false,
+      lineNumber: false,
+      invertMatch: false,
+      fixedString: false,
+    }, "/");
+
+    expect(opts.contentScanOnly).toBe(false);
+    expect(opts.multiWordPatterns).toBeUndefined();
+  });
+
+  it("very short tokens (<= 2 chars) are filtered out of multiWordPatterns", () => {
+    const opts = buildGrepSearchOptions({
+      pattern: "a by the pottery",
+      ignoreCase: false,
+      wordMatch: false,
+      filesOnly: false,
+      countOnly: false,
+      lineNumber: false,
+      invertMatch: false,
+      fixedString: false,
+    }, "/");
+
+    // "a", "by" filtered; "the", "pottery" kept
+    expect(opts.multiWordPatterns).toEqual(["the", "pottery"]);
+  });
+
+  it("regex pattern does not populate multiWordPatterns", () => {
+    const opts = buildGrepSearchOptions({
+      pattern: "foo|bar baz",
+      ignoreCase: false,
+      wordMatch: false,
+      filesOnly: false,
+      countOnly: false,
+      lineNumber: false,
+      invertMatch: false,
+      fixedString: false,
+    }, "/");
+
+    expect(opts.contentScanOnly).toBe(true);
+    expect(opts.multiWordPatterns).toBeUndefined();
+  });
+
+  it("more than 4 words: only first 4 survive", () => {
+    const opts = buildGrepSearchOptions({
+      pattern: "one two three four five six",
+      ignoreCase: false,
+      wordMatch: false,
+      filesOnly: false,
+      countOnly: false,
+      lineNumber: false,
+      invertMatch: false,
+      fixedString: false,
+    }, "/");
+
+    expect(opts.multiWordPatterns).toEqual(["one", "two", "three", "four"]);
+  });
+
   it("rejects alternation prefilters when grouping makes them unsafe", () => {
     expect(extractRegexAlternationPrefilters("(foo|bar)")).toBeNull();
     expect(extractRegexAlternationPrefilters("foo|bar.*baz")).toEqual(["foo", "bar"]);

diff --git a/claude-code/tests/pre-tool-use-branches.test.ts b/claude-code/tests/pre-tool-use-branches.test.ts
@@ -191,6 +191,67 @@ describe("processPreToolUse: non-memory / no-op paths", () => {
     expect(d?.command).toContain(`node "/SHELL" -c`);
     expect(d?.description).toContain("[DeepLake shell]");
   });
+
+  it("rewrites python3 on a tilde memory path to cat", async () => {
+    const d = await processPreToolUse(
+      { session_id: "s", tool_name: "Bash", tool_input: { command: "python3 ~/.deeplake/memory/data.json" }, tool_use_id: "t" },
+      { config: BASE_CONFIG as any, logFn: vi.fn() },
+    );
+    expect(d?.command).toMatch(/^cat '\/[^']+'/);
+    expect(d?.command).toContain("/data.json");
+    expect(d?.command).not.toContain("RETRY REQUIRED");
+    expect(d?.description).toContain("converted unsupported interpreter read to cat");
+  });
+
+  it("rewrites python3 on an absolute memory path to cat", async () => {
+    const d = await processPreToolUse(
+      { session_id: "s", tool_name: "Bash", tool_input: { command: `python3 ${MEM_ABS}/session.json` }, tool_use_id: "t" },
+      { config: BASE_CONFIG as any, logFn: vi.fn() },
+    );
+    expect(d?.command).toMatch(/^cat '\/[^']+'/);
+    expect(d?.command).toContain("/session.json");
+    expect(d?.command).not.toContain("RETRY REQUIRED");
+  });
+
+  it("rewrites node on memory path to cat", async () => {
+    const d = await processPreToolUse(
+      { session_id: "s", tool_name: "Bash", tool_input: { command: "node ~/.deeplake/memory/foo/bar.json" }, tool_use_id: "t" },
+      { config: BASE_CONFIG as any, logFn: vi.fn() },
+    );
+    expect(d?.command).toMatch(/^cat '\/[^']+'/);
+  });
+
+  it("rewrites ruby on memory path to cat", async () => {
+    const d = await processPreToolUse(
+      { session_id: "s", tool_name: "Bash", tool_input: { command: "ruby ~/.deeplake/memory/a.rb" }, tool_use_id: "t" },
+      { config: BASE_CONFIG as any, logFn: vi.fn() },
+    );
+    expect(d?.command).toMatch(/^cat '\/[^']+'/);
+  });
+
+  it("does not rewrite python3 on a memory directory (trailing slash)", async () => {
+    const d = await processPreToolUse(
+      { session_id: "s", tool_name: "Bash", tool_input: { command: "python3 ~/.deeplake/memory/" }, tool_use_id: "t" },
+      { config: BASE_CONFIG as any, logFn: vi.fn() },
+    );
+    expect(d?.command).toContain("RETRY REQUIRED");
+  });
+
+  it("does not rewrite when shell metacharacters are present", async () => {
+    const d = await processPreToolUse(
+      { session_id: "s", tool_name: "Bash", tool_input: { command: "python3 ~/.deeplake/memory/a.json | head" }, tool_use_id: "t" },
+      { config: BASE_CONFIG as any, logFn: vi.fn() },
+    );
+    expect(d?.command).toContain("RETRY REQUIRED");
+  });
+
+  it("does not rewrite when cmd starts with a non-interpreter", async () => {
+    const d = await processPreToolUse(
+      { session_id: "s", tool_name: "Bash", tool_input: { command: "curl ~/.deeplake/memory/a.json" }, tool_use_id: "t" },
+      { config: BASE_CONFIG as any, logFn: vi.fn() },
+    );
+    expect(d?.command).toContain("RETRY REQUIRED");
+  });
 });
 
 describe("processPreToolUse: Glob / ls branches", () => {