From 5a17e77d9fd5b9fac67f66c951112cc3f82a296b Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Wed, 29 Apr 2026 04:17:28 +0000 Subject: [PATCH 1/3] refactor(search-query): replace tryRepairQuery with generic normalizeQuery pipeline (CLI-FA) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the specific tryRepairQuery (catch-block-only, single regex) with a normalizeQuery pipeline that runs BEFORE PEG parsing on every query: 1. fixMismatchedBrackets: [a,b,) → [a,b] (wrong closing delimiter) 2. stripTrailingListCommas: [a,b,] → [a,b] (trailing comma in balanced brackets) 3. Whitespace collapse: double spaces → single, trim edges The pipeline architecture makes it easy to add more normalization passes (e.g., date format repair, quote balancing) without growing complexity. Pre-parse normalization is cheaper and more predictable than the previous approach of only repairing in the PEG failure catch block. --- src/lib/search-query.ts | 113 +++++++++++++++++----------- test/lib/search-query.test.ts | 137 ++++++++++++++++++++-------------- 2 files changed, 152 insertions(+), 98 deletions(-) diff --git a/src/lib/search-query.ts b/src/lib/search-query.ts index 2813ee1bd..09340a3b3 100644 --- a/src/lib/search-query.ts +++ b/src/lib/search-query.ts @@ -341,33 +341,26 @@ export function sanitizeQuery(query: string | undefined): string | undefined { return query; } - // Track whether we auto-repaired the query so we return the repaired - // version (not the original) if no further rewriting is needed. - let effectiveQuery = query; + // --- Layer 1: Pre-parse text normalization --- + // Run cheap text transforms on every query BEFORE PEG parsing. + // These fix common patterns that agents/users produce, regardless of + // whether the PEG parser would accept them. + const normalized = normalizeQuery(query); + const effectiveQuery = normalized; + let nodes: SearchNode[]; try { - nodes = parse(query); + nodes = parse(normalized); } catch { - // Malformed query — attempt common repairs before passing through. - // AI agents frequently produce slightly malformed search syntax that - // has clear intent (e.g., trailing commas in in-list filters). - const repaired = tryRepairQuery(query); - if (repaired !== query) { - effectiveQuery = repaired; - // Re-parse the repaired query — if it still fails, pass through - // to the API which returns a proper 400 with details. - try { - nodes = parse(repaired); - } catch { - return repaired; - } - // Log only after confirming the repair produced valid syntax - log.warn( - `Auto-repaired search query syntax. Running query: "${repaired}"` - ); - } else { - return query; - } + // PEG parse still failed after normalization — pass through to the + // API which returns a proper 400 with actionable details. + return normalized; + } + + if (normalized !== query) { + log.warn( + `Auto-repaired search query syntax. Running query: "${normalized}"` + ); } // Check for OR inside paren groups first — these are opaque and can't @@ -499,41 +492,75 @@ export const SEARCH_SYNTAX_REFERENCE = { }; // --------------------------------------------------------------------------- -// Query repair +// Query normalization pipeline // --------------------------------------------------------------------------- /** - * Pattern matching in-list filters with trailing comma before the closing - * bracket, e.g., `key:[val1,val2,]` → `key:[val1,val2]`. - * Also catches wrong closing delimiter: `key:[val1,val2,)` → `key:[val1,val2]`. + * In-list filter with wrong closing delimiter or trailing comma. + * Matches `key:[a,b,)` and `key:[a,b,]` — captures the inner values. */ -const TRAILING_COMMA_IN_LIST_RE = /\[([^[\]]*),\s*[\])](?=\s|$)/g; +const MALFORMED_IN_LIST_RE = /\[([^[\]]*),\s*[\])](?=\s|$)/g; /** Trailing comma at end of captured group content */ const TRAILING_COMMA_RE = /,\s*$/; +/** Runs of multiple spaces — collapsed to single space */ +const MULTI_SPACE_RE = / {2,}/g; + +/** Balanced `[...]` block — used to skip well-formed in-list filters */ +const BALANCED_BRACKET_RE = /\[[^\]]*\]/g; + +/** Trailing comma before closing bracket: `,]` */ +const TRAILING_LIST_COMMA_RE = /,\s*\]$/; + /** - * Attempt common repairs on a malformed search query. - * - * AI agents and users frequently produce queries with minor syntax errors - * that have clear intent. Rather than failing with a cryptic 400, we repair - * what we can and warn. Current repairs: + * Normalize a search query by applying a pipeline of text repairs. * - * 1. Trailing commas in in-list filters: `key:[a,b,]` → `key:[a,b]` - * 2. Wrong closing delimiter: `key:[a,b,)` → `key:[a,b]` + * Runs on every query BEFORE PEG parsing. Each pass is a small, focused + * transform that fixes a common agent/user mistake. The pipeline is ordered + * from most common to least common pattern. * * Returns the original query unchanged if no repairs were applicable. */ -function tryRepairQuery(query: string): string { - let repaired = query; +function normalizeQuery(query: string): string { + let q = query; + + // 1. Fix mismatched closing delimiters: `[a,b,)` → `[a,b]` + // The `)` is a common typo/autocomplete artifact. + q = fixMismatchedBrackets(q); - // Fix trailing commas and wrong closing delimiters in in-list filters - repaired = repaired.replace( - TRAILING_COMMA_IN_LIST_RE, + // 2. Strip trailing commas in in-list: `[a,b,]` → `[a,b]` + q = stripTrailingListCommas(q); + + // 3. Collapse runs of whitespace (agents sometimes double-space) + q = q.replace(MULTI_SPACE_RE, " ").trim(); + + return q; +} + +/** + * Fix mismatched closing delimiters in in-list filters. + * + * `key:[a,b,)` → `key:[a,b]` — the `)` after `[` is clearly meant to be `]`. + * Only replaces `)` that follows a `[...` opener without an intervening `]`. + */ +function fixMismatchedBrackets(query: string): string { + return query.replace( + MALFORMED_IN_LIST_RE, (_match, inner: string) => `[${inner.replace(TRAILING_COMMA_RE, "")}]` ); +} - return repaired; +/** + * Strip trailing commas inside in-list filters. + * + * `key:[a,b,]` → `key:[a,b]` — valid PEG syntax but some APIs reject it. + * Only operates on balanced `[...]` blocks to avoid cross-filter corruption. + */ +function stripTrailingListCommas(query: string): string { + return query.replace(BALANCED_BRACKET_RE, (match) => + match.replace(TRAILING_LIST_COMMA_RE, "]") + ); } // --------------------------------------------------------------------------- @@ -549,5 +576,5 @@ export const __testing = { tryRewriteOr, serializeNode, serializeNodes, - tryRepairQuery, + normalizeQuery, }; diff --git a/test/lib/search-query.test.ts b/test/lib/search-query.test.ts index a8fafab69..1ff3d8cc6 100644 --- a/test/lib/search-query.test.ts +++ b/test/lib/search-query.test.ts @@ -14,7 +14,7 @@ import { describe, expect, test } from "bun:test"; import { ValidationError } from "../../src/lib/errors.js"; import { __testing, sanitizeQuery } from "../../src/lib/search-query.js"; -const { tryRepairQuery } = __testing; +const { normalizeQuery } = __testing; // --------------------------------------------------------------------------- // Passthrough (no operators) @@ -332,69 +332,92 @@ describe("sanitizeQuery: edge cases", () => { }); }); -describe("tryRepairQuery: auto-repair malformed syntax", () => { - test("fixes trailing comma in in-list filter", () => { - expect(tryRepairQuery("level:[error,warning,]")).toBe( - "level:[error,warning]" - ); - }); - - test("fixes trailing comma with spaces", () => { - expect(tryRepairQuery("level:[error, warning, ]")).toBe( - "level:[error, warning]" - ); - }); - - test("fixes wrong closing delimiter ) → ]", () => { - expect(tryRepairQuery("status_code:[401,403,429,500,)")).toBe( - "status_code:[401,403,429,500]" - ); - }); - - test("fixes trailing comma + wrong delimiter combined", () => { - expect(tryRepairQuery("error.http.status_code:[401,403,429,500,)")).toBe( - "error.http.status_code:[401,403,429,500]" - ); - }); - - test("repairs within a longer query", () => { - expect( - tryRepairQuery("is:unresolved error.http.status_code:[401,403,429,500,)") - ).toBe("is:unresolved error.http.status_code:[401,403,429,500]"); - }); - - test("leaves valid queries unchanged", () => { - expect(tryRepairQuery("level:[error,warning]")).toBe( - "level:[error,warning]" - ); - }); - - test("leaves non-list queries unchanged", () => { - expect(tryRepairQuery("is:unresolved level:error")).toBe( - "is:unresolved level:error" - ); - }); - - test("leaves empty query unchanged", () => { - expect(tryRepairQuery("")).toBe(""); +describe("normalizeQuery: pre-parse text normalization", () => { + describe("mismatched brackets", () => { + test("fixes wrong closing delimiter ) → ]", () => { + expect(normalizeQuery("status_code:[401,403,429,500,)")).toBe( + "status_code:[401,403,429,500]" + ); + }); + + test("fixes trailing comma + wrong delimiter combined", () => { + expect(normalizeQuery("error.http.status_code:[401,403,429,500,)")).toBe( + "error.http.status_code:[401,403,429,500]" + ); + }); + + test("repairs within a longer query", () => { + expect( + normalizeQuery( + "is:unresolved error.http.status_code:[401,403,429,500,)" + ) + ).toBe("is:unresolved error.http.status_code:[401,403,429,500]"); + }); + }); + + describe("trailing list commas", () => { + test("strips trailing comma in in-list filter", () => { + expect(normalizeQuery("level:[error,warning,]")).toBe( + "level:[error,warning]" + ); + }); + + test("strips trailing comma with spaces", () => { + expect(normalizeQuery("level:[error, warning, ]")).toBe( + "level:[error, warning]" + ); + }); + }); + + describe("whitespace normalization", () => { + test("collapses runs of spaces", () => { + expect(normalizeQuery("is:unresolved level:error")).toBe( + "is:unresolved level:error" + ); + }); + + test("trims leading and trailing whitespace", () => { + expect(normalizeQuery(" is:unresolved ")).toBe("is:unresolved"); + }); + }); + + describe("passthrough", () => { + test("leaves valid queries unchanged", () => { + expect(normalizeQuery("level:[error,warning]")).toBe( + "level:[error,warning]" + ); + }); + + test("leaves non-list queries unchanged", () => { + expect(normalizeQuery("is:unresolved level:error")).toBe( + "is:unresolved level:error" + ); + }); + + test("leaves empty query unchanged", () => { + expect(normalizeQuery("")).toBe(""); + }); + + test("does not cross filter boundaries", () => { + // Two filters — each should be repaired independently + expect(normalizeQuery("a:[1,) b:[2,)")).toBe("a:[1] b:[2]"); + }); }); }); -describe("sanitizeQuery: auto-repair integration", () => { - test("trailing comma before ] is valid PEG syntax — no repair needed", () => { - // The PEG parser accepts trailing commas before ], so this parses fine - // and passes through without repair. +describe("sanitizeQuery: normalization integration", () => { + test("normalizes trailing comma before ] (pre-parse)", () => { + // Trailing comma is stripped before PEG parsing const result = sanitizeQuery("level:[error,warning,]"); - expect(result).toBe("level:[error,warning,]"); + expect(result).toBe("level:[error,warning]"); }); - test("auto-repairs wrong closing delimiter ) and returns fixed query", () => { - // The ) closing delimiter fails PEG parsing, triggering auto-repair + test("normalizes wrong closing delimiter ) (pre-parse)", () => { const result = sanitizeQuery("level:[error,warning,)"); expect(result).toBe("level:[error,warning]"); }); - test("auto-repairs complex filter with wrong delimiter in longer query", () => { + test("normalizes complex filter in longer query", () => { const result = sanitizeQuery( "is:unresolved error.http.status_code:[401,403,429,500,)" ); @@ -403,8 +426,12 @@ describe("sanitizeQuery: auto-repair integration", () => { ); }); + test("collapses double-spaces in query", () => { + const result = sanitizeQuery("is:unresolved level:error"); + expect(result).toBe("is:unresolved level:error"); + }); + test("unfixable malformed query passes through to API", () => { - // Completely broken syntax that tryRepairQuery can't fix const result = sanitizeQuery("((( broken"); expect(result).toBe("((( broken"); }); From d082c504f758986f38c5b1029b846c65bd3e9e95 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Wed, 29 Apr 2026 04:29:11 +0000 Subject: [PATCH 2/3] fix: tighten bracket regex to only match ), remove unsafe whitespace collapse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. MALFORMED_IN_LIST_RE now only matches ) as wrong closing delimiter, not ] (which is handled by stripTrailingListCommas). Removes the overlap between the two pipeline steps. 2. Removes whitespace collapse — it would alter content inside quoted values like message:"connection timed out". Addresses Cursor Bugbot medium + low severity findings. --- src/lib/search-query.ts | 13 +++++-------- test/lib/search-query.test.ts | 17 ----------------- 2 files changed, 5 insertions(+), 25 deletions(-) diff --git a/src/lib/search-query.ts b/src/lib/search-query.ts index 09340a3b3..e3eee0cba 100644 --- a/src/lib/search-query.ts +++ b/src/lib/search-query.ts @@ -496,17 +496,15 @@ export const SEARCH_SYNTAX_REFERENCE = { // --------------------------------------------------------------------------- /** - * In-list filter with wrong closing delimiter or trailing comma. - * Matches `key:[a,b,)` and `key:[a,b,]` — captures the inner values. + * In-list filter with wrong closing delimiter `)`. + * Matches `key:[a,b,)` — captures the inner values. Does NOT match `[a,b,]` + * (handled separately by {@link stripTrailingListCommas} via balanced brackets). */ -const MALFORMED_IN_LIST_RE = /\[([^[\]]*),\s*[\])](?=\s|$)/g; +const MALFORMED_IN_LIST_RE = /\[([^[\]]*),\s*\)(?=\s|$)/g; /** Trailing comma at end of captured group content */ const TRAILING_COMMA_RE = /,\s*$/; -/** Runs of multiple spaces — collapsed to single space */ -const MULTI_SPACE_RE = / {2,}/g; - /** Balanced `[...]` block — used to skip well-formed in-list filters */ const BALANCED_BRACKET_RE = /\[[^\]]*\]/g; @@ -532,8 +530,7 @@ function normalizeQuery(query: string): string { // 2. Strip trailing commas in in-list: `[a,b,]` → `[a,b]` q = stripTrailingListCommas(q); - // 3. Collapse runs of whitespace (agents sometimes double-space) - q = q.replace(MULTI_SPACE_RE, " ").trim(); + // Future passes can be added here (e.g., quote balancing, date normalization) return q; } diff --git a/test/lib/search-query.test.ts b/test/lib/search-query.test.ts index 1ff3d8cc6..00979818d 100644 --- a/test/lib/search-query.test.ts +++ b/test/lib/search-query.test.ts @@ -369,18 +369,6 @@ describe("normalizeQuery: pre-parse text normalization", () => { }); }); - describe("whitespace normalization", () => { - test("collapses runs of spaces", () => { - expect(normalizeQuery("is:unresolved level:error")).toBe( - "is:unresolved level:error" - ); - }); - - test("trims leading and trailing whitespace", () => { - expect(normalizeQuery(" is:unresolved ")).toBe("is:unresolved"); - }); - }); - describe("passthrough", () => { test("leaves valid queries unchanged", () => { expect(normalizeQuery("level:[error,warning]")).toBe( @@ -426,11 +414,6 @@ describe("sanitizeQuery: normalization integration", () => { ); }); - test("collapses double-spaces in query", () => { - const result = sanitizeQuery("is:unresolved level:error"); - expect(result).toBe("is:unresolved level:error"); - }); - test("unfixable malformed query passes through to API", () => { const result = sanitizeQuery("((( broken"); expect(result).toBe("((( broken"); From b55cfb0d011d45e8b96ba0a545cac1f6b15ee708 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Wed, 29 Apr 2026 04:40:11 +0000 Subject: [PATCH 3/3] fix: remove redundant effectiveQuery alias --- src/lib/search-query.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib/search-query.ts b/src/lib/search-query.ts index e3eee0cba..d39a7d636 100644 --- a/src/lib/search-query.ts +++ b/src/lib/search-query.ts @@ -346,7 +346,6 @@ export function sanitizeQuery(query: string | undefined): string | undefined { // These fix common patterns that agents/users produce, regardless of // whether the PEG parser would accept them. const normalized = normalizeQuery(query); - const effectiveQuery = normalized; let nodes: SearchNode[]; try { @@ -394,7 +393,7 @@ export function sanitizeQuery(query: string | undefined): string | undefined { return sanitized; } - return effectiveQuery; + return normalized; } /**