Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 67 additions & 44 deletions src/lib/search-query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -341,33 +341,25 @@ export function sanitizeQuery(query: string | undefined): string | undefined {
return query;
}

// Track whether we auto-repaired the query so we return the repaired
// version (not the original) if no further rewriting is needed.
let effectiveQuery = query;
// --- Layer 1: Pre-parse text normalization ---
// Run cheap text transforms on every query BEFORE PEG parsing.
// These fix common patterns that agents/users produce, regardless of
// whether the PEG parser would accept them.
const normalized = normalizeQuery(query);

let nodes: SearchNode[];
try {
nodes = parse(query);
nodes = parse(normalized);
} catch {
// Malformed query — attempt common repairs before passing through.
// AI agents frequently produce slightly malformed search syntax that
// has clear intent (e.g., trailing commas in in-list filters).
const repaired = tryRepairQuery(query);
if (repaired !== query) {
effectiveQuery = repaired;
// Re-parse the repaired query — if it still fails, pass through
// to the API which returns a proper 400 with details.
try {
nodes = parse(repaired);
} catch {
return repaired;
}
// Log only after confirming the repair produced valid syntax
log.warn(
`Auto-repaired search query syntax. Running query: "${repaired}"`
);
} else {
return query;
}
// PEG parse still failed after normalization — pass through to the
// API which returns a proper 400 with actionable details.
return normalized;
}

if (normalized !== query) {
log.warn(
`Auto-repaired search query syntax. Running query: "${normalized}"`
);
}

// Check for OR inside paren groups first — these are opaque and can't
Expand Down Expand Up @@ -401,7 +393,7 @@ export function sanitizeQuery(query: string | undefined): string | undefined {
return sanitized;
}

return effectiveQuery;
return normalized;
}

/**
Expand Down Expand Up @@ -499,41 +491,72 @@ export const SEARCH_SYNTAX_REFERENCE = {
};

// ---------------------------------------------------------------------------
// Query repair
// Query normalization pipeline
// ---------------------------------------------------------------------------

/**
 * In-list filter closed with `)` instead of `]`, with a trailing comma before
 * the wrong delimiter: matches `[a,b,)` and captures the inner values (`a,b`).
 *
 * - The comma before `)` is required, so `[a,b)` does NOT match.
 * - The captured values may not contain `[` or `]`, so a match never spans
 *   two bracketed lists.
 * - The lookahead requires whitespace or end-of-input right after `)`.
 * - Does NOT match `[a,b,]` (handled separately by
 *   {@link stripTrailingListCommas} via balanced brackets).
 */
const MALFORMED_IN_LIST_RE = /\[([^[\]]*),\s*\)(?=\s|$)/g;

/** Trailing comma (plus optional whitespace) at the end of captured group content */
const TRAILING_COMMA_RE = /,\s*$/;

/** Balanced `[...]` block — the unit that {@link stripTrailingListCommas} rewrites */
const BALANCED_BRACKET_RE = /\[[^\]]*\]/g;

/** Trailing comma before the closing bracket of a balanced block: `,]` */
const TRAILING_LIST_COMMA_RE = /,\s*\]$/;

/**
 * Normalize a search query by applying a pipeline of text repairs.
 *
 * Runs on every query BEFORE PEG parsing. Each pass is a small, focused
 * transform that fixes a common agent/user mistake. The pipeline is ordered
 * from most common to least common pattern.
 *
 * @param query - Raw search query text.
 * @returns The repaired query, or the original query unchanged if no repairs
 *   were applicable.
 */
function normalizeQuery(query: string): string {
  let q = query;

  // 1. Fix mismatched closing delimiters: `[a,b,)` → `[a,b]`
  //    The `)` is a common typo/autocomplete artifact.
  q = fixMismatchedBrackets(q);

  // 2. Strip trailing commas in in-list: `[a,b,]` → `[a,b]`
  q = stripTrailingListCommas(q);

  // Future passes can be added here (e.g., quote balancing, date normalization)

  return q;
}

/**
 * Rewrite in-list filters that were closed with `)` instead of `]`.
 *
 * `key:[a,b,)` → `key:[a,b]`. Every match of `MALFORMED_IN_LIST_RE` is rebuilt
 * as `[` + captured values + `]`, and any comma still trailing the captured
 * values is dropped. The pattern only fires when the `)` is preceded by a
 * comma, follows a `[` with no `[` or `]` in between, and is itself followed
 * by whitespace or end-of-input.
 *
 * @param query - Search query text to repair.
 * @returns The query with each matching list re-closed; unchanged when
 *   nothing matches.
 */
function fixMismatchedBrackets(query: string): string {
  const recloseList = (_whole: string, values: string): string => {
    const withoutTrailingComma = values.replace(TRAILING_COMMA_RE, "");
    return `[${withoutTrailingComma}]`;
  };

  return query.replace(MALFORMED_IN_LIST_RE, recloseList);
}

return repaired;
/**
 * Strip trailing commas inside in-list filters.
 *
 * `key:[a,b,]` → `key:[a,b]` — valid PEG syntax but some APIs reject it.
 * Only operates on balanced `[...]` blocks to avoid cross-filter corruption.
 * A single trailing comma (plus any whitespace after it) is removed per
 * block; blocks without one are returned untouched.
 *
 * @param query - Search query text to clean up.
 * @returns The query with the comma before each closing `]` removed.
 */
function stripTrailingListCommas(query: string): string {
  return query.replace(BALANCED_BRACKET_RE, (match) =>
    match.replace(TRAILING_LIST_COMMA_RE, "]")
  );
}

// ---------------------------------------------------------------------------
Expand All @@ -549,5 +572,5 @@ export const __testing = {
tryRewriteOr,
serializeNode,
serializeNodes,
tryRepairQuery,
normalizeQuery,
};
120 changes: 65 additions & 55 deletions test/lib/search-query.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import { describe, expect, test } from "bun:test";
import { ValidationError } from "../../src/lib/errors.js";
import { __testing, sanitizeQuery } from "../../src/lib/search-query.js";

const { tryRepairQuery } = __testing;
const { normalizeQuery } = __testing;

// ---------------------------------------------------------------------------
// Passthrough (no operators)
Expand Down Expand Up @@ -332,69 +332,80 @@ describe("sanitizeQuery: edge cases", () => {
});
});

// Unit tests for the pre-parse normalization pipeline, grouped by the pass
// that each input exercises.
describe("normalizeQuery: pre-parse text normalization", () => {
  describe("mismatched brackets", () => {
    test("fixes wrong closing delimiter ) → ]", () => {
      expect(normalizeQuery("status_code:[401,403,429,500,)")).toBe(
        "status_code:[401,403,429,500]"
      );
    });

    test("fixes trailing comma + wrong delimiter combined", () => {
      expect(normalizeQuery("error.http.status_code:[401,403,429,500,)")).toBe(
        "error.http.status_code:[401,403,429,500]"
      );
    });

    test("repairs within a longer query", () => {
      expect(
        normalizeQuery(
          "is:unresolved error.http.status_code:[401,403,429,500,)"
        )
      ).toBe("is:unresolved error.http.status_code:[401,403,429,500]");
    });

    test("does not cross filter boundaries", () => {
      // Two filters — each should be repaired independently
      expect(normalizeQuery("a:[1,) b:[2,)")).toBe("a:[1] b:[2]");
    });
  });

  describe("trailing list commas", () => {
    test("strips trailing comma in in-list filter", () => {
      expect(normalizeQuery("level:[error,warning,]")).toBe(
        "level:[error,warning]"
      );
    });

    test("strips trailing comma with spaces", () => {
      expect(normalizeQuery("level:[error, warning, ]")).toBe(
        "level:[error, warning]"
      );
    });

    test("strips trailing commas in several lists independently", () => {
      expect(normalizeQuery("a:[1,] b:[2,]")).toBe("a:[1] b:[2]");
    });
  });

  describe("passthrough", () => {
    test("leaves valid queries unchanged", () => {
      expect(normalizeQuery("level:[error,warning]")).toBe(
        "level:[error,warning]"
      );
    });

    test("leaves non-list queries unchanged", () => {
      expect(normalizeQuery("is:unresolved level:error")).toBe(
        "is:unresolved level:error"
      );
    });

    test("leaves empty query unchanged", () => {
      expect(normalizeQuery("")).toBe("");
    });

    test("leaves ) without a preceding comma unchanged", () => {
      // The delimiter repair only fires on `,)`; a bare `)` is too ambiguous
      expect(normalizeQuery("level:[error,warning)")).toBe(
        "level:[error,warning)"
      );
    });
  });
});

describe("sanitizeQuery: auto-repair integration", () => {
test("trailing comma before ] is valid PEG syntax — no repair needed", () => {
// The PEG parser accepts trailing commas before ], so this parses fine
// and passes through without repair.
describe("sanitizeQuery: normalization integration", () => {
test("normalizes trailing comma before ] (pre-parse)", () => {
// Trailing comma is stripped before PEG parsing
const result = sanitizeQuery("level:[error,warning,]");
expect(result).toBe("level:[error,warning,]");
expect(result).toBe("level:[error,warning]");
});

test("auto-repairs wrong closing delimiter ) and returns fixed query", () => {
// The ) closing delimiter fails PEG parsing, triggering auto-repair
test("normalizes wrong closing delimiter ) (pre-parse)", () => {
const result = sanitizeQuery("level:[error,warning,)");
expect(result).toBe("level:[error,warning]");
});

test("auto-repairs complex filter with wrong delimiter in longer query", () => {
test("normalizes complex filter in longer query", () => {
const result = sanitizeQuery(
"is:unresolved error.http.status_code:[401,403,429,500,)"
);
Expand All @@ -404,7 +415,6 @@ describe("sanitizeQuery: auto-repair integration", () => {
});

test("unfixable malformed query passes through to API", () => {
// Completely broken syntax that tryRepairQuery can't fix
const result = sanitizeQuery("((( broken");
expect(result).toBe("((( broken");
});
Expand Down
Loading