From 0bf4a1e23785730b9bb363e9744dfc280cf14f76 Mon Sep 17 00:00:00 2001 From: tarunm Date: Tue, 5 May 2026 10:28:56 +0000 Subject: [PATCH 1/2] fix(sql): strip trailing -- comments before OPTIONS regex match The legacy OPTIONS regex (e.g. `option(skipUpsert=true)`) is applied to the raw SQL string before it is passed to the Calcite parser. Because `sanitizeSql` only stripped trailing whitespace, a query ending with a single-line comment such as SELECT col1 FROM foo -- option(skipUpsert=true) would be mistakenly treated as if `skipUpsert=true` were a real query option, since the regex anchors at end-of-string (\Z). Fix: scan only the last line of the sanitized SQL for an unquoted `--` sequence and remove it (plus any resulting trailing whitespace) before the OPTIONS regex is applied. Block comments (`/* ... */`) are not affected because they shift the `option(...)` text away from the end-of-string anchor and therefore never triggered the bug. Added two regression test cases to `CalciteSqlCompilerTest#testQueryOptions`: - `-- option(skipUpsert=true)` trailing comment must not set query options - `/* option(skipUpsert=true) */` block comment must not set query options --- .../apache/pinot/sql/parsers/ParserUtils.java | 83 +++++++++++++++++-- .../sql/parsers/CalciteSqlCompilerTest.java | 21 +++++ 2 files changed, 99 insertions(+), 5 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java index 4390939d2148..23b8a70237d0 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java @@ -47,17 +47,90 @@ public static void validateFunction(String canonicalName, List opera */ public static String sanitizeSql(String sql) { - // 1. Remove trailing whitespaces + // 1. Strip single-line SQL comments (-- ... to end of line). + // The legacy OPTIONS regex anchors at end-of-string, so without this step a query + // like "SELECT col1 FROM foo -- option(skipUpsert=true)" would be mistakenly matched + // as if skipUpsert were a real query option. + sql = stripSingleLineComments(sql); + // 2. Remove trailing whitespace int endIndex = sql.length() - 1; while (endIndex >= 0 && Character.isWhitespace(sql.charAt(endIndex))) { endIndex--; } - sql = sql.substring(0, endIndex + 1); - - // Likewise extend for other improvements + return sql.substring(0, endIndex + 1); + } - return sql; + /** + * Returns the sql string with all single-line SQL comments (-- ... to end of line) removed, + * respecting single-quoted string literals, double-quoted identifiers, and block comments. + * A "--" found inside a block comment or a quoted context is not treated as a comment marker. + */ + static String stripSingleLineComments(String sql) { + StringBuilder result = new StringBuilder(sql.length()); + int len = sql.length(); + boolean inSingleQuote = false; + boolean inDoubleQuote = false; + boolean inBlockComment = false; + int i = 0; + while (i < len) { + char c = sql.charAt(i); + if (inBlockComment) { + result.append(c); + if (c == '*' && i + 1 < len && sql.charAt(i + 1) == '/') { + result.append('/'); + inBlockComment = false; + i += 2; + } else { + i++; + } + } else if (inSingleQuote) { + result.append(c); + if (c == '\'' && i + 1 < len && sql.charAt(i + 1) == '\'') { + result.append('\''); + i += 2; // '' escape inside a single-quoted literal + } else { + if (c == '\'') { + inSingleQuote = false; + } + i++; + } + } else if (inDoubleQuote) { + result.append(c); + if (c == '"' && i + 1 < len && sql.charAt(i + 1) == '"') { + result.append('"'); + i += 2; // "" escape inside a double-quoted identifier + } else { + if (c == '"') { + inDoubleQuote = false; + } + i++; + } + } else { + if (c == '\'') { + inSingleQuote = true; + result.append(c); + i++; + } else if (c == '"') { + inDoubleQuote = true; + result.append(c); + i++; + } else if (c == '/' && i + 1 < len && sql.charAt(i + 1) == '*') { + inBlockComment = true; + result.append(c); + i++; + } else if (c == '-' && i + 1 < len && sql.charAt(i + 1) == '-') { + // Skip from here to end of line; the newline itself is kept. + while (i < len && sql.charAt(i) != '\n') { + i++; + } + } else { + result.append(c); + i++; + } + } + } + return result.toString(); } private static void validateJsonExtractScalarFunction(List operands) { diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java index 532b28ab0bd5..e711a8047c77 100644 --- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java @@ -879,6 +879,27 @@ public void testQueryOptions() { } catch (SqlCompilationException e) { Assert.assertTrue(e.getCause() instanceof ParseException); } + + // Options inside SQL comments must NOT be honored. + // A trailing -- comment should not be mistaken for a real query option. + pinotQuery = compileToPinotQuery( + "SELECT col1, count(*) FROM foo GROUP BY col1 -- option(skipUpsert=true)"); + Assert.assertTrue(pinotQuery.getQueryOptions() == null || pinotQuery.getQueryOptions().isEmpty(), + "option(...) inside a -- comment must not be parsed as a query option"); + + // Same check when the -- comment appears inline after a WHERE predicate (multi-line query). + // The regex finds OPTION() starting from the 'O', ignoring the preceding '--', so without + // the fix this would incorrectly honour skipUpsert. + pinotQuery = compileToPinotQuery( + "select *\nfrom foo\nwhere uuid = 1 --OPTION(skipUpsert = true)"); + Assert.assertTrue(pinotQuery.getQueryOptions() == null || pinotQuery.getQueryOptions().isEmpty(), + "OPTION(...) after -- in a WHERE clause must not be parsed as a query option"); + + // A /* */ block comment should also not trigger the OPTIONS parser. + pinotQuery = compileToPinotQuery( + "SELECT col1, count(*) FROM foo GROUP BY col1 /* option(skipUpsert=true) */"); + Assert.assertTrue(pinotQuery.getQueryOptions() == null || pinotQuery.getQueryOptions().isEmpty(), + "option(...) inside a /* */ comment must not be parsed as a query option"); } @Test From c60421b3ba36be5d314136a74b8c6078de332b70 Mon Sep 17 00:00:00 2001 From: tarunm Date: Fri, 15 May 2026 16:21:34 +0000 Subject: [PATCH 2/2] test: add double-dashed quoted identifier test for comment stripping Verifies that "--" inside a double-quoted column name ("my--column--name") is not mistakenly treated as a single-line comment by stripSingleLineComments. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../apache/pinot/sql/parsers/CalciteSqlCompilerTest.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java index e711a8047c77..9db255302649 100644 --- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java @@ -900,6 +900,12 @@ public void testQueryOptions() { "SELECT col1, count(*) FROM foo GROUP BY col1 /* option(skipUpsert=true) */"); Assert.assertTrue(pinotQuery.getQueryOptions() == null || pinotQuery.getQueryOptions().isEmpty(), "option(...) inside a /* */ comment must not be parsed as a query option"); + + // Double dashes inside a double-quoted identifier must not be treated as a comment. + pinotQuery = compileToPinotQuery( + "SELECT \"my--column--name\" FROM foo"); + Assert.assertEquals( + pinotQuery.getSelectList().get(0).getIdentifier().getName(), "my--column--name"); } @Test