From 395688748de40e5fe69b3b595b01c762f26c2f8a Mon Sep 17 00:00:00 2001 From: airborne12 Date: Mon, 25 May 2026 23:46:46 +0800 Subject: [PATCH 1/3] [fix](search) reject Lucene-syntax search on columns without inverted index Issue Number: close #N/A (Jira CIR-20006) Problem Summary: SEARCH (Lucene syntax) predicates against columns that have no inverted index silently fall back to an empty bitmap on BE (vsearch.cpp and function_search.cpp only log a WARNING and then return Status::OK() with an empty result), making the query look like "no rows matched". That is indistinguishable from a successful query that simply found nothing and misleads users. Validate at planning time in RewriteSearchToSlots, matching the existing "column does not exist" behavior: - Normal columns: require OlapTable.getInvertedIndex(column, null) != null. - Variant subcolumns (parent.path): require any INVERTED index whose first column equals the parent variant column; the concrete subcolumn binding is still resolved per-segment in BE, consistent with the is_variant_sub branch in function_search.cpp. Also harden OlapTable.getInvertedIndex against NPE when the table has no TableIndexes set (returns null instead of dereferencing). Release note: SEARCH() with Lucene syntax now throws AnalysisException at planning time when the referenced column has no inverted index, with guidance to add one via ALTER TABLE ... ADD INDEX ... USING INVERTED. Previously such queries silently returned zero rows. Check List (For Author): - Test: - Unit Test: RewriteSearchToSlotsTest updated and extended (testRewriteSearchThrowsWhenColumnHasNoInvertedIndex, testRewriteSearchSucceedsWhenColumnHasInvertedIndex, testRewriteSearchHandlesCaseInsensitiveField switched to a table with an inverted index on name). - Behavior changed: Yes - a previously silent all-FALSE (empty) result now becomes a clear AnalysisException at planning time. 
- Does this need documentation: No (cherry picked from commit a4a9cf81e5820030d6668cafd0faea21f4a5fbc4) --- .../org/apache/doris/catalog/OlapTable.java | 3 + .../rules/rewrite/RewriteSearchToSlots.java | 57 +++++++++++++++ .../rewrite/RewriteSearchToSlotsTest.java | 69 ++++++++++++++++++- 3 files changed, 128 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 020173cd44d85c..41082811191422 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -3801,6 +3801,9 @@ public Index getInvertedIndex(Column column, List subPath) { } public Index getInvertedIndex(Column column, List subPath, String analyzer) { + if (indexes == null) { + return null; + } List invertedIndexes = new ArrayList<>(); for (Index index : indexes.getIndexes()) { if (index.getIndexType() == IndexDef.IndexType.INVERTED) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java index 83da8f99a96821..9ab8d7870dc28a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java @@ -17,6 +17,10 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Index; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.info.IndexType; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; @@ -128,6 +132,11 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { parentFieldName, search.getDslString())); } + // Check the 
parent variant column has at least one INVERTED index. The concrete + // subcolumn binding is resolved per-segment in BE, so we only enforce the parent + // level here. See function_search.cpp is_variant_sub branch. + checkInvertedIndexExists(scan.getTable(), parentFieldName, search.getDslString(), true); + // Create ElementAt expression for variant subcolumn // This will be converted to an extracted column slot by VariantSubPathPruning rule // If the subcolumn doesn't exist, ElementAt will remain and BE will handle it gracefully @@ -146,6 +155,7 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { "Field '%s' not found in table for search: %s", originalFieldName, search.getDslString())); } + checkInvertedIndexExists(scan.getTable(), slot.getName(), search.getDslString(), false); childExpr = slot; normalizedFieldName = slot.getName(); } @@ -168,6 +178,53 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { } } + /** + * Ensure the column referenced by a Lucene-syntax SEARCH predicate has an inverted index. + * Without this check the BE path would silently fall back to an empty bitmap (i.e. all FALSE), + * which is indistinguishable from "no rows matched" to the user. Throw at planning time so the + * behavior is consistent with referencing a non-existent column. + * + * @param table table backing the LogicalOlapScan + * @param columnName column name (parent column name when isVariantParent) + * @param dsl original DSL, used in the error message + * @param isVariantParent true when {@code columnName} is the parent of a variant subcolumn + * access (e.g. {@code msg.body}); for that case any INVERTED index on + * the parent column is accepted because the concrete subcolumn binding + * is resolved per-segment in BE. 
+ */ + private void checkInvertedIndexExists(OlapTable table, String columnName, String dsl, + boolean isVariantParent) { + Column column = table.getColumn(columnName); + if (column == null) { + // Field existence is already validated by findSlotByName; if we reach here the schema + // changed concurrently. Surface a clear error rather than fall through. + throw new AnalysisException(String.format( + "Column '%s' not found in table '%s' for search: %s", + columnName, table.getName(), dsl)); + } + + if (isVariantParent) { + for (Index index : table.getIndexes()) { + if (index.getIndexType() != IndexType.INVERTED) { + continue; + } + List columns = index.getColumns(); + if (columns != null && !columns.isEmpty() + && columnName.equalsIgnoreCase(columns.get(0))) { + return; + } + } + } else if (table.getInvertedIndex(column, null) != null) { + return; + } + + throw new AnalysisException(String.format( + "Field '%s' has no inverted index, cannot be used in search: %s. " + + "Create an inverted index on the column first " + + "(ALTER TABLE ... ADD INDEX ... 
USING INVERTED).", + columnName, dsl)); + } + private Slot findSlotByName(String fieldName, LogicalOlapScan scan) { // Direct match only - variant subcolumns are handled by caller for (Slot slot : scan.getOutput()) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java index 76e25cc3879e65..877951c32bb085 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java @@ -17,6 +17,15 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.catalog.AggregateType; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Index; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PartitionInfo; +import org.apache.doris.catalog.TableIndexes; +import org.apache.doris.catalog.Type; +import org.apache.doris.catalog.info.IndexType; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.trees.expressions.Expression; @@ -28,6 +37,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.types.StringType; import org.apache.doris.nereids.util.PlanConstructor; +import org.apache.doris.thrift.TStorageType; import com.google.common.collect.ImmutableList; import org.junit.jupiter.api.Assertions; @@ -229,7 +239,7 @@ public void testSlotReferenceConsistency() { @Test public void testRewriteSearchHandlesCaseInsensitiveField() throws Exception { LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), - PlanConstructor.student, ImmutableList.of("db")); + buildStudentWithInvertedIndexOnName(100L), ImmutableList.of("db")); Search searchFunc = new 
Search(new StringLiteral("NAME:alice")); Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod( @@ -266,4 +276,61 @@ public void testRewriteSearchThrowsWhenFieldMissing() throws Exception { Assertions.assertInstanceOf(AnalysisException.class, thrown.getCause()); Assertions.assertTrue(thrown.getCause().getMessage().contains("unknown_field")); } + + @Test + public void testRewriteSearchThrowsWhenColumnHasNoInvertedIndex() throws Exception { + // PlanConstructor.student has the 'name' column but no inverted index on it. The rewrite + // must surface a clear error instead of letting BE silently return an empty bitmap. + LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), + PlanConstructor.student, ImmutableList.of("db")); + Search searchFunc = new Search(new StringLiteral("name:alice")); + + Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod( + "rewriteSearch", Search.class, LogicalOlapScan.class); + rewriteMethod.setAccessible(true); + + InvocationTargetException thrown = Assertions.assertThrows(InvocationTargetException.class, + () -> rewriteMethod.invoke(rewriteRule, searchFunc, scan)); + Assertions.assertNotNull(thrown.getCause()); + Assertions.assertInstanceOf(AnalysisException.class, thrown.getCause()); + Assertions.assertTrue(thrown.getCause().getMessage().contains("inverted index"), + "Error message should mention inverted index, got: " + thrown.getCause().getMessage()); + Assertions.assertTrue(thrown.getCause().getMessage().contains("name")); + } + + @Test + public void testRewriteSearchSucceedsWhenColumnHasInvertedIndex() throws Exception { + LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), + buildStudentWithInvertedIndexOnName(101L), ImmutableList.of("db")); + Search searchFunc = new Search(new StringLiteral("name:alice")); + + Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod( + "rewriteSearch", Search.class, LogicalOlapScan.class); + 
rewriteMethod.setAccessible(true); + + Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan); + Assertions.assertInstanceOf(SearchExpression.class, rewritten); + + SearchExpression searchExpression = (SearchExpression) rewritten; + Assertions.assertEquals(1, searchExpression.getSlotChildren().size()); + Assertions.assertTrue(searchExpression.getSlotChildren().get(0) instanceof SlotReference); + Assertions.assertEquals("name", + ((SlotReference) searchExpression.getSlotChildren().get(0)).getName()); + } + + private static OlapTable buildStudentWithInvertedIndexOnName(long tableId) { + List columns = ImmutableList.of( + new Column("id", Type.INT, true, AggregateType.NONE, "0", ""), + new Column("gender", Type.INT, false, AggregateType.NONE, "0", ""), + new Column("name", Type.STRING, true, AggregateType.NONE, "", ""), + new Column("age", Type.INT, true, AggregateType.NONE, "", "")); + Index invertedOnName = new Index(1L, "idx_name", ImmutableList.of("name"), + IndexType.INVERTED, null, ""); + OlapTable table = new OlapTable(tableId, "student_with_inverted_index", false, columns, + KeysType.PRIMARY_KEYS, new PartitionInfo(), null, + new TableIndexes(ImmutableList.of(invertedOnName))); + table.setIndexMeta(-1, "student_with_inverted_index", table.getFullSchema(), + 0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS); + return table; + } } From 35793b7246e9d308cd0fc2eb590e6aaf13cdf8ca Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 26 May 2026 21:09:58 +0800 Subject: [PATCH 2/3] [test](search) align test_search_function with new FE-side SEARCH validation ### What problem does this PR solve? Issue Number: close #N/A (follow-up to Jira CIR-20006 / PR #63637) Problem Summary: Test 22 of `regression-test/suites/search/test_search_function.groovy` covered "SEARCH on a column without inverted index" and asserted the error message contained the old BE-side text `"SearchExpr should not be executed without inverted index"`. 
After the fix for CIR-20006, that scenario is now rejected at FE planning time in `RewriteSearchToSlots.checkInvertedIndexExists`, with an `AnalysisException` whose message contains `"inverted index"` and names the offending column. Update the assertion so the test passes on a build that includes the FE-side check (and also explicitly verifies the catch block actually fired, instead of silently passing if the SQL unexpectedly succeeds). ### Release note None (test-only change). ### Check List (For Author) - Test: - Regression-test only: regression-test/suites/search/test_search_function.groovy - Behavior changed: No - Does this need documentation: No (cherry picked from commit 110297554eaa5a5a11300fa1afec8d638d370208) --- .../suites/search/test_search_function.groovy | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/regression-test/suites/search/test_search_function.groovy b/regression-test/suites/search/test_search_function.groovy index 61ee8e4b026897..25fb08cef5a106 100644 --- a/regression-test/suites/search/test_search_function.groovy +++ b/regression-test/suites/search/test_search_function.groovy @@ -153,11 +153,20 @@ suite("test_search_function", "p0") { // Test 21: ALL query test qt_sql "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${indexTableName} WHERE search('tags:ALL(machine learning)') ORDER BY id" - // Test 22: Search on non-indexed table (will throw exception) + // Test 22: Search on non-indexed table — must now throw at FE planning time. + // After the fix for Jira CIR-20006, RewriteSearchToSlots refuses to rewrite + // a SEARCH predicate against a column that has no inverted index, with an + // AnalysisException that names the column and points at "inverted index". 
+ boolean threw = false try { sql """SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} WHERE search('title:Machine') ORDER BY id""" } catch (Exception e) { + threw = true logger.info(e.getMessage()) - assertTrue(e.getMessage().contains("SearchExpr should not be executed without inverted index")) + assertTrue(e.getMessage().contains("inverted index"), + "expected error to mention 'inverted index', got: ${e.getMessage()}") + assertTrue(e.getMessage().contains("title"), + "expected error to mention 'title', got: ${e.getMessage()}") } + assertTrue(threw, "expected AnalysisException for SEARCH on column without inverted index") } From 42c96ab0e315d568d5881481ea009af0d0ffc7de Mon Sep 17 00:00:00 2001 From: airborne12 Date: Wed, 27 May 2026 12:03:57 +0800 Subject: [PATCH 3/3] [fix](search) Normalize variant SEARCH parent field ### What problem does this PR solve? Issue Number: close #N/A Related PR: #63637 Problem Summary: Variant subcolumn SEARCH rewrites resolved the parent slot case-insensitively, but the inverted-index validation still used the parent name exactly as written in the DSL. A valid predicate such as SEARCH('V.foo:bar') on a table with variant column v could therefore fail validation with Column 'V' not found. Use the resolved parent slot name for validation and normalize the field binding to the canonical parent path. ### Release note SEARCH() on variant subcolumns now resolves the parent column name case-insensitively during inverted-index validation. ### Check List (For Author) - Test: Unit Test - ./run-fe-ut.sh --run org.apache.doris.nereids.rules.rewrite.RewriteSearchToSlotsTest - Behavior changed: Yes - valid variant SEARCH predicates with differently-cased parent column names are no longer rejected. 
- Does this need documentation: No (cherry picked from commit d4d6f38762cefdd2c6ad323237327e540ec842a9) --- .../rules/rewrite/RewriteSearchToSlots.java | 10 +++-- .../rewrite/RewriteSearchToSlotsTest.java | 42 ++++++++++++++++++- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java index 9ab8d7870dc28a..33f32284e5a0cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java @@ -17,10 +17,10 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.analysis.IndexDef.IndexType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Index; import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.info.IndexType; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; @@ -131,22 +131,24 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { "Field '%s' is not VARIANT type for subcolumn access: %s", parentFieldName, search.getDslString())); } + String normalizedParentFieldName = parentSlot.getName(); // Check the parent variant column has at least one INVERTED index. The concrete // subcolumn binding is resolved per-segment in BE, so we only enforce the parent // level here. See function_search.cpp is_variant_sub branch. 
- checkInvertedIndexExists(scan.getTable(), parentFieldName, search.getDslString(), true); + checkInvertedIndexExists(scan.getTable(), normalizedParentFieldName, + search.getDslString(), true); // Create ElementAt expression for variant subcolumn // This will be converted to an extracted column slot by VariantSubPathPruning rule // If the subcolumn doesn't exist, ElementAt will remain and BE will handle it gracefully childExpr = new ElementAt(parentSlot, new StringLiteral(subcolumnPath)); - normalizedFieldName = originalFieldName; // Keep full path for field binding + normalizedFieldName = normalizedParentFieldName + "." + subcolumnPath; LOG.info( "Created ElementAt expression for variant subcolumn: parent='{}', " + "subcolumn='{}', field_name='{}'", - parentFieldName, subcolumnPath, normalizedFieldName); + normalizedParentFieldName, subcolumnPath, normalizedFieldName); } else { // Normal field - find slot directly Slot slot = findSlotByName(originalFieldName, scan); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java index 877951c32bb085..49fe9e7150474a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.analysis.IndexDef.IndexType; import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Index; @@ -25,12 +26,12 @@ import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.TableIndexes; import org.apache.doris.catalog.Type; -import org.apache.doris.catalog.info.IndexType; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import 
org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.SearchExpression; import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.functions.scalar.Search; import org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser; import org.apache.doris.nereids.trees.expressions.literal.StringLiteral; @@ -260,6 +261,31 @@ public void testRewriteSearchHandlesCaseInsensitiveField() throws Exception { Assertions.assertEquals("name", normalizedPlan.getRoot().getField()); } + @Test + public void testRewriteSearchHandlesCaseInsensitiveVariantParentField() throws Exception { + LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), + buildVariantTableWithInvertedIndex(102L), ImmutableList.of("db")); + Search searchFunc = new Search(new StringLiteral("V.foo:bar")); + + Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod( + "rewriteSearch", Search.class, LogicalOlapScan.class); + rewriteMethod.setAccessible(true); + + Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan); + Assertions.assertInstanceOf(SearchExpression.class, rewritten); + + SearchExpression searchExpression = (SearchExpression) rewritten; + Assertions.assertEquals(1, searchExpression.getSlotChildren().size()); + Assertions.assertTrue(searchExpression.getSlotChildren().get(0) instanceof ElementAt); + ElementAt elementAt = (ElementAt) searchExpression.getSlotChildren().get(0); + Assertions.assertTrue(elementAt.child(0) instanceof SlotReference); + Assertions.assertEquals("v", ((SlotReference) elementAt.child(0)).getName()); + + SearchDslParser.QsPlan normalizedPlan = searchExpression.getQsPlan(); + Assertions.assertEquals("v.foo", normalizedPlan.getFieldBindings().get(0).getFieldName()); + Assertions.assertEquals("v.foo", normalizedPlan.getRoot().getField()); + 
} + @Test public void testRewriteSearchThrowsWhenFieldMissing() throws Exception { LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), @@ -333,4 +359,18 @@ KeysType.PRIMARY_KEYS, new PartitionInfo(), null, 0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS); return table; } + + private static OlapTable buildVariantTableWithInvertedIndex(long tableId) { + List columns = ImmutableList.of( + new Column("id", Type.INT, true, AggregateType.NONE, "0", ""), + new Column("v", Type.VARIANT, false, AggregateType.NONE, "", "")); + Index invertedOnVariant = new Index(2L, "idx_v", ImmutableList.of("v"), + IndexType.INVERTED, null, ""); + OlapTable table = new OlapTable(tableId, "variant_with_inverted_index", false, columns, + KeysType.PRIMARY_KEYS, new PartitionInfo(), null, + new TableIndexes(ImmutableList.of(invertedOnVariant))); + table.setIndexMeta(-1, "variant_with_inverted_index", table.getFullSchema(), + 0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS); + return table; + } }