diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 020173cd44d85c..41082811191422 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -3801,6 +3801,9 @@ public Index getInvertedIndex(Column column, List subPath) { } public Index getInvertedIndex(Column column, List subPath, String analyzer) { + if (indexes == null) { + return null; + } List invertedIndexes = new ArrayList<>(); for (Index index : indexes.getIndexes()) { if (index.getIndexType() == IndexDef.IndexType.INVERTED) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java index 83da8f99a96821..33f32284e5a0cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java @@ -17,6 +17,10 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.analysis.IndexDef.IndexType; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Index; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; @@ -127,17 +131,24 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { "Field '%s' is not VARIANT type for subcolumn access: %s", parentFieldName, search.getDslString())); } + String normalizedParentFieldName = parentSlot.getName(); + + // Check the parent variant column has at least one INVERTED index. The concrete + // subcolumn binding is resolved per-segment in BE, so we only enforce the parent + // level here. See function_search.cpp is_variant_sub branch. + checkInvertedIndexExists(scan.getTable(), normalizedParentFieldName, + search.getDslString(), true); // Create ElementAt expression for variant subcolumn // This will be converted to an extracted column slot by VariantSubPathPruning rule // If the subcolumn doesn't exist, ElementAt will remain and BE will handle it gracefully childExpr = new ElementAt(parentSlot, new StringLiteral(subcolumnPath)); - normalizedFieldName = originalFieldName; // Keep full path for field binding + normalizedFieldName = normalizedParentFieldName + "." + subcolumnPath; LOG.info( "Created ElementAt expression for variant subcolumn: parent='{}', " + "subcolumn='{}', field_name='{}'", - parentFieldName, subcolumnPath, normalizedFieldName); + normalizedParentFieldName, subcolumnPath, normalizedFieldName); } else { // Normal field - find slot directly Slot slot = findSlotByName(originalFieldName, scan); @@ -146,6 +157,7 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { "Field '%s' not found in table for search: %s", originalFieldName, search.getDslString())); } + checkInvertedIndexExists(scan.getTable(), slot.getName(), search.getDslString(), false); childExpr = slot; normalizedFieldName = slot.getName(); } @@ -168,6 +180,53 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { } } + /** + * Ensure the column referenced by a Lucene-syntax SEARCH predicate has an inverted index. + * Without this check the BE path would silently fall back to an empty bitmap (i.e. all FALSE), + * which is indistinguishable from "no rows matched" to the user. Throw at planning time so the + * behavior is consistent with referencing a non-existent column. + * + * @param table table backing the LogicalOlapScan + * @param columnName column name (parent column name when isVariantParent) + * @param dsl original DSL, used in the error message + * @param isVariantParent true when {@code columnName} is the parent of a variant subcolumn + * access (e.g. {@code msg.body}); for that case any INVERTED index on + * the parent column is accepted because the concrete subcolumn binding + * is resolved per-segment in BE. + */ + private void checkInvertedIndexExists(OlapTable table, String columnName, String dsl, + boolean isVariantParent) { + Column column = table.getColumn(columnName); + if (column == null) { + // Field existence is already validated by findSlotByName; if we reach here the schema + // changed concurrently. Surface a clear error rather than fall through. + throw new AnalysisException(String.format( + "Column '%s' not found in table '%s' for search: %s", + columnName, table.getName(), dsl)); + } + + if (isVariantParent) { + for (Index index : table.getIndexes()) { + if (index.getIndexType() != IndexType.INVERTED) { + continue; + } + List columns = index.getColumns(); + if (columns != null && !columns.isEmpty() + && columnName.equalsIgnoreCase(columns.get(0))) { + return; + } + } + } else if (table.getInvertedIndex(column, null) != null) { + return; + } + + throw new AnalysisException(String.format( + "Field '%s' has no inverted index, cannot be used in search: %s. " + + "Create an inverted index on the column first " + + "(ALTER TABLE ... ADD INDEX ... USING INVERTED).", + columnName, dsl)); + } + private Slot findSlotByName(String fieldName, LogicalOlapScan scan) { // Direct match only - variant subcolumns are handled by caller for (Slot slot : scan.getOutput()) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java index 76e25cc3879e65..49fe9e7150474a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java @@ -17,17 +17,28 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.analysis.IndexDef.IndexType; +import org.apache.doris.catalog.AggregateType; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Index; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PartitionInfo; +import org.apache.doris.catalog.TableIndexes; +import org.apache.doris.catalog.Type; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.SearchExpression; import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.functions.scalar.Search; import org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser; import org.apache.doris.nereids.trees.expressions.literal.StringLiteral; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.types.StringType; import org.apache.doris.nereids.util.PlanConstructor; +import org.apache.doris.thrift.TStorageType; import com.google.common.collect.ImmutableList; import org.junit.jupiter.api.Assertions; @@ -229,7 +240,7 @@ public void testSlotReferenceConsistency() { @Test public void testRewriteSearchHandlesCaseInsensitiveField() throws Exception { LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), - PlanConstructor.student, ImmutableList.of("db")); + buildStudentWithInvertedIndexOnName(100L), ImmutableList.of("db")); Search searchFunc = new Search(new StringLiteral("NAME:alice")); Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod( @@ -250,6 +261,31 @@ public void testRewriteSearchHandlesCaseInsensitiveField() throws Exception { Assertions.assertEquals("name", normalizedPlan.getRoot().getField()); } + @Test + public void testRewriteSearchHandlesCaseInsensitiveVariantParentField() throws Exception { + LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), + buildVariantTableWithInvertedIndex(102L), ImmutableList.of("db")); + Search searchFunc = new Search(new StringLiteral("V.foo:bar")); + + Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod( + "rewriteSearch", Search.class, LogicalOlapScan.class); + rewriteMethod.setAccessible(true); + + Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan); + Assertions.assertInstanceOf(SearchExpression.class, rewritten); + + SearchExpression searchExpression = (SearchExpression) rewritten; + Assertions.assertEquals(1, searchExpression.getSlotChildren().size()); + Assertions.assertTrue(searchExpression.getSlotChildren().get(0) instanceof ElementAt); + ElementAt elementAt = (ElementAt) searchExpression.getSlotChildren().get(0); + Assertions.assertTrue(elementAt.child(0) instanceof SlotReference); + Assertions.assertEquals("v", ((SlotReference) elementAt.child(0)).getName()); + + SearchDslParser.QsPlan normalizedPlan = searchExpression.getQsPlan(); + Assertions.assertEquals("v.foo", normalizedPlan.getFieldBindings().get(0).getFieldName()); + Assertions.assertEquals("v.foo", normalizedPlan.getRoot().getField()); + } + @Test public void testRewriteSearchThrowsWhenFieldMissing() throws Exception { LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), @@ -266,4 +302,75 @@ public void testRewriteSearchThrowsWhenFieldMissing() throws Exception { Assertions.assertInstanceOf(AnalysisException.class, thrown.getCause()); Assertions.assertTrue(thrown.getCause().getMessage().contains("unknown_field")); } + + @Test + public void testRewriteSearchThrowsWhenColumnHasNoInvertedIndex() throws Exception { + // PlanConstructor.student has the 'name' column but no inverted index on it. The rewrite + // must surface a clear error instead of letting BE silently return an empty bitmap. + LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), + PlanConstructor.student, ImmutableList.of("db")); + Search searchFunc = new Search(new StringLiteral("name:alice")); + + Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod( + "rewriteSearch", Search.class, LogicalOlapScan.class); + rewriteMethod.setAccessible(true); + + InvocationTargetException thrown = Assertions.assertThrows(InvocationTargetException.class, + () -> rewriteMethod.invoke(rewriteRule, searchFunc, scan)); + Assertions.assertNotNull(thrown.getCause()); + Assertions.assertInstanceOf(AnalysisException.class, thrown.getCause()); + Assertions.assertTrue(thrown.getCause().getMessage().contains("inverted index"), + "Error message should mention inverted index, got: " + thrown.getCause().getMessage()); + Assertions.assertTrue(thrown.getCause().getMessage().contains("name")); + } + + @Test + public void testRewriteSearchSucceedsWhenColumnHasInvertedIndex() throws Exception { + LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(), + buildStudentWithInvertedIndexOnName(101L), ImmutableList.of("db")); + Search searchFunc = new Search(new StringLiteral("name:alice")); + + Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod( + "rewriteSearch", Search.class, LogicalOlapScan.class); + rewriteMethod.setAccessible(true); + + Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan); + Assertions.assertInstanceOf(SearchExpression.class, rewritten); + + SearchExpression searchExpression = (SearchExpression) rewritten; + Assertions.assertEquals(1, searchExpression.getSlotChildren().size()); + Assertions.assertTrue(searchExpression.getSlotChildren().get(0) instanceof SlotReference); + Assertions.assertEquals("name", + ((SlotReference) searchExpression.getSlotChildren().get(0)).getName()); + } + + private static OlapTable buildStudentWithInvertedIndexOnName(long tableId) { + List columns = ImmutableList.of( + new Column("id", Type.INT, true, AggregateType.NONE, "0", ""), + new Column("gender", Type.INT, false, AggregateType.NONE, "0", ""), + new Column("name", Type.STRING, true, AggregateType.NONE, "", ""), + new Column("age", Type.INT, true, AggregateType.NONE, "", "")); + Index invertedOnName = new Index(1L, "idx_name", ImmutableList.of("name"), + IndexType.INVERTED, null, ""); + OlapTable table = new OlapTable(tableId, "student_with_inverted_index", false, columns, + KeysType.PRIMARY_KEYS, new PartitionInfo(), null, + new TableIndexes(ImmutableList.of(invertedOnName))); + table.setIndexMeta(-1, "student_with_inverted_index", table.getFullSchema(), + 0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS); + return table; + } + + private static OlapTable buildVariantTableWithInvertedIndex(long tableId) { + List columns = ImmutableList.of( + new Column("id", Type.INT, true, AggregateType.NONE, "0", ""), + new Column("v", Type.VARIANT, false, AggregateType.NONE, "", "")); + Index invertedOnVariant = new Index(2L, "idx_v", ImmutableList.of("v"), + IndexType.INVERTED, null, ""); + OlapTable table = new OlapTable(tableId, "variant_with_inverted_index", false, columns, + KeysType.PRIMARY_KEYS, new PartitionInfo(), null, + new TableIndexes(ImmutableList.of(invertedOnVariant))); + table.setIndexMeta(-1, "variant_with_inverted_index", table.getFullSchema(), + 0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS); + return table; + } } diff --git a/regression-test/suites/search/test_search_function.groovy b/regression-test/suites/search/test_search_function.groovy index 61ee8e4b026897..25fb08cef5a106 100644 --- a/regression-test/suites/search/test_search_function.groovy +++ b/regression-test/suites/search/test_search_function.groovy @@ -153,11 +153,20 @@ suite("test_search_function", "p0") { // Test 21: ALL query test qt_sql "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${indexTableName} WHERE search('tags:ALL(machine learning)') ORDER BY id" - // Test 22: Search on non-indexed table (will throw exception) + // Test 22: Search on non-indexed table — must now throw at FE planning time. + // After the fix for Jira CIR-20006, RewriteSearchToSlots refuses to rewrite + // a SEARCH predicate against a column that has no inverted index, with an + // AnalysisException that names the column and points at "inverted index". + boolean threw = false try { sql """SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} WHERE search('title:Machine') ORDER BY id""" } catch (Exception e) { + threw = true logger.info(e.getMessage()) - assertTrue(e.getMessage().contains("SearchExpr should not be executed without inverted index")) + assertTrue(e.getMessage().contains("inverted index"), + "expected error to mention 'inverted index', got: ${e.getMessage()}") + assertTrue(e.getMessage().contains("title"), + "expected error to mention 'title', got: ${e.getMessage()}") } + assertTrue(threw, "expected AnalysisException for SEARCH on column without inverted index") }