Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3801,6 +3801,9 @@ public Index getInvertedIndex(Column column, List<String> subPath) {
}

public Index getInvertedIndex(Column column, List<String> subPath, String analyzer) {
if (indexes == null) {
return null;
}
List<Index> invertedIndexes = new ArrayList<>();
for (Index index : indexes.getIndexes()) {
if (index.getIndexType() == IndexDef.IndexType.INVERTED) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.analysis.IndexDef.IndexType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Index;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
Expand Down Expand Up @@ -127,17 +131,24 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) {
"Field '%s' is not VARIANT type for subcolumn access: %s",
parentFieldName, search.getDslString()));
}
String normalizedParentFieldName = parentSlot.getName();

// Check the parent variant column has at least one INVERTED index. The concrete
// subcolumn binding is resolved per-segment in BE, so we only enforce the parent
// level here. See function_search.cpp is_variant_sub branch.
checkInvertedIndexExists(scan.getTable(), normalizedParentFieldName,
search.getDslString(), true);

// Create ElementAt expression for variant subcolumn
// This will be converted to an extracted column slot by VariantSubPathPruning rule
// If the subcolumn doesn't exist, ElementAt will remain and BE will handle it gracefully
childExpr = new ElementAt(parentSlot, new StringLiteral(subcolumnPath));
normalizedFieldName = originalFieldName; // Keep full path for field binding
normalizedFieldName = normalizedParentFieldName + "." + subcolumnPath;

LOG.info(
"Created ElementAt expression for variant subcolumn: parent='{}', "
+ "subcolumn='{}', field_name='{}'",
parentFieldName, subcolumnPath, normalizedFieldName);
normalizedParentFieldName, subcolumnPath, normalizedFieldName);
} else {
// Normal field - find slot directly
Slot slot = findSlotByName(originalFieldName, scan);
Expand All @@ -146,6 +157,7 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) {
"Field '%s' not found in table for search: %s",
originalFieldName, search.getDslString()));
}
checkInvertedIndexExists(scan.getTable(), slot.getName(), search.getDslString(), false);
childExpr = slot;
normalizedFieldName = slot.getName();
}
Expand All @@ -168,6 +180,53 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) {
}
}

/**
* Ensure the column referenced by a Lucene-syntax SEARCH predicate has an inverted index.
* Without this check the BE path would silently fall back to an empty bitmap (i.e. all FALSE),
* which is indistinguishable from "no rows matched" to the user. Throw at planning time so the
* behavior is consistent with referencing a non-existent column.
*
* @param table table backing the LogicalOlapScan
* @param columnName column name (parent column name when isVariantParent)
* @param dsl original DSL, used in the error message
* @param isVariantParent true when {@code columnName} is the parent of a variant subcolumn
* access (e.g. {@code msg.body}); for that case any INVERTED index on
* the parent column is accepted because the concrete subcolumn binding
* is resolved per-segment in BE.
*/
private void checkInvertedIndexExists(OlapTable table, String columnName, String dsl,
boolean isVariantParent) {
Column column = table.getColumn(columnName);
if (column == null) {
// Field existence is already validated by findSlotByName; if we reach here the schema
// changed concurrently. Surface a clear error rather than fall through.
throw new AnalysisException(String.format(
"Column '%s' not found in table '%s' for search: %s",
columnName, table.getName(), dsl));
}

if (isVariantParent) {
for (Index index : table.getIndexes()) {
if (index.getIndexType() != IndexType.INVERTED) {
continue;
}
List<String> columns = index.getColumns();
if (columns != null && !columns.isEmpty()
&& columnName.equalsIgnoreCase(columns.get(0))) {
return;
}
}
} else if (table.getInvertedIndex(column, null) != null) {
return;
}

throw new AnalysisException(String.format(
"Field '%s' has no inverted index, cannot be used in search: %s. "
+ "Create an inverted index on the column first "
+ "(ALTER TABLE ... ADD INDEX ... USING INVERTED).",
columnName, dsl));
}

private Slot findSlotByName(String fieldName, LogicalOlapScan scan) {
// Direct match only - variant subcolumns are handled by caller
for (Slot slot : scan.getOutput()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,28 @@

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.analysis.IndexDef.IndexType;
import org.apache.doris.catalog.AggregateType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Index;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.TableIndexes;
import org.apache.doris.catalog.Type;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.SearchExpression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Search;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser;
import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.thrift.TStorageType;

import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.Assertions;
Expand Down Expand Up @@ -229,7 +240,7 @@ public void testSlotReferenceConsistency() {
@Test
public void testRewriteSearchHandlesCaseInsensitiveField() throws Exception {
LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(),
PlanConstructor.student, ImmutableList.of("db"));
buildStudentWithInvertedIndexOnName(100L), ImmutableList.of("db"));
Search searchFunc = new Search(new StringLiteral("NAME:alice"));

Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
Expand All @@ -250,6 +261,31 @@ public void testRewriteSearchHandlesCaseInsensitiveField() throws Exception {
Assertions.assertEquals("name", normalizedPlan.getRoot().getField());
}

@Test
public void testRewriteSearchHandlesCaseInsensitiveVariantParentField() throws Exception {
LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(),
buildVariantTableWithInvertedIndex(102L), ImmutableList.of("db"));
Search searchFunc = new Search(new StringLiteral("V.foo:bar"));

Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
"rewriteSearch", Search.class, LogicalOlapScan.class);
rewriteMethod.setAccessible(true);

Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan);
Assertions.assertInstanceOf(SearchExpression.class, rewritten);

SearchExpression searchExpression = (SearchExpression) rewritten;
Assertions.assertEquals(1, searchExpression.getSlotChildren().size());
Assertions.assertTrue(searchExpression.getSlotChildren().get(0) instanceof ElementAt);
ElementAt elementAt = (ElementAt) searchExpression.getSlotChildren().get(0);
Assertions.assertTrue(elementAt.child(0) instanceof SlotReference);
Assertions.assertEquals("v", ((SlotReference) elementAt.child(0)).getName());

SearchDslParser.QsPlan normalizedPlan = searchExpression.getQsPlan();
Assertions.assertEquals("v.foo", normalizedPlan.getFieldBindings().get(0).getFieldName());
Assertions.assertEquals("v.foo", normalizedPlan.getRoot().getField());
}

@Test
public void testRewriteSearchThrowsWhenFieldMissing() throws Exception {
LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(),
Expand All @@ -266,4 +302,75 @@ public void testRewriteSearchThrowsWhenFieldMissing() throws Exception {
Assertions.assertInstanceOf(AnalysisException.class, thrown.getCause());
Assertions.assertTrue(thrown.getCause().getMessage().contains("unknown_field"));
}

@Test
public void testRewriteSearchThrowsWhenColumnHasNoInvertedIndex() throws Exception {
// PlanConstructor.student has the 'name' column but no inverted index on it. The rewrite
// must surface a clear error instead of letting BE silently return an empty bitmap.
LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(),
PlanConstructor.student, ImmutableList.of("db"));
Search searchFunc = new Search(new StringLiteral("name:alice"));

Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
"rewriteSearch", Search.class, LogicalOlapScan.class);
rewriteMethod.setAccessible(true);

InvocationTargetException thrown = Assertions.assertThrows(InvocationTargetException.class,
() -> rewriteMethod.invoke(rewriteRule, searchFunc, scan));
Assertions.assertNotNull(thrown.getCause());
Assertions.assertInstanceOf(AnalysisException.class, thrown.getCause());
Assertions.assertTrue(thrown.getCause().getMessage().contains("inverted index"),
"Error message should mention inverted index, got: " + thrown.getCause().getMessage());
Assertions.assertTrue(thrown.getCause().getMessage().contains("name"));
}

@Test
public void testRewriteSearchSucceedsWhenColumnHasInvertedIndex() throws Exception {
LogicalOlapScan scan = new LogicalOlapScan(PlanConstructor.getNextRelationId(),
buildStudentWithInvertedIndexOnName(101L), ImmutableList.of("db"));
Search searchFunc = new Search(new StringLiteral("name:alice"));

Method rewriteMethod = RewriteSearchToSlots.class.getDeclaredMethod(
"rewriteSearch", Search.class, LogicalOlapScan.class);
rewriteMethod.setAccessible(true);

Object rewritten = rewriteMethod.invoke(rewriteRule, searchFunc, scan);
Assertions.assertInstanceOf(SearchExpression.class, rewritten);

SearchExpression searchExpression = (SearchExpression) rewritten;
Assertions.assertEquals(1, searchExpression.getSlotChildren().size());
Assertions.assertTrue(searchExpression.getSlotChildren().get(0) instanceof SlotReference);
Assertions.assertEquals("name",
((SlotReference) searchExpression.getSlotChildren().get(0)).getName());
}

private static OlapTable buildStudentWithInvertedIndexOnName(long tableId) {
List<Column> columns = ImmutableList.of(
new Column("id", Type.INT, true, AggregateType.NONE, "0", ""),
new Column("gender", Type.INT, false, AggregateType.NONE, "0", ""),
new Column("name", Type.STRING, true, AggregateType.NONE, "", ""),
new Column("age", Type.INT, true, AggregateType.NONE, "", ""));
Index invertedOnName = new Index(1L, "idx_name", ImmutableList.of("name"),
IndexType.INVERTED, null, "");
OlapTable table = new OlapTable(tableId, "student_with_inverted_index", false, columns,
KeysType.PRIMARY_KEYS, new PartitionInfo(), null,
new TableIndexes(ImmutableList.of(invertedOnName)));
table.setIndexMeta(-1, "student_with_inverted_index", table.getFullSchema(),
0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS);
return table;
}

private static OlapTable buildVariantTableWithInvertedIndex(long tableId) {
List<Column> columns = ImmutableList.of(
new Column("id", Type.INT, true, AggregateType.NONE, "0", ""),
new Column("v", Type.VARIANT, false, AggregateType.NONE, "", ""));
Index invertedOnVariant = new Index(2L, "idx_v", ImmutableList.of("v"),
IndexType.INVERTED, null, "");
OlapTable table = new OlapTable(tableId, "variant_with_inverted_index", false, columns,
KeysType.PRIMARY_KEYS, new PartitionInfo(), null,
new TableIndexes(ImmutableList.of(invertedOnVariant)));
table.setIndexMeta(-1, "variant_with_inverted_index", table.getFullSchema(),
0, 0, (short) 0, TStorageType.COLUMN, KeysType.PRIMARY_KEYS);
return table;
}
}
13 changes: 11 additions & 2 deletions regression-test/suites/search/test_search_function.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,20 @@ suite("test_search_function", "p0") {
// Test 21: ALL query test
qt_sql "SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${indexTableName} WHERE search('tags:ALL(machine learning)') ORDER BY id"

// Test 22: Search on non-indexed table (will throw exception)
// Test 22: Search on non-indexed table — must now throw at FE planning time.
// After the fix for Jira CIR-20006, RewriteSearchToSlots refuses to rewrite
// a SEARCH predicate against a column that has no inverted index, with an
// AnalysisException that names the column and points at "inverted index".
boolean threw = false
try {
sql """SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} WHERE search('title:Machine') ORDER BY id"""
} catch (Exception e) {
threw = true
logger.info(e.getMessage())
assertTrue(e.getMessage().contains("SearchExpr should not be executed without inverted index"))
assertTrue(e.getMessage().contains("inverted index"),
"expected error to mention 'inverted index', got: ${e.getMessage()}")
assertTrue(e.getMessage().contains("title"),
"expected error to mention 'title', got: ${e.getMessage()}")
}
assertTrue(threw, "expected AnalysisException for SEARCH on column without inverted index")
}
Loading