Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not use FieldExistsQuery over REF that is arg of NON_NULLABLE function #15280

Merged
merged 1 commit into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -99,25 +99,20 @@ public Boolean evaluate(TransactionContext txnCtx, NodeContext nodeCtx, Input<Bo
private final NullabilityVisitor INNER_VISITOR = new NullabilityVisitor();

private static class NullabilityContext {
private final HashSet<Reference> references = new HashSet<>();
private boolean removeNullValues = false;
private boolean useNotQuery = false;
private final HashSet<Reference> nullableReferences = new HashSet<>();
private boolean isNullable = true;
private boolean enforceThreeValuedLogic = false;

void add(Reference symbol) {
references.add(symbol);
void collectNullableReferences(Reference symbol) {
nullableReferences.add(symbol);
}

Set<Reference> references() {
return references;
Set<Reference> nullableReferences() {
return nullableReferences;
}

boolean useNotQuery() {
return useNotQuery && !enforceThreeValuedLogic;
}

boolean useFieldExistQuery() {
return removeNullValues && !enforceThreeValuedLogic;
boolean enforceThreeValuedLogic() {
return enforceThreeValuedLogic;
}
}

Expand All @@ -127,7 +122,10 @@ private static class NullabilityVisitor extends SymbolVisitor<NullabilityContext

@Override
public Void visitReference(Reference symbol, NullabilityContext context) {
context.add(symbol);
// if ref is nullable and all its parents are nullable
if (symbol.isNullable() && context.isNullable) {
context.collectNullableReferences(symbol);
}
return null;
}

Expand All @@ -142,27 +140,28 @@ public Void visitFunction(Function function, NullabilityContext context) {
var b = function.arguments().get(1);
if (a instanceof Reference ref && b instanceof Literal<?>) {
if (ref.valueType().id() == DataTypes.UNTYPED_OBJECT.id()) {
context.removeNullValues = true;
return null;
}
}
} else if (Ignore3vlFunction.NAME.equals(functionName)) {
context.useNotQuery = true;
context.isNullable = false;
return null;
} else {
var signature = function.signature();
if (signature.hasFeature(Feature.NULLABLE)) {
context.removeNullValues = true;
} else if (signature.hasFeature(Feature.NON_NULLABLE)) {
context.useNotQuery = true;
} else {
if (signature.hasFeature(Feature.NON_NULLABLE)) {
context.isNullable = false;
} else if (!signature.hasFeature(Feature.NULLABLE)) {
// default case
context.enforceThreeValuedLogic = true;
return null;
}
}
// saves and restores isNullable of the current context
// such that any non-nullables observed from the left arg is not transferred to the right arg.
boolean isNullable = context.isNullable;
for (Symbol arg : function.arguments()) {
arg.accept(this, context);
context.isNullable = isNullable;
Comment on lines +159 to +164
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for reviewing @mkleen . I think the static method of was the cause of the confusion. Please check again.

}
return null;
}
Expand Down Expand Up @@ -202,31 +201,25 @@ public Query toQuery(Function input, LuceneQueryBuilder.Context context) {
NullabilityContext ctx = new NullabilityContext();
arg.accept(INNER_VISITOR, ctx);

if (ctx.useFieldExistQuery()) {
// we can optimize with a field exist query and filter out all null values which will reduce the
// result set of the query
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(notX, BooleanClause.Occur.MUST);
for (Reference reference : ctx.references()) {
if (reference.isNullable()) {
var refExistsQuery = IsNullPredicate.refExistsQuery(reference, context, false);
if (refExistsQuery != null) {
builder.add(refExistsQuery, BooleanClause.Occur.MUST);
}
}
}
return builder.build();
} else if (ctx.useNotQuery()) {
return new BooleanQuery.Builder()
.add(notX, Occur.MUST)
.build();
} else {
if (ctx.enforceThreeValuedLogic()) {
// we require strict 3vl logic, therefore we need to add the function as generic function filter
// which is less efficient
return new BooleanQuery.Builder()
.add(notX, Occur.MUST)
.add(LuceneQueryBuilder.genericFunctionFilter(input, context), Occur.FILTER)
.build();
} else {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(notX, BooleanClause.Occur.MUST);
for (Reference nullableRef : ctx.nullableReferences()) {
// we can optimize with a field exist query and filter out all null values which will reduce the
// result set of the query
var refExistsQuery = IsNullPredicate.refExistsQuery(nullableRef, context, false);
if (refExistsQuery != null) {
builder.add(refExistsQuery, BooleanClause.Occur.MUST);
}
}
return builder.build();
}
}
}
30 changes: 30 additions & 0 deletions server/src/test/java/io/crate/lucene/CommonQueryBuilderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -750,10 +750,40 @@ public void test_neq_operator_on_nullable_and_not_nullable_args_filters_nulls()
}
}

// tracks a bug : https://github.com/crate/crate/pull/15280#issue-2064743724
@Test
public void test_neq_operator_on_nullable_and_not_nullable_args_does_not_filter_nulls_from_non_nullable_arg() throws Exception {
long[] oid = new long[] {123, 124};
int[] oidIdx = new int[]{0};
try (QueryTester tester = new QueryTester.Builder(
THREAD_POOL,
clusterService,
Version.CURRENT,
"create table t (a int, b int)",
() -> oid[oidIdx[0]++]) // oid mapping: a: 123, b: 124
.indexValues(List.of("a", "b"), null, null)
.indexValues(List.of("a", "b"), null, 2)
.indexValues(List.of("a", "b"), 2, null)
.indexValues(List.of("a", "b"), 2, 2)
.build()) {
assertThat(oidIdx[0]).isEqualTo(2);
Query query = tester.toQuery("a != b||1"); // where a is nullable and b||1 is not null
assertThat(query).hasToString(String.format("+(+*:* -(a = concat(b, '1'))) +FieldExistsQuery [field=%s]", oid[0]));
assertThat(tester.runQuery("b", "a != b||1")).containsExactlyInAnyOrder(2, null);
}
}

// tracks a bug: https://github.com/crate/crate/issues/15232
@Test
public void test_cannot_use_field_exists_query_on_args_of_coalesce_function() {
Query query = convert("coalesce(x, y) <> 0");
assertThat(query).hasToString("+(+*:* -(coalesce(x, y) = 0)) #(NOT (coalesce(x, y) = 0))");
}

// tracks a bug : https://github.com/crate/crate/issues/15265
@Test
public void test_nested_not_operators() {
Query query = convert("not (y is not null)");
assertThat(query).hasToString("+(+*:* -FieldExistsQuery [field=y])");
}
}
18 changes: 18 additions & 0 deletions server/src/testFixtures/java/io/crate/testing/QueryTester.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

import io.crate.analyze.relations.DocTableRelation;
import io.crate.common.collections.Iterables;
import io.crate.common.collections.Lists2;
import io.crate.data.Input;
import io.crate.execution.dml.IndexItem;
import io.crate.execution.dml.Indexer;
Expand Down Expand Up @@ -150,6 +151,23 @@ public Builder indexValue(String column, Object value) throws IOException {
return this;
}

public Builder indexValues(List<String> columns, Object ... values) throws IOException {
MapperService mapperService = indexEnv.mapperService();
Indexer indexer = new Indexer(
table.concreteIndices()[0],
table,
plannerContext.transactionContext(),
plannerContext.nodeContext(),
mapperService::getLuceneFieldType,
Lists2.map(columns, c -> table.getReference(ColumnIdent.fromPath(c))),
null
);
var item = new IndexItem.StaticItem("dummy-id", List.of(), values, -1L, -1L);
ParsedDocument parsedDocument = indexer.index(item);
indexEnv.writer().addDocument(parsedDocument.doc());
return this;
}

private LuceneBatchIterator getIterator(ColumnIdent column, Query query) {
InputFactory inputFactory = new InputFactory(plannerContext.nodeContext());
InputFactory.Context<LuceneCollectorExpression<?>> ctx = inputFactory.ctxForRefs(
Expand Down