From a267e9ed1dc1219a3522665a3eb21dd81dbefd62 Mon Sep 17 00:00:00 2001 From: Minsoo Jee Date: Wed, 3 Jan 2024 16:28:26 -0500 Subject: [PATCH] Do not filter nulls from fields that are arguments of NON_NULLABLE functions cr> create table t (a int, b int); CREATE OK, 1 row affected (0.210 sec) cr> insert into t values (null, null), (null, 2), (2, null), (2, 2); INSERT OK, 4 rows affected (0.032 sec) cr> select * from t where a != (b||1); +---+---+ | a | b | +---+---+ | 2 | 2 | --> missing a row, see below +---+---+ SELECT 1 row in set (0.022 sec) cr> select a, b, a != (b||1) from t; +------+------+----------------------------+ | a | b | (NOT (a = concat(b, '1'))) | +------+------+----------------------------+ | NULL | 2 | NULL | | NULL | NULL | NULL | | 2 | NULL | TRUE | --> this row is missing from the result of the query above | 2 | 2 | TRUE | because of an unwanted FieldExistsQuery over field 'b' +------+------+----------------------------+ SELECT 4 rows in set (0.023 sec) --- .../expression/predicate/NotPredicate.java | 73 +++++++++---------- .../crate/lucene/CommonQueryBuilderTest.java | 30 ++++++++ .../java/io/crate/testing/QueryTester.java | 18 +++++ 3 files changed, 81 insertions(+), 40 deletions(-) diff --git a/server/src/main/java/io/crate/expression/predicate/NotPredicate.java b/server/src/main/java/io/crate/expression/predicate/NotPredicate.java index d9fc9acc2684..c797e601702a 100644 --- a/server/src/main/java/io/crate/expression/predicate/NotPredicate.java +++ b/server/src/main/java/io/crate/expression/predicate/NotPredicate.java @@ -99,25 +99,20 @@ public Boolean evaluate(TransactionContext txnCtx, NodeContext nodeCtx, Input references = new HashSet<>(); - private boolean removeNullValues = false; - private boolean useNotQuery = false; + private final HashSet nullableReferences = new HashSet<>(); + private boolean isNullable = true; private boolean enforceThreeValuedLogic = false; - void add(Reference symbol) { - references.add(symbol); + void collectNullableReferences(Reference symbol) { + nullableReferences.add(symbol); } - Set references() { - return references; + Set nullableReferences() { + return nullableReferences; } - boolean useNotQuery() { - return useNotQuery && !enforceThreeValuedLogic; - } - - boolean useFieldExistQuery() { - return removeNullValues && !enforceThreeValuedLogic; + boolean enforceThreeValuedLogic() { + return enforceThreeValuedLogic; } } @@ -127,7 +122,10 @@ private static class NullabilityVisitor extends SymbolVisitor) { if (ref.valueType().id() == DataTypes.UNTYPED_OBJECT.id()) { - context.removeNullValues = true; return null; } } } else if (Ignore3vlFunction.NAME.equals(functionName)) { - context.useNotQuery = true; + context.isNullable = false; return null; } else { var signature = function.signature(); - if (signature.hasFeature(Feature.NULLABLE)) { - context.removeNullValues = true; - } else if (signature.hasFeature(Feature.NON_NULLABLE)) { - context.useNotQuery = true; - } else { + if (signature.hasFeature(Feature.NON_NULLABLE)) { + context.isNullable = false; + } else if (!signature.hasFeature(Feature.NULLABLE)) { // default case context.enforceThreeValuedLogic = true; return null; } } + // saves and restores isNullable of the current context + // such that any non-nullables observed from the left arg is not transferred to the right arg. + boolean isNullable = context.isNullable; for (Symbol arg : function.arguments()) { arg.accept(this, context); + context.isNullable = isNullable; } return null; } @@ -202,31 +201,25 @@ public Query toQuery(Function input, LuceneQueryBuilder.Context context) { NullabilityContext ctx = new NullabilityContext(); arg.accept(INNER_VISITOR, ctx); - if (ctx.useFieldExistQuery()) { - // we can optimize with a field exist query and filter out all null values which will reduce the - // result set of the query - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(notX, BooleanClause.Occur.MUST); - for (Reference reference : ctx.references()) { - if (reference.isNullable()) { - var refExistsQuery = IsNullPredicate.refExistsQuery(reference, context, false); - if (refExistsQuery != null) { - builder.add(refExistsQuery, BooleanClause.Occur.MUST); - } - } - } - return builder.build(); - } else if (ctx.useNotQuery()) { - return new BooleanQuery.Builder() - .add(notX, Occur.MUST) - .build(); - } else { + if (ctx.enforceThreeValuedLogic()) { // we require strict 3vl logic, therefore we need to add the function as generic function filter // which is less efficient return new BooleanQuery.Builder() .add(notX, Occur.MUST) .add(LuceneQueryBuilder.genericFunctionFilter(input, context), Occur.FILTER) .build(); + } else { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(notX, BooleanClause.Occur.MUST); + for (Reference nullableRef : ctx.nullableReferences()) { + // we can optimize with a field exist query and filter out all null values which will reduce the + // result set of the query + var refExistsQuery = IsNullPredicate.refExistsQuery(nullableRef, context, false); + if (refExistsQuery != null) { + builder.add(refExistsQuery, BooleanClause.Occur.MUST); + } + } + return builder.build(); } } } diff --git a/server/src/test/java/io/crate/lucene/CommonQueryBuilderTest.java b/server/src/test/java/io/crate/lucene/CommonQueryBuilderTest.java index 6ad7000316be..5ab29982f6f0 100644 --- a/server/src/test/java/io/crate/lucene/CommonQueryBuilderTest.java +++ b/server/src/test/java/io/crate/lucene/CommonQueryBuilderTest.java @@ -750,10 +750,40 @@ public void test_neq_operator_on_nullable_and_not_nullable_args_filters_nulls() } } + // tracks a bug : https://github.com/crate/crate/pull/15280#issue-2064743724 + @Test + public void test_neq_operator_on_nullable_and_not_nullable_args_does_not_filter_nulls_from_non_nullable_arg() throws Exception { + long[] oid = new long[] {123, 124}; + int[] oidIdx = new int[]{0}; + try (QueryTester tester = new QueryTester.Builder( + THREAD_POOL, + clusterService, + Version.CURRENT, + "create table t (a int, b int)", + () -> oid[oidIdx[0]++]) // oid mapping: a: 123, b: 124 + .indexValues(List.of("a", "b"), null, null) + .indexValues(List.of("a", "b"), null, 2) + .indexValues(List.of("a", "b"), 2, null) + .indexValues(List.of("a", "b"), 2, 2) + .build()) { + assertThat(oidIdx[0]).isEqualTo(2); + Query query = tester.toQuery("a != b||1"); // where a is nullable and b||1 is not null + assertThat(query).hasToString(String.format("+(+*:* -(a = concat(b, '1'))) +FieldExistsQuery [field=%s]", oid[0])); + assertThat(tester.runQuery("b", "a != b||1")).containsExactlyInAnyOrder(2, null); + } + } + // tracks a bug: https://github.com/crate/crate/issues/15232 @Test public void test_cannot_use_field_exists_query_on_args_of_coalesce_function() { Query query = convert("coalesce(x, y) <> 0"); assertThat(query).hasToString("+(+*:* -(coalesce(x, y) = 0)) #(NOT (coalesce(x, y) = 0))"); } + + // tracks a bug : https://github.com/crate/crate/issues/15265 + @Test + public void test_nested_not_operators() { + Query query = convert("not (y is not null)"); + assertThat(query).hasToString("+(+*:* -FieldExistsQuery [field=y])"); + } } diff --git a/server/src/testFixtures/java/io/crate/testing/QueryTester.java b/server/src/testFixtures/java/io/crate/testing/QueryTester.java index 369c3f13ab1c..7c5d2648df05 100644 --- a/server/src/testFixtures/java/io/crate/testing/QueryTester.java +++ b/server/src/testFixtures/java/io/crate/testing/QueryTester.java @@ -44,6 +44,7 @@ import io.crate.analyze.relations.DocTableRelation; import io.crate.common.collections.Iterables; +import io.crate.common.collections.Lists2; import io.crate.data.Input; import io.crate.execution.dml.IndexItem; import io.crate.execution.dml.Indexer; @@ -150,6 +151,23 @@ public Builder indexValue(String column, Object value) throws IOException { return this; } + public Builder indexValues(List columns, Object ... values) throws IOException { + MapperService mapperService = indexEnv.mapperService(); + Indexer indexer = new Indexer( + table.concreteIndices()[0], + table, + plannerContext.transactionContext(), + plannerContext.nodeContext(), + mapperService::getLuceneFieldType, + Lists2.map(columns, c -> table.getReference(ColumnIdent.fromPath(c))), + null + ); + var item = new IndexItem.StaticItem("dummy-id", List.of(), values, -1L, -1L); + ParsedDocument parsedDocument = indexer.index(item); + indexEnv.writer().addDocument(parsedDocument.doc()); + return this; + } + private LuceneBatchIterator getIterator(ColumnIdent column, Query query) { InputFactory inputFactory = new InputFactory(plannerContext.nodeContext()); InputFactory.Context> ctx = inputFactory.ctxForRefs(