Skip to content

Commit

Permalink
Do not filter nulls from fields that are arguments of NON_NULLABLE fu…
Browse files Browse the repository at this point in the history
…nctions

cr> create table t (a int, b int);
CREATE OK, 1 row affected (0.210 sec)
cr> insert into t values (null, null), (null, 2), (2, null), (2, 2);
INSERT OK, 4 rows affected (0.032 sec)

cr> select * from t where a != (b||1);
+---+---+
| a | b |
+---+---+
| 2 | 2 |  --> missing a row, see below
+---+---+
SELECT 1 row in set (0.022 sec)

cr> select a, b, a != (b||1) from t;
+------+------+----------------------------+
|    a |    b | (NOT (a = concat(b, '1'))) |
+------+------+----------------------------+
| NULL |    2 | NULL                       |
| NULL | NULL | NULL                       |
|    2 | NULL | TRUE                       | --> this row is missing from the result of the query above
|    2 |    2 | TRUE                       |     because of an unwanted FieldExistsQuery over field 'b'
+------+------+----------------------------+
SELECT 4 rows in set (0.023 sec)
  • Loading branch information
jeeminso committed Jan 10, 2024
1 parent 0f65ed9 commit 45243e8
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
Expand Down Expand Up @@ -99,25 +100,28 @@ public Boolean evaluate(TransactionContext txnCtx, NodeContext nodeCtx, Input<Bo
private final NullabilityVisitor INNER_VISITOR = new NullabilityVisitor();

private static class NullabilityContext {
private final HashSet<Reference> references = new HashSet<>();
private boolean removeNullValues = false;
private boolean useNotQuery = false;
private boolean enforceThreeValuedLogic = false;

void add(Reference symbol) {
references.add(symbol);
private HashSet<Reference> nullableReferences = new HashSet<>();
private boolean isNullable = true;
private AtomicBoolean enforceThreeValuedLogic = new AtomicBoolean(false);

private static NullabilityContext of(NullabilityContext parent) {
NullabilityContext child = new NullabilityContext();
child.nullableReferences = parent.nullableReferences;
child.isNullable = parent.isNullable;
child.enforceThreeValuedLogic = parent.enforceThreeValuedLogic;
return child;
}

Set<Reference> references() {
return references;
void collectNullableReferences(Reference symbol) {
nullableReferences.add(symbol);
}

boolean useNotQuery() {
return useNotQuery && !enforceThreeValuedLogic;
Set<Reference> nullableReferences() {
return nullableReferences;
}

boolean useFieldExistQuery() {
return removeNullValues && !enforceThreeValuedLogic;
boolean enforceThreeValuedLogic() {
return enforceThreeValuedLogic.get();
}
}

Expand All @@ -127,7 +131,10 @@ private static class NullabilityVisitor extends SymbolVisitor<NullabilityContext

@Override
public Void visitReference(Reference symbol, NullabilityContext context) {
context.add(symbol);
// if ref is nullable and all its parents are nullable
if (symbol.isNullable() && context.isNullable) {
context.collectNullableReferences(symbol);
}
return null;
}

Expand All @@ -142,27 +149,24 @@ public Void visitFunction(Function function, NullabilityContext context) {
var b = function.arguments().get(1);
if (a instanceof Reference ref && b instanceof Literal<?>) {
if (ref.valueType().id() == DataTypes.UNTYPED_OBJECT.id()) {
context.removeNullValues = true;
return null;
}
}
} else if (Ignore3vlFunction.NAME.equals(functionName)) {
context.useNotQuery = true;
context.isNullable = false;
return null;
} else {
var signature = function.signature();
if (signature.hasFeature(Feature.NULLABLE)) {
context.removeNullValues = true;
} else if (signature.hasFeature(Feature.NON_NULLABLE)) {
context.useNotQuery = true;
} else {
if (signature.hasFeature(Feature.NON_NULLABLE)) {
context.isNullable = false;
} else if (!signature.hasFeature(Feature.NULLABLE)) {
// default case
context.enforceThreeValuedLogic = true;
context.enforceThreeValuedLogic.set(Boolean.TRUE);
return null;
}
}
for (Symbol arg : function.arguments()) {
arg.accept(this, context);
arg.accept(this, NullabilityContext.of(context));
}
return null;
}
Expand Down Expand Up @@ -202,31 +206,25 @@ public Query toQuery(Function input, LuceneQueryBuilder.Context context) {
NullabilityContext ctx = new NullabilityContext();
arg.accept(INNER_VISITOR, ctx);

if (ctx.useFieldExistQuery()) {
// we can optimize with a field exist query and filter out all null values which will reduce the
// result set of the query
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(notX, BooleanClause.Occur.MUST);
for (Reference reference : ctx.references()) {
if (reference.isNullable()) {
var refExistsQuery = IsNullPredicate.refExistsQuery(reference, context, false);
if (refExistsQuery != null) {
builder.add(refExistsQuery, BooleanClause.Occur.MUST);
}
}
}
return builder.build();
} else if (ctx.useNotQuery()) {
return new BooleanQuery.Builder()
.add(notX, Occur.MUST)
.build();
} else {
if (ctx.enforceThreeValuedLogic()) {
// we require strict 3vl logic, therefore we need to add the function as generic function filter
// which is less efficient
return new BooleanQuery.Builder()
.add(notX, Occur.MUST)
.add(LuceneQueryBuilder.genericFunctionFilter(input, context), Occur.FILTER)
.build();
} else {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(notX, BooleanClause.Occur.MUST);
for (Reference nullableRef : ctx.nullableReferences()) {
// we can optimize with a field exist query and filter out all null values which will reduce the
// result set of the query
var refExistsQuery = IsNullPredicate.refExistsQuery(nullableRef, context, false);
if (refExistsQuery != null) {
builder.add(refExistsQuery, BooleanClause.Occur.MUST);
}
}
return builder.build();
}
}
}
30 changes: 30 additions & 0 deletions server/src/test/java/io/crate/lucene/CommonQueryBuilderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -750,10 +750,40 @@ public void test_neq_operator_on_nullable_and_not_nullable_args_filters_nulls()
}
}

// tracks a bug : https://github.com/crate/crate/pull/15280#issue-2064743724
@Test
public void test_neq_operator_on_nullable_and_not_nullable_args_does_not_filter_nulls_from_non_nullable_arg() throws Exception {
long[] oid = new long[] {123, 124};
int[] oidIdx = new int[]{0};
try (QueryTester tester = new QueryTester.Builder(
THREAD_POOL,
clusterService,
Version.CURRENT,
"create table t (a int, b int)",
() -> oid[oidIdx[0]++]) // oid mapping: a: 123, b: 124
.indexValues(List.of("a", "b"), null, null)
.indexValues(List.of("a", "b"), null, 2)
.indexValues(List.of("a", "b"), 2, null)
.indexValues(List.of("a", "b"), 2, 2)
.build()) {
assertThat(oidIdx[0]).isEqualTo(2);
Query query = tester.toQuery("a != b||1"); // where a is nullable and b||1 is not null
assertThat(query).hasToString(String.format("+(+*:* -(a = concat(b, '1'))) +FieldExistsQuery [field=%s]", oid[0]));
assertThat(tester.runQuery("b", "a != b||1")).containsExactlyInAnyOrder(2, null);
}
}

// tracks a bug: https://github.com/crate/crate/issues/15232
@Test
public void test_cannot_use_field_exists_query_on_args_of_coalesce_function() {
Query query = convert("coalesce(x, y) <> 0");
assertThat(query).hasToString("+(+*:* -(coalesce(x, y) = 0)) #(NOT (coalesce(x, y) = 0))");
}

// tracks a bug : https://github.com/crate/crate/issues/15265
@Test
public void test_nested_not_operators() {
Query query = convert("not (y is not null)");
assertThat(query).hasToString("+(+*:* -FieldExistsQuery [field=y])");
}
}
18 changes: 18 additions & 0 deletions server/src/testFixtures/java/io/crate/testing/QueryTester.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

import io.crate.analyze.relations.DocTableRelation;
import io.crate.common.collections.Iterables;
import io.crate.common.collections.Lists2;
import io.crate.data.Input;
import io.crate.execution.dml.IndexItem;
import io.crate.execution.dml.Indexer;
Expand Down Expand Up @@ -150,6 +151,23 @@ public Builder indexValue(String column, Object value) throws IOException {
return this;
}

public Builder indexValues(List<String> columns, Object ... values) throws IOException {
MapperService mapperService = indexEnv.mapperService();
Indexer indexer = new Indexer(
table.concreteIndices()[0],
table,
plannerContext.transactionContext(),
plannerContext.nodeContext(),
mapperService::getLuceneFieldType,
Lists2.map(columns, c -> table.getReference(ColumnIdent.fromPath(c))),
null
);
var item = new IndexItem.StaticItem("dummy-id", List.of(), values, -1L, -1L);
ParsedDocument parsedDocument = indexer.index(item);
indexEnv.writer().addDocument(parsedDocument.doc());
return this;
}

private LuceneBatchIterator getIterator(ColumnIdent column, Query query) {
InputFactory inputFactory = new InputFactory(plannerContext.nodeContext());
InputFactory.Context<LuceneCollectorExpression<?>> ctx = inputFactory.ctxForRefs(
Expand Down

0 comments on commit 45243e8

Please sign in to comment.