Skip to content

Commit

Permalink
Use index for IS NOT NULL [HZ-3014]
Browse files Browse the repository at this point in the history
  • Loading branch information
k-jamroz committed Sep 26, 2023
1 parent ca9f950 commit ef2ace9
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,26 @@ private static void createFromIndexFilterInt(
// based on null end (before conversion) meaning different things.
// The above affects only `from` because NULLs are smaller than any other value and only DESC sort order
// for which `to` is updated during the scan.
result.add(IndexIterationPointer.create(!compositeIndex && descending ? NULL : null, true,
null, true, descending, null));
if (!compositeIndex && descending) {
result.add(IndexIterationPointer.ALL_ALT_DESC);
} else {
result.add(descending ? IndexIterationPointer.ALL_DESC : IndexIterationPointer.ALL);
}
}
if (indexFilter instanceof IndexRangeFilter) {
IndexRangeFilter rangeFilter = (IndexRangeFilter) indexFilter;

Comparable<?> from = null;
if (rangeFilter.getFrom() == null && rangeFilter.getTo() == null) {
// IS NOT NULL range
assert !compositeIndex : "IS NOT NULL range should not be generated for composite index";
result.add(descending ? IndexIterationPointer.IS_NOT_NULL_DESC : IndexIterationPointer.IS_NOT_NULL);
return;
}

// Range filter for non-composite index never includes NULLs.
// Composite index should have both ends specified. They might also cover NULL values for individual components,
// but from/to themselves will never be NULL; they will be CompositeValue instances.
Comparable<?> from = compositeIndex ? null : NULL;
if (rangeFilter.getFrom() != null) {
Comparable<?> fromValue = rangeFilter.getFrom().getValue(evalContext);
// If the index filter has expression like a > NULL, we need to
Expand All @@ -169,7 +182,7 @@ private static void createFromIndexFilterInt(
to = toValue;
}

if (from != null && to != null) {
if (to != null) {
int cmp = ((Comparable) from).compareTo(to);
if (cmp > 0 || (cmp == 0 && (!rangeFilter.isFromInclusive() || !rangeFilter.isToInclusive()))) {
// Range scan with from > to would produce empty result.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,22 @@ private static IndexComponentFilter convertFromRangeFilters(
boolean fromInclusive = false;
IndexFilterValue to = null;
boolean toInclusive = false;
boolean found = false;
List<RexNode> expressions = new ArrayList<>(2);

for (IndexComponentCandidate candidate : candidates) {
if (!(candidate.getFilter() instanceof IndexRangeFilter)) {
continue;
}

found = true;
IndexRangeFilter candidateFilter = (IndexRangeFilter) candidate.getFilter();

// Use first matching candidate to define range.
// We do not expect many candidates with literal values as they should be simplified by Calcite.
// When there are both literals and dynamic parameters, we choose one of them.
// Maybe we could be preferring literals, but in general it is not possible to know upfront
// which one would be better.
if (from == null && candidateFilter.getFrom() != null) {
from = candidateFilter.getFrom();
fromInclusive = candidateFilter.isFromInclusive();
Expand All @@ -142,9 +149,14 @@ private static IndexComponentFilter convertFromRangeFilters(
toInclusive = candidateFilter.isToInclusive();
expressions.add(candidate.getExpression());
}

if (from == null && to == null && candidateFilter.getFrom() == null && candidateFilter.getTo() == null) {
// IS NOT NULL filter
expressions.add(candidate.getExpression());
}
}

if (from != null || to != null) {
if (found) {
IndexRangeFilter filter = new IndexRangeFilter(from, fromInclusive, to, toInclusive);
return new IndexComponentFilter(filter, expressions, converterType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,13 @@ private static IndexComponentCandidate prepareSingleColumnCandidate(
removeCastIfPossible(((RexCall) exp).getOperands().get(0))
);

case IS_NOT_NULL:
// Handle SELECT * FROM t WHERE column IS NOT NULL.
return prepareSingleColumnCandidateIsNotNull(
exp,
removeCastIfPossible(((RexCall) exp).getOperands().get(0))
);

case GREATER_THAN:
case GREATER_THAN_OR_EQUAL:
case LESS_THAN:
Expand Down Expand Up @@ -554,6 +561,35 @@ private static IndexComponentCandidate prepareSingleColumnCandidateIsNull(RexNod
);
}

/**
 * Try creating a candidate filter for the "IS NOT NULL" expression.
 * <p>
 * The candidate carries an unbounded {@link IndexRangeFilter}: no lower bound, no upper bound,
 * and neither end inclusive. A range scan does not include NULLs, so a scan over the entire
 * range (-inf..+inf) is used to express the condition.
 *
 * @param exp     original expression, e.g. {col IS NOT NULL}
 * @param operand operand, e.g. {col}; CAST must be unwrapped before the method is invoked
 * @return candidate for the referenced column, or {@code null} if the operand is not a
 *         direct column reference, in which case the index cannot be used
 */
private static IndexComponentCandidate prepareSingleColumnCandidateIsNotNull(RexNode exp, RexNode operand) {
    if (operand.getKind() != SqlKind.INPUT_REF) {
        // The operand is not a column, e.g. {'literal' IS NOT NULL}, index cannot be used
        return null;
    }

    int columnIndex = ((RexInputRef) operand).getIndex();

    // Create a range scan for entire range (-inf..+inf), range scan does not include nulls.
    // The filter has no bound values, so no value type has to be resolved for the operand.
    IndexFilter filter = new IndexRangeFilter(null, false, null, false);

    return new IndexComponentCandidate(exp, columnIndex, filter);
}

/**
* Try creating a candidate filter for comparison operator.
*
Expand Down Expand Up @@ -783,11 +819,6 @@ private static IndexComponentCandidate prepareSingleColumnCandidateOr(

IndexFilter candidateFilter = candidate.getFilter();

if (!(candidateFilter instanceof IndexEqualsFilter || candidateFilter instanceof IndexCompositeFilter)) {
// Support only equality for ORs
return null;
}

// Make sure that all '=' expressions relate to a single column
if (columnIndex == null) {
columnIndex = candidate.getColumnIndex();
Expand All @@ -796,7 +827,7 @@ private static IndexComponentCandidate prepareSingleColumnCandidateOr(
}

// Flatten. E.g. ((a=1 OR a=2) OR a=3) is parsed into IN(1, 2) and OR(3), which is then flattened into IN(1, 2, 3)
if (candidateFilter instanceof IndexEqualsFilter) {
if (candidateFilter instanceof IndexEqualsFilter || candidateFilter instanceof IndexRangeFilter) {
filters.add(candidateFilter);
} else {
filters.addAll(((IndexCompositeFilter) candidateFilter).getFilters());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
import java.util.Objects;

/**
* Filter that is used for range requests. Could have either lower bound, upper bound or both.
* Filter that is used for range requests. Could have either lower bound, upper bound, both
* or none ({@code IS NOT NULL}).
* <p>
* For non-composite index: matches only NOT NULL values. If any of the bounds
* is {@link com.hazelcast.query.impl.AbstractIndex#NULL}, matches nothing.
Expand Down Expand Up @@ -61,7 +62,6 @@ public IndexRangeFilter() {
}

public IndexRangeFilter(IndexFilterValue from, boolean fromInclusive, IndexFilterValue to, boolean toInclusive) {
assert from != null || to != null;
assert from != null || !fromInclusive : "Unspecified from end must not be inclusive";
assert to != null || !toInclusive : "Unspecified to end must not be inclusive";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ public void testDisjunctionOverlappingRange() {

// this query might not use index also due to selectivity of predicates
check(query("field1>=? or field1<=?", f1.valueFrom(), f1.valueTo()),
false, //TODO: HZ-3014 c_sorted(),
c_sorted(),
isNotNull()
);
}
Expand Down Expand Up @@ -223,8 +223,7 @@ private void checkFirstColumn() {

// WHERE f1 IS NOT NULL
// index with additional condition is not used due to cost estimation, full scan is slightly cheaper
check(query("field1 IS NOT NULL"), false, // TODO: c_sorted() after HZ-3014
false, isNotNull());
check(query("field1 IS NOT NULL"), c_sorted(), false, isNotNull());

// WHERE f1=literal
check(query("field1=" + toLiteral(f1, f1.valueFrom())), c_notHashComposite(), eq(f1.valueFrom()));
Expand Down Expand Up @@ -426,7 +425,7 @@ private void checkFirstColumn() {
// WHERE f1<? OR f1>? (range from -inf..val1 and val2..+inf)
check(
query("field1<? OR field1>?", f1.valueFrom(), f1.valueTo()),
false, //TODO: HZ-3014 c_sorted(),
c_sorted(),
or(lt(f1.valueFrom()), gt(f1.valueTo()))
);

Expand Down Expand Up @@ -511,11 +510,11 @@ private void checkFirstColumn() {
}

private void checkSecondColumn() {
// WHERE f1 IS (NOT) NULL
// WHERE f2 IS (NOT) NULL
check(query("field2 IS NULL"), false, isNull_2());
check(query("field2 IS NOT NULL"), false, isNotNull_2());

// WHERE f1<cmp>?
// WHERE f2<cmp>?
check(query("field2=?", f2.valueFrom()), false, eq_2(f2.valueFrom()));
check(query("field2!=?", f2.valueFrom()), false, neq_2(f2.valueFrom()));
check(query("field2>?", f2.valueFrom()), false, gt_2(f2.valueFrom()));
Expand Down Expand Up @@ -581,6 +580,12 @@ private void checkBothColumns() {
c_sorted() || c_notComposite(),
and(or(eq(f1.valueFrom()), eq(f1.valueTo())), and(gt_2(f2.valueFrom()), lt_2(f2.valueTo())))
);
check(
// both field1 parameters have the same value
query("(field1=? OR field1=?) AND (field2>? AND field2<?)", f1.valueFrom(), f1.valueFrom(), f2.valueFrom(), f2.valueTo()),
c_sorted() || c_notComposite(),
and(or(eq(f1.valueFrom()), eq(f1.valueFrom())), and(gt_2(f2.valueFrom()), lt_2(f2.valueTo())))
);

// RANGE + EQ
check(
Expand All @@ -602,6 +607,37 @@ private void checkBothColumns() {
c_sorted(),
and(and(gt(f1.valueFrom()), lt(f1.valueTo())), and(gt_2(f2.valueFrom()), lt_2(f2.valueTo())))
);

// IS NOT NULL/IS NULL combinations
check(
query("field1 IS NULL AND field2 IS NULL"),
// lookup on all index types is possible
true,
and(isNull(), isNull_2())
);
check(
query("field1 IS NULL AND field2 IS NOT NULL"),
// NOT NULL is range scan so composite hash index cannot be used at all.
// Sorted index can be used at least for prefix (field1),
// condition on field2 should use composite index.
c_notHashComposite(),
and(isNull(), isNotNull_2())
);
check(
query("field1 IS NOT NULL AND field2 IS NULL"),
// basic query does not use sorted index due to poor selectivity of IS NOT NULL
// queries with ORDER BY will use sorted index due to high sort cost.
false,
and(isNotNull(), isNull_2())
);
check(
query("field1 IS NOT NULL AND field2 IS NOT NULL"),
// Note that with composite sorted index field2 will not be used in the lookup range
// because it is not possible to express such scan as bounded number of iteration pointers
// (and also it would not bring much value due to poor selectivity).
false,
and(isNotNull(), isNotNull_2())
);
}

private boolean c_always() {
Expand All @@ -616,6 +652,10 @@ private boolean c_sorted() {
return indexType == IndexType.SORTED;
}

// Condition helper: true only when the configured indexType is HASH.
private boolean c_hash() {
    return IndexType.HASH == this.indexType;
}

// Condition helper: exposes the value of the composite flag.
private boolean c_composite() {
    return this.composite;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public class IndexIterationPointer implements IdentifiedDataSerializable {

public static final IndexIterationPointer ALL = create(null, false, null, false, false, null);
public static final IndexIterationPointer ALL_DESC = ALL.asDescending();
// alternative representation of ALL pointer
// alternative representation of ALL pointer (valid only for non-composite index)
public static final IndexIterationPointer ALL_ALT = create(AbstractIndex.NULL, true, null, false, false, null);
public static final IndexIterationPointer ALL_ALT_DESC = ALL_ALT.asDescending();
public static final IndexIterationPointer IS_NULL = create(AbstractIndex.NULL, true, AbstractIndex.NULL, true, false, null);
Expand Down

0 comments on commit ef2ace9

Please sign in to comment.