Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,18 @@ protected RelMdDistinctRowCount() {}
// consideration selectivity of predicates passed in. Also, they
// assume the rows are unique even if the table is not
boolean uniq = RelMdUtil.areColumnsDefinitelyUnique(mq, rel, groupKey);
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
if (uniq) {
return NumberUtil.multiply(mq.getRowCount(rel),
mq.getSelectivity(rel, predicate));
return NumberUtil.min(
NumberUtil.multiply(mq.getRowCount(rel),
mq.getSelectivity(rel, predicate)), ndvUpperBound);
}
return null;
return ndvUpperBound;
}

public @Nullable Double getDistinctRowCount(Union rel, RelMetadataQuery mq,
ImmutableBitSet groupKey, @Nullable RexNode predicate) {
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
double rowCount = 0.0;
int[] adjustments = new int[rel.getRowType().getFieldCount()];
RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
Expand All @@ -108,26 +111,32 @@ protected RelMdDistinctRowCount() {}
Double partialRowCount =
mq.getDistinctRowCount(input, groupKey, modifiedPred);
if (partialRowCount == null) {
return null;
return ndvUpperBound;
}
rowCount += partialRowCount;
}
return rowCount;
return NumberUtil.min(rowCount, ndvUpperBound);
}

/**
 * Distinct-row-count handler for {@code Sort}: asks the metadata query for the
 * distinct row count of the sort's input, using the same group key and predicate.
 *
 * <p>NOTE(review): this is a diff view without +/- markers. The first
 * {@code return} appears to be the line being removed, and the statements after
 * it its replacement, which additionally combines the input's estimate with the
 * bound from {@code RexUtil.estimateColumnsNdv} through {@code NumberUtil.min}.
 * Presumably {@code NumberUtil.min} tolerates a null argument; confirm.
 */
public @Nullable Double getDistinctRowCount(Sort rel, RelMetadataQuery mq,
ImmutableBitSet groupKey, @Nullable RexNode predicate) {
return mq.getDistinctRowCount(rel.getInput(), groupKey, predicate);
// Bound derived from the predicate alone; null when there is no predicate or
// when some key column cannot be bounded.
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
return NumberUtil.min(
mq.getDistinctRowCount(rel.getInput(), groupKey, predicate), ndvUpperBound);
}

/**
 * Distinct-row-count handler for {@code TableModify}: asks the metadata query
 * for the distinct row count of the input, using the same group key and predicate.
 *
 * <p>NOTE(review): this is a diff view without +/- markers. The first
 * {@code return} appears to be the line being removed, and the statements after
 * it its replacement, which additionally combines the input's estimate with the
 * bound from {@code RexUtil.estimateColumnsNdv} through {@code NumberUtil.min}.
 * Presumably {@code NumberUtil.min} tolerates a null argument; confirm.
 */
public @Nullable Double getDistinctRowCount(TableModify rel, RelMetadataQuery mq,
ImmutableBitSet groupKey, @Nullable RexNode predicate) {
return mq.getDistinctRowCount(rel.getInput(), groupKey, predicate);
// Bound derived from the predicate alone; null when there is no predicate or
// when some key column cannot be bounded.
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
return NumberUtil.min(
mq.getDistinctRowCount(rel.getInput(), groupKey, predicate), ndvUpperBound);
}

/**
 * Distinct-row-count handler for {@code Exchange}: asks the metadata query for
 * the distinct row count of the input, using the same group key and predicate.
 *
 * <p>NOTE(review): this is a diff view without +/- markers. The first
 * {@code return} appears to be the line being removed, and the statements after
 * it its replacement, which additionally combines the input's estimate with the
 * bound from {@code RexUtil.estimateColumnsNdv} through {@code NumberUtil.min}.
 * Presumably {@code NumberUtil.min} tolerates a null argument; confirm.
 */
public @Nullable Double getDistinctRowCount(Exchange rel, RelMetadataQuery mq,
ImmutableBitSet groupKey, @Nullable RexNode predicate) {
return mq.getDistinctRowCount(rel.getInput(), groupKey, predicate);
// Bound derived from the predicate alone; null when there is no predicate or
// when some key column cannot be bounded.
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
return NumberUtil.min(
mq.getDistinctRowCount(rel.getInput(), groupKey, predicate), ndvUpperBound);
}

public @Nullable Double getDistinctRowCount(Filter rel, RelMetadataQuery mq,
Expand All @@ -146,13 +155,17 @@ protected RelMdDistinctRowCount() {}
predicate,
rel.getCondition());

return mq.getDistinctRowCount(rel.getInput(), groupKey, unionPreds);
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, unionPreds);
return NumberUtil.min(
mq.getDistinctRowCount(rel.getInput(), groupKey, unionPreds), ndvUpperBound);
}

/**
 * Distinct-row-count handler for {@code Join}: delegates the estimate to
 * {@code RelMdUtil.getJoinDistinctRowCount}, passing the join's own join type.
 *
 * <p>NOTE(review): this is a diff view without +/- markers. The first
 * {@code return} statement (two lines) appears to be the code being removed, and
 * the statements after it its replacement, which additionally combines the join
 * estimate with the bound from {@code RexUtil.estimateColumnsNdv} through
 * {@code NumberUtil.min}. Presumably {@code NumberUtil.min} tolerates a null
 * argument; confirm. The meaning of the trailing {@code false} flag is not
 * visible here; check {@code RelMdUtil.getJoinDistinctRowCount}.
 */
public @Nullable Double getDistinctRowCount(Join rel, RelMetadataQuery mq,
ImmutableBitSet groupKey, @Nullable RexNode predicate) {
return RelMdUtil.getJoinDistinctRowCount(mq, rel, rel.getJoinType(),
groupKey, predicate, false);
// Bound derived from the predicate alone; null when there is no predicate or
// when some key column cannot be bounded.
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
return NumberUtil.min(
RelMdUtil.getJoinDistinctRowCount(mq, rel, rel.getJoinType(),
groupKey, predicate, false), ndvUpperBound);
}

public @Nullable Double getDistinctRowCount(Aggregate rel, RelMetadataQuery mq,
Expand Down Expand Up @@ -181,14 +194,15 @@ protected RelMdDistinctRowCount() {}

Double distinctRowCount =
mq.getDistinctRowCount(rel.getInput(), childKey.build(), childPreds);
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
if (distinctRowCount == null) {
return null;
return ndvUpperBound;
} else if (notPushable.isEmpty()) {
return distinctRowCount;
return NumberUtil.min(distinctRowCount, ndvUpperBound);
} else {
RexNode preds =
RexUtil.composeConjunction(rexBuilder, notPushable, true);
return distinctRowCount * RelMdUtil.guessSelectivity(preds);
return NumberUtil.min(distinctRowCount * RelMdUtil.guessSelectivity(preds), ndvUpperBound);
}
}

Expand All @@ -211,12 +225,14 @@ public Double getDistinctRowCount(Values rel, RelMetadataQuery mq,
set.add(ImmutableList.copyOf(values));
values.clear();
}
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
double upBound = ndvUpperBound == null ? Double.MAX_VALUE : ndvUpperBound;
double nRows = set.size();
if ((predicate == null) || predicate.isAlwaysTrue()) {
return nRows;
return Math.min(nRows, upBound);
} else {
double selectivity = RelMdUtil.guessSelectivity(predicate);
return RelMdUtil.numDistinctVals(nRows, nRows * selectivity);
return Math.min(RelMdUtil.numDistinctVals(nRows, nRows * selectivity), upBound);
}
}

Expand Down Expand Up @@ -256,8 +272,9 @@ public Double getDistinctRowCount(Values rel, RelMetadataQuery mq,
mq.getDistinctRowCount(rel.getInput(), baseCols.build(),
modifiedPred);

Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
if (distinctRowCount == null) {
return null;
return ndvUpperBound;
} else if (!notPushable.isEmpty()) {
RexNode preds =
RexUtil.composeConjunction(rexBuilder, notPushable, true);
Expand All @@ -267,30 +284,34 @@ public Double getDistinctRowCount(Values rel, RelMetadataQuery mq,
// No further computation required if the projection expressions
// are all column references
if (projCols.cardinality() == 0) {
return distinctRowCount;
return NumberUtil.min(distinctRowCount, ndvUpperBound);
}

// multiply by the cardinality of the non-child projection expressions
for (int bit : projCols.build()) {
Double subRowCount =
RelMdUtil.cardOfProjExpr(mq, rel, projExprs.get(bit));
if (subRowCount == null) {
return null;
return ndvUpperBound;
}
distinctRowCount *= subRowCount;
}

return RelMdUtil.numDistinctVals(distinctRowCount, mq.getRowCount(rel));
return NumberUtil.min(
RelMdUtil.numDistinctVals(distinctRowCount, mq.getRowCount(rel)), ndvUpperBound);
}

public @Nullable Double getDistinctRowCount(RelSubset rel, RelMetadataQuery mq,
ImmutableBitSet groupKey, @Nullable RexNode predicate) {
Double ndvUpperBound = RexUtil.estimateColumnsNdv(groupKey, predicate);
final RelNode best = rel.getBest();
if (best != null) {
return mq.getDistinctRowCount(best, groupKey, predicate);
return NumberUtil.min(ndvUpperBound,
mq.getDistinctRowCount(best, groupKey, predicate));
}
if (!Bug.CALCITE_1048_FIXED) {
return getDistinctRowCount((RelNode) rel, mq, groupKey, predicate);
return NumberUtil.min(
getDistinctRowCount((RelNode) rel, mq, groupKey, predicate), ndvUpperBound);
}
Double d = null;
for (RelNode r2 : rel.getRels()) {
Expand All @@ -302,6 +323,6 @@ public Double getDistinctRowCount(Values rel, RelMetadataQuery mq,
// in this set.
}
}
return d;
return NumberUtil.min(d, ndvUpperBound);
}
}
94 changes: 94 additions & 0 deletions core/src/main/java/org/apache/calcite/rex/RexUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.calcite.sql.validate.SqlValidatorUtil;
import org.apache.calcite.util.ControlFlowException;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Litmus;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.RangeSets;
Expand Down Expand Up @@ -2620,6 +2621,99 @@ public static List<String> strings(List<RexNode> list) {
return Util.transform(list, Object::toString);
}

/**
 * Estimates an upper bound on the number of distinct values (NDV) of a set of
 * columns, using only the given condition.
 *
 * @param columns ordinals of the columns whose combined NDV is wanted
 * @param condition predicate known to hold, or null if there is none
 * @return 0 if the condition is always false; otherwise the product of the
 *     per-column bounds; or null if there is no condition or if any one of
 *     the columns cannot be bounded
 */
public static @Nullable Double estimateColumnsNdv(
    ImmutableBitSet columns, @Nullable RexNode condition) {
  if (condition == null) {
    // Nothing to derive a bound from.
    return null;
  } else if (condition.isAlwaysFalse()) {
    // No row can satisfy the condition, hence no distinct values.
    return 0.0;
  }

  final List<RexNode> conjuncts = RelOptUtil.conjunctions(condition);
  double product = 1.0;
  for (int column : columns) {
    final Double columnBound = estimateColumnNdv(column, conjuncts);
    if (columnBound == null) {
      // A single unbounded column makes the whole column set unbounded.
      return null;
    }
    // NDV(x) <= a and NDV(y) <= b imply NDV(x, y) <= a * b.
    product *= columnBound;
  }
  return product;
}

/**
 * Estimates an upper bound on the number of distinct values of one column,
 * given a list of conditions that all hold.
 *
 * <p>Every condition that yields a bound is a valid bound on its own, so the
 * smallest one wins. For example, {@code x IN (a, b)} gives 2 and
 * {@code x IN (a, b, c)} gives 3; together they give min(2, 3) = 2.
 *
 * @param colIdx ordinal of the column
 * @param conditions conditions to inspect
 * @return the tightest bound found, or null if no condition yields one
 */
private static @Nullable Double estimateColumnNdv(int colIdx, List<RexNode> conditions) {
  Double tightest = null;
  for (RexNode conjunct : conditions) {
    final Double candidate = estimateColumnNdvSingleCondition(colIdx, conjunct);
    if (candidate == null) {
      // This condition says nothing about the column.
      continue;
    }
    if (tightest == null) {
      tightest = candidate;
    } else {
      tightest = Math.min(tightest, candidate);
    }
  }
  return tightest;
}

/**
 * Estimates an upper bound on the number of distinct values of a single
 * column, given a single condition.
 *
 * <p>Three shapes of condition are recognized, where {@code x} is an input
 * reference to column {@code colIdx}:
 * <ul>
 *   <li>{@code x IS NULL}: 1 if {@code x} is nullable, otherwise 0;
 *   <li>{@code x = literal} (operands in either order): 1;
 *   <li>{@code SEARCH(x, sarg)}: the number of values the Sarg admits, if
 *       that is finite and known.
 * </ul>
 *
 * @param colIdx ordinal of the column
 * @param condition condition to inspect
 * @return the bound, or null if the condition does not bound the column
 */
private static @Nullable Double estimateColumnNdvSingleCondition(
    int colIdx, RexNode condition) {
  if (condition.getKind() == SqlKind.IS_NULL) {
    assert condition instanceof RexCall;
    RexNode op = ((RexCall) condition).getOperands().get(0);
    if (op instanceof RexInputRef && ((RexInputRef) op).getIndex() == colIdx) {
      // Given condition x is null,
      // NDV(x) = 1 if x is nullable, or 0 otherwise.
      return op.getType().isNullable() ? 1.0 : 0.0;
    }
  }
  if (!(condition instanceof RexCall)
      || ((RexCall) condition).getOperands().size() != 2) {
    return null;
  }

  // Binary call: handle x = a, or Search(x, Sarg).
  final List<RexNode> operands = ((RexCall) condition).getOperands();
  final boolean literalFirst = operands.get(0).getKind() == SqlKind.LITERAL;
  if (!literalFirst && operands.get(1).getKind() != SqlKind.LITERAL) {
    // one of the operands must be a literal, otherwise we cannot estimate
    return null;
  }
  final RexNode literalOp = operands.get(literalFirst ? 0 : 1);
  final RexNode otherOp = operands.get(literalFirst ? 1 : 0);
  if (!(otherOp instanceof RexInputRef)
      || ((RexInputRef) otherOp).getIndex() != colIdx) {
    // The condition is about some other column or expression.
    return null;
  }

  switch (condition.getKind()) {
  case EQUALS:
    // expression of the form: x = a
    return 1.0;
  case SEARCH:
    // expression of the form: Search(x, Sarg)
    final Comparable<?> value = ((RexLiteral) literalOp).getValue();
    if (value instanceof Sarg) {
      // Use the column's type rather than the literal's. Whether NULL is a
      // possible value, and whether the domain is discrete, are properties
      // of the column; a literal's type is typically NOT NULL, which would
      // make the Sarg's null value go uncounted and the bound too low.
      return ((Sarg<?>) value).numDistinctVals(otherOp.getType());
    }
    return null;
  default:
    return null;
  }
}

/** Helps {@link org.apache.calcite.rex.RexUtil#toDnf}. */
private static class DnfHelper {
final RexBuilder rexBuilder;
Expand Down
44 changes: 44 additions & 0 deletions core/src/main/java/org/apache/calcite/util/RangeSets.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,17 @@
*/
package org.apache.calcite.util;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.type.SqlTypeName;

import com.google.common.collect.BoundType;
import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.collect.TreeRangeSet;

import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.Iterator;
import java.util.Set;
import java.util.function.BiConsumer;
Expand Down Expand Up @@ -105,6 +110,45 @@ public static <C extends Comparable<C>> int compare(Range<C> r0,
return 0;
}

/**
 * Estimates the number of distinct values in a range (if possible).
 *
 * <p>A point range always holds exactly one value. Any other range is
 * countable only if it is bounded on both sides, its endpoints are numbers,
 * and {@code type} is one of the discrete types listed in the body.
 *
 * <p>Endpoints need not be integral even when the type is. For example, the
 * open range (1.5, 3.5) over an INTEGER contains the values 2 and 3. Each
 * endpoint is therefore snapped to the nearest integer inside the range
 * before counting, and an empty range gives 0 rather than a negative number.
 *
 * @param range range to inspect
 * @param type type of the values drawn from the range
 * @return the number of distinct values, or null if it is infinite or unknown
 */
public static <C extends Comparable<C>> @Nullable Double numDistinctVals(
    Range<C> range, RelDataType type) {
  if (RangeSets.isPoint(range)) {
    return 1.0;
  }
  if (!range.hasLowerBound() || !range.hasUpperBound()) {
    // infinity range.
    return null;
  }
  C lower = range.lowerEndpoint();
  C upper = range.upperEndpoint();
  if (!(lower instanceof Number) || !(upper instanceof Number)) {
    return null;
  }

  boolean discreteType = type.getSqlTypeName() == SqlTypeName.BOOLEAN
      || type.getSqlTypeName() == SqlTypeName.TINYINT
      || type.getSqlTypeName() == SqlTypeName.SMALLINT
      || type.getSqlTypeName() == SqlTypeName.INTEGER
      || type.getSqlTypeName() == SqlTypeName.BIGINT;
  if (!discreteType) {
    return null;
  }

  final double lo = ((Number) lower).doubleValue();
  final double hi = ((Number) upper).doubleValue();
  // Smallest integer inside the range: a closed bound admits the endpoint
  // itself when it is integral; an open bound starts strictly above it.
  final double first = range.lowerBoundType() == BoundType.CLOSED
      ? Math.ceil(lo)
      : Math.floor(lo) + 1.0;
  // Largest integer inside the range, by the mirror-image argument.
  final double last = range.upperBoundType() == BoundType.CLOSED
      ? Math.floor(hi)
      : Math.ceil(hi) - 1.0;
  // Clamp: a range such as (1.2, 1.8) contains no integer at all.
  return Math.max(0.0, last - first + 1.0);
}

/** Computes a hash code for a range set.
*
* <p>This method does not compute the same result as
Expand Down
24 changes: 24 additions & 0 deletions core/src/main/java/org/apache/calcite/util/Sarg.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.calcite.util;

import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;

import com.google.common.collect.ImmutableRangeSet;
Expand Down Expand Up @@ -208,4 +209,27 @@ public int complexity() {
public Sarg negate() {
return Sarg.of(!containsNull, rangeSet.complement());
}

/**
 * Estimates how many distinct values this Sarg admits (if possible).
 *
 * <p>The result is the sum of the per-range estimates, plus one for the null
 * value when this Sarg contains null and {@code type} is nullable.
 *
 * @param type type of the values being searched
 * @return the number of distinct values, or null if it is infinite or unknown
 *     for any of the ranges
 */
public @Nullable Double numDistinctVals(RelDataType type) {
  double total = 0;
  for (Range<C> r : rangeSet.asRanges()) {
    final Double perRange = RangeSets.numDistinctVals(r, type);
    if (perRange != null) {
      total += perRange;
      continue;
    }
    // One range with infinite or unknown cardinality makes the
    // whole Sarg infinite or unknown.
    return null;
  }
  // NULL counts as one more value, but only if the type can hold it.
  final boolean countNull = containsNull && type.isNullable();
  return countNull ? total + 1.0 : total;
}
}
Loading