Skip to content

Commit

Permalink
DRILL-7227: Fix predicate check in DrillRelOptUtil.analyzeSimpleEquiJoin
Browse files Browse the repository at this point in the history
closes #1775
  • Loading branch information
Gautam Parai authored and gparai committed May 9, 2019
1 parent e5e9b35 commit b774eec
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 23 deletions.
Expand Up @@ -678,10 +678,16 @@ public Void visitCall(RexCall call) {
super.visitCall(call);
} else {
if (call.getKind() == SqlKind.EQUALS) {
int leftFieldCount = join.getLeft().getRowType().getFieldCount();
int rightFieldCount = join.getRight().getRowType().getFieldCount();
RexNode leftComparand = call.operands.get(0);
RexNode rightComparand = call.operands.get(1);
// If a join condition predicate has something more complicated than a RexInputRef
// we bail out!
if (!(leftComparand instanceof RexInputRef && rightComparand instanceof RexInputRef)) {
joinConditions.clear();
throw new Util.FoundOne(call);
}
int leftFieldCount = join.getLeft().getRowType().getFieldCount();
int rightFieldCount = join.getRight().getRowType().getFieldCount();
RexInputRef leftFieldAccess = (RexInputRef) leftComparand;
RexInputRef rightFieldAccess = (RexInputRef) rightComparand;
if (leftFieldAccess.getIndex() >= leftFieldCount + rightFieldCount ||
Expand Down
Expand Up @@ -55,8 +55,12 @@
import org.apache.drill.metastore.ColumnStatistics;
import org.apache.drill.metastore.ColumnStatisticsKind;
import org.apache.drill.metastore.TableMetadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
private static final Logger logger = LoggerFactory.getLogger(DrillRelMdDistinctRowCount.class);

private static final DrillRelMdDistinctRowCount INSTANCE =
new DrillRelMdDistinctRowCount();

Expand Down Expand Up @@ -142,10 +146,7 @@ private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery mq,
if (groupKey.length() == 0) {
return selectivity * rowCount;
}
/* If predicate is present, determine its selectivity to estimate filtered rows. Thereafter,
* compute the number of distinct rows
*/
selectivity = mq.getSelectivity(scan, predicate);

TableMetadata tableMetadata;
try {
tableMetadata = table.getGroupScan().getTableMetadata();
Expand All @@ -154,38 +155,43 @@ private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery mq,
return scan.estimateRowCount(mq) * 0.1;
}

double s = 1.0;
boolean allCols = true;
double estRowCnt = 1.0;
String colName = "";
boolean allColsHaveNDV = true;
for (int i = 0; i < groupKey.length(); i++) {
final String colName = type.getFieldNames().get(i);
// Skip NDV, if not available
colName = type.getFieldNames().get(i);
if (!groupKey.get(i)) {
allCols = false;
break;
continue;
}
ColumnStatistics columnStatistics = tableMetadata != null ?
tableMetadata.getColumnStatistics(SchemaPath.getSimplePath(colName)) : null;
Double ndv = columnStatistics != null ? (Double) columnStatistics.getStatistic(ColumnStatisticsKind.NDV) : null;
// Skip NDV, if not available
if (ndv == null) {
continue;
allColsHaveNDV = false;
break;
}
s *= ndv;
estRowCnt *= ndv;
selectivity = getPredSelectivityContainingInputRef(predicate, i, mq, scan);
/* If predicate is on group-by column, scale down the NDV by selectivity. Consider the query
* select a, b from t where a = 10 group by a, b. Here, NDV(a) will be scaled down by SEL(a)
* whereas NDV(b) will not.
*/
if (selectivity > 0) {
s *= selectivity;
estRowCnt *= selectivity;
}
}
s = Math.min(s, rowCount);
if (!allCols) {
estRowCnt = Math.min(estRowCnt, rowCount);
if (!allColsHaveNDV) {
if (logger.isDebugEnabled()) {
logger.debug(String.format("NDV not available for %s(%s). Using default rowcount for group-by %s",
(tableMetadata != null ? tableMetadata.getTableName() : ""), colName, groupKey.toString()));
}
// Could not get any NDV estimate from stats - probably stats not present for GBY cols. So Guess!
return scan.estimateRowCount(mq) * 0.1;
} else {
/* rowCount maybe less than NDV(different source), sanity check OR NDV not used at all */
return s;
return estRowCnt;
}
}

Expand Down Expand Up @@ -239,18 +245,28 @@ private Double getDistinctRowCountInternal(DrillJoinRelBase joinRel, RelMetadata
if (groupKey.get(idx)) {
// GBY key is present in some filter - now try options A) and B) as described above
double ndvSGby = Double.MAX_VALUE;
Double ndv;
boolean presentInFilter = false;
ImmutableBitSet sGby = getSingleGbyKey(groupKey, idx);
if (sGby != null) {
// If we see any NULL ndv i.e. cant process ..we bail out!
for (ImmutableBitSet jFilter : joinFiltersSet) {
if (jFilter.contains(sGby)) {
presentInFilter = true;
// Found join condition containing this GBY key. Pick min NDV across all columns in this join
for (int fidx : jFilter) {
if (fidx < left.getRowType().getFieldCount()) {
ndvSGby = Math.min(ndvSGby, mq.getDistinctRowCount(left, ImmutableBitSet.of(fidx), leftPred));
ndv = mq.getDistinctRowCount(left, ImmutableBitSet.of(fidx), leftPred);
if (ndv == null) {
return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
}
ndvSGby = Math.min(ndvSGby, ndv);
} else {
ndvSGby = Math.min(ndvSGby, mq.getDistinctRowCount(right, ImmutableBitSet.of(fidx-left.getRowType().getFieldCount()), rightPred));
ndv = mq.getDistinctRowCount(right, ImmutableBitSet.of(fidx-left.getRowType().getFieldCount()), rightPred);
if (ndv == null) {
return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
}
ndvSGby = Math.min(ndvSGby, ndv);
}
}
break;
Expand All @@ -260,9 +276,17 @@ private Double getDistinctRowCountInternal(DrillJoinRelBase joinRel, RelMetadata
if (!presentInFilter) {
for (int sidx : sGby) {
if (sidx < left.getRowType().getFieldCount()) {
ndvSGby = mq.getDistinctRowCount(left, ImmutableBitSet.of(sidx), leftPred);
ndv = mq.getDistinctRowCount(left, ImmutableBitSet.of(sidx), leftPred);
if (ndv == null) {
return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
}
ndvSGby = ndv;
} else {
ndvSGby = mq.getDistinctRowCount(right, ImmutableBitSet.of(sidx-left.getRowType().getFieldCount()), rightPred);
ndv = mq.getDistinctRowCount(right, ImmutableBitSet.of(sidx-left.getRowType().getFieldCount()), rightPred);
if (ndv == null) {
return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
}
ndvSGby = ndv;
}
}
}
Expand Down
Expand Up @@ -290,7 +290,7 @@ public void testUseStatistics() throws Exception {
query = " select emp.employee_id from dfs.tmp.employeeUseStat emp join dfs.tmp.departmentUseStat dept"
+ " on emp.department_id = dept.department_id "
+ " group by emp.employee_id";
String[] expectedPlan8 = {"HashAgg\\(group=\\[\\{0\\}\\]\\).*rowcount = 115.49475630811243,.*",
String[] expectedPlan8 = {"HashAgg\\(group=\\[\\{0\\}\\]\\).*rowcount = 730.0992454469841,.*",
"HashJoin\\(condition.*\\).*rowcount = 1155.0,.*",
"Scan.*columns=\\[`department_id`, `employee_id`\\].*rowcount = 1155.0.*",
"Scan.*columns=\\[`department_id`\\].*rowcount = 12.0.*"};
Expand Down

0 comments on commit b774eec

Please sign in to comment.