Skip to content
Permalink
Browse files
DRILL-8209: Introduce rule for converting join with distinct input to semi-join (#2533)
  • Loading branch information
vvysotskyi committed May 19, 2022
1 parent 0213d8e commit c373476b285a6e7d7cdd0b6bc5d756b02481cb5d
Showing 4 changed files with 98 additions and 2 deletions.
@@ -18,6 +18,7 @@
package org.apache.drill.exec.planner;

import org.apache.drill.exec.planner.logical.ConvertMetadataAggregateToDirectScanRule;
import org.apache.drill.exec.planner.logical.DrillDistinctJoinToSemiJoinRule;
import org.apache.drill.exec.planner.physical.MetadataAggPrule;
import org.apache.drill.exec.planner.physical.MetadataControllerPrule;
import org.apache.drill.exec.planner.physical.MetadataHandlerPrule;
@@ -403,6 +404,8 @@ static RuleSet getDrillBasicRules(OptimizerRulesContext optimizerRulesContext) {
if (optimizerRulesContext.getPlannerSettings().isHashJoinEnabled() &&
optimizerRulesContext.getPlannerSettings().isSemiJoinEnabled()) {
basicRules.add(RuleInstance.SEMI_JOIN_PROJECT_RULE);
basicRules.add(DrillDistinctJoinToSemiJoinRule.INSTANCE);
basicRules.add(RuleInstance.JOIN_TO_SEMI_JOIN_RULE);
}

return RuleSets.ofList(basicRules.build());
@@ -72,6 +72,14 @@ public boolean matches(RelOptRuleCall call) {
}
};

/**
 * {@code SemiJoinRule.JoinToSemiJoinRule} instance registered under the description
 * "DrillJoinToSemiJoinRule". It is configured for {@code Join} and {@code Aggregate}
 * operands and uses {@code DrillRelFactories.LOGICAL_BUILDER} as its builder factory.
 */
SemiJoinRule JOIN_TO_SEMI_JOIN_RULE = new SemiJoinRule.JoinToSemiJoinRule(Join.class, Aggregate.class,
    DrillRelFactories.LOGICAL_BUILDER, "DrillJoinToSemiJoinRule") {
  /**
   * Restricts the rule to joins with a non-trivial condition.
   *
   * @param call rule call whose first operand is the join
   * @return {@code false} when the join condition is always true or always false,
   *         {@code true} otherwise
   */
  @Override
  public boolean matches(RelOptRuleCall call) {
    Join join = call.rel(0);
    // A constant condition is not a real join predicate, so the rule is skipped for it.
    return !join.getCondition().isAlwaysTrue()
        && !join.getCondition().isAlwaysFalse();
  }
};

/**
 * {@code JoinPushExpressionsRule} instance configured for {@code Join} operands and
 * {@code DrillRelFactories.LOGICAL_BUILDER} as its builder factory.
 */
JoinPushExpressionsRule JOIN_PUSH_EXPRESSIONS_RULE = new JoinPushExpressionsRule(
    Join.class, DrillRelFactories.LOGICAL_BUILDER);
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.planner.logical;

import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinInfo;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.runtime.SqlFunctions;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.drill.exec.physical.impl.join.JoinUtils;

/**
 * Converts an inner join whose right input is distinct on the join keys into a semi-join.
 *
 * <p>The rule matches a {@code Project} on top of a {@code Join}. When the projection
 * reads only columns of the left input and every right-side join key is unique, each
 * left row matches at most one right row, so the inner join can be replaced with a
 * semi-join without changing the projected result.
 */
public class DrillDistinctJoinToSemiJoinRule extends RelOptRule {
  public static final RelOptRule INSTANCE = new DrillDistinctJoinToSemiJoinRule();

  public DrillDistinctJoinToSemiJoinRule() {
    super(RelOptHelper.any(Project.class, Join.class),
        DrillRelFactories.LOGICAL_BUILDER, "DrillDistinctJoinToSemiJoinRule");
  }

  /**
   * Decides whether the matched {@code Project}/{@code Join} pair can be rewritten.
   *
   * @param call rule call with the project as operand 0 and the join as operand 1
   * @return {@code true} only if all of the following hold:
   *         <ul>
   *           <li>the join is an inner join;</li>
   *           <li>the join is not cartesian;</li>
   *           <li>the projection references only columns of the left input;</li>
   *           <li>the join condition consists of equality conditions only;</li>
   *           <li>the right-side columns used in the condition are unique.</li>
   *         </ul>
   */
  @Override
  public boolean matches(RelOptRuleCall call) {
    Project project = call.rel(0);
    Join join = call.rel(1);

    // Outer joins keep unmatched rows that a semi-join would drop, so only
    // inner joins may be rewritten.
    if (join.getJoinType() != JoinRelType.INNER) {
      return false;
    }
    if (JoinUtils.checkCartesianJoin(join)) {
      return false;
    }

    // Fields of the join row type at or after the left field count belong to the right input.
    ImmutableBitSet projectedBits = RelOptUtil.InputFinder.bits(project.getProjects(), null);
    ImmutableBitSet rightBits = ImmutableBitSet.range(
        join.getLeft().getRowType().getFieldCount(),
        join.getRowType().getFieldCount());
    if (projectedBits.intersects(rightBits)) {
      return false;
    }

    JoinInfo joinInfo = join.analyzeCondition();
    if (!joinInfo.isEqui()) {
      return false;
    }

    // areColumnsUnique may return null when uniqueness is unknown; isTrue treats that as false.
    RelMetadataQuery mq = call.getMetadataQuery();
    return SqlFunctions.isTrue(mq.areColumnsUnique(join.getRight(), joinInfo.rightSet()));
  }

  /**
   * Replaces the matched join with a semi-join over the same inputs and condition,
   * then re-applies the original projection on top of it.
   *
   * @param call rule call with the project as operand 0 and the join as operand 1
   */
  @Override
  public void onMatch(RelOptRuleCall call) {
    Project project = call.rel(0);
    Join join = call.rel(1);
    RelBuilder relBuilder = call.builder();
    RelNode semiJoinProject = relBuilder
        .push(join.getLeft())
        .push(join.getRight())
        .semiJoin(join.getCondition())
        // Pass the original field names so aliases survive and the output row type
        // stays the same as that of the project being replaced.
        .project(project.getProjects(), project.getRowType().getFieldNames())
        .build();
    call.transformTo(semiJoinProject);
  }
}
@@ -24,6 +24,7 @@
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.JoinInfo;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.metadata.RelMdUtil;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.Pair;
@@ -102,9 +103,16 @@ public LogicalOperator implement(DrillImplementor implementor) {
return new LogicalSemiJoin(leftOp, rightOp, conditions, joinType);
}

/**
 * Computes the cost of this semi-join by delegating to {@code computeLogicalJoinCost}.
 *
 * @param planner planner passed through to the cost computation
 * @param mq metadata query passed through to the cost computation
 * @return the cost returned by {@code computeLogicalJoinCost}
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  return computeLogicalJoinCost(planner, mq);
}

/**
 * Estimates the number of rows produced by this semi-join as the left input's row
 * count multiplied by the selectivity of the semi-join predicate on the left input.
 *
 * @param mq metadata query used to obtain the selectivity and the row count
 * @return estimated row count; falls back to {@code super.estimateRowCount(mq)} when
 *         either the selectivity or the left row count is unavailable
 */
@Override
public double estimateRowCount(RelMetadataQuery mq) {
  RexNode semiJoinSelectivity =
      RelMdUtil.makeSemiJoinSelectivityRexNode(mq, this);
  Double selectivity = mq.getSelectivity(getLeft(), semiJoinSelectivity);
  Double leftRowCount = mq.getRowCount(getLeft());
  // Both metadata calls return boxed values that may be null; unboxing null would throw.
  if (selectivity == null || leftRowCount == null) {
    return super.estimateRowCount(mq);
  }
  return selectivity * leftRowCount;
}
}

0 comments on commit c373476

Please sign in to comment.