Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.calcite.rex.RexCorrelVariable;
import org.apache.calcite.rex.RexFieldAccess;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexOver;
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.BitSets;
Expand All @@ -43,10 +44,10 @@
/**
* Push Project under Correlate to apply on Correlate's left and right child
*/
public class ProjectCorrelateTransposeRule extends RelOptRule {
public class ProjectCorrelateTransposeRule extends RelOptRule {

public static final ProjectCorrelateTransposeRule INSTANCE =
new ProjectCorrelateTransposeRule(expr -> true,
new ProjectCorrelateTransposeRule(expr -> !(expr instanceof RexOver),
RelFactories.LOGICAL_BUILDER);

//~ Instance fields --------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.calcite.rel.core.SetOp;
import org.apache.calcite.rel.logical.LogicalProject;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexOver;
import org.apache.calcite.tools.RelBuilderFactory;

import java.util.ArrayList;
Expand All @@ -40,7 +41,7 @@
*/
public class ProjectSetOpTransposeRule extends RelOptRule {
public static final ProjectSetOpTransposeRule INSTANCE =
new ProjectSetOpTransposeRule(expr -> false,
new ProjectSetOpTransposeRule(expr -> !(expr instanceof RexOver),
RelFactories.LOGICAL_BUILDER);

//~ Instance fields --------------------------------------------------------
Expand Down Expand Up @@ -90,23 +91,35 @@ public void onMatch(RelOptRuleCall call) {
List<RelNode> newSetOpInputs = new ArrayList<>();
int[] adjustments = pushProject.getAdjustments();

// push the projects completely below the setop; this
// is different from pushing below a join, where we decompose
// to try to keep expensive expressions above the join,
// because UNION ALL does not have any filtering effect,
// and it is the only operator this rule currently acts on
for (RelNode input : setOp.getInputs()) {
// be lazy: produce two ProjectRels, and let another rule
// merge them (could probably just clone origProj instead?)
Project p = pushProject.createProjectRefsAndExprs(input, true, false);
newSetOpInputs.add(pushProject.createNewProject(p, adjustments));
final RelNode node;
if (RexOver.containsOver(origProj.getProjects(), null)) {
// should not push over past setop but can push its operand down.
for (RelNode input : setOp.getInputs()) {
Project p = pushProject.createProjectRefsAndExprs(input, true, false);
// make sure that it is not a trivial project to avoid infinite loop.
if (p.getRowType().equals(input.getRowType())) {
return;
}
newSetOpInputs.add(p);
}
SetOp newSetOp =
setOp.copy(setOp.getTraitSet(), newSetOpInputs);
node = pushProject.createNewProject(newSetOp, adjustments);
} else {
// push some expressions below the setop; this
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is if..else.. in that I don't want to change the plan of some cases. For instance, current plan of

select ename,avg(empno) from 
(select * from emp as e1 union all select * from emp as e2)

is

LogicalAggregate(group=[{0}], EXPR$1=[AVG($1)])
  LogicalUnion(all=[true])
    LogicalProject(ENAME=[$1], EMPNO=[$0])
      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
    LogicalProject(ENAME=[$1], EMPNO=[$0])
      LogicalTableScan(table=[[CATALOG, SALES, EMP]])

But if without if..else.., the plan becomes

LogicalAggregate(group=[{0}], EXPR$1=[AVG($1)])
  LogicalProject(ENAME=[$1], EMPNO=[$0])
    LogicalUnion(all=[true])
      LogicalProject(EMPNO=[$0], ENAME=[$1])
        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
      LogicalProject(EMPNO=[$0], ENAME=[$1])
        LogicalTableScan(table=[[CATALOG, SALES, EMP]])

which seems redundant.

// is different from pushing below a join, where we decompose
// to try to keep expensive expressions above the join,
// because UNION ALL does not have any filtering effect,
// and it is the only operator this rule currently acts on
setOp.getInputs().forEach(input ->
newSetOpInputs.add(
pushProject.createNewProject(
pushProject.createProjectRefsAndExprs(
input, true, false), adjustments)));
node = setOp.copy(setOp.getTraitSet(), newSetOpInputs);
}

// create a new setop whose children are the ProjectRels created above
SetOp newSetOp =
setOp.copy(setOp.getTraitSet(), newSetOpInputs);

call.transformTo(newSetOp);
call.transformTo(node);
}
}

Expand Down
21 changes: 21 additions & 0 deletions core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,16 @@ private void basePushFilterPastAggWithGroupingSets(boolean unchanged)
+ "on e.ename = b.ename and e.deptno = 10");
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-3004">[CALCITE-3004]
* Should not push over past union but its operands can since setop
* will affect row count</a>. */
@Test public void testProjectSetOpTranspose() {
checkPlanning(ProjectSetOpTransposeRule.INSTANCE,
"select job, sum(sal + 100) over (partition by deptno) from\n"
+ "(select * from emp e1 union all select * from emp e2)");
}

@Test public void testProjectCorrelateTransposeDynamic() {
ProjectCorrelateTransposeRule customPCTrans =
new ProjectCorrelateTransposeRule(skipItem, RelFactories.LOGICAL_BUILDER);
Expand Down Expand Up @@ -1337,6 +1347,17 @@ private void basePushFilterPastAggWithGroupingSets(boolean unchanged)
+ "unnest(t1.employees) as t2");
}

/** As {@link #testProjectSetOpTranspose()};
* should not push over past correlate but its operands can since correlate
* will affect row count. */
@Test public void testProjectCorrelateTransposeWithOver() {
checkPlanning(ProjectCorrelateTransposeRule.INSTANCE,
"select sum(t1.deptno + 1) over (partition by t1.name),\n"
+ "count(t2.empno) over ()\n"
+ "from DEPT_NESTED as t1, "
+ "unnest(t1.employees) as t2");
}

/** Tests that the default instance of {@link FilterProjectTransposeRule}
* does not push a Filter that contains a correlating variable.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9430,6 +9430,35 @@ LogicalProject(NAME=[$0], ENAME=[$2])
Uncollect
LogicalProject(EMPLOYEES=[$cor1.EMPLOYEES])
LogicalValues(tuples=[[{ 0 }]])
]]>
</Resource>
</TestCase>
<TestCase name="testProjectSetOpTranspose">
<Resource name="sql">
<![CDATA[select job, sum(sal + 100) over (partition by deptno) from
(select * from emp e1 union all select * from emp e2)
]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalProject(JOB=[$2], EXPR$1=[SUM(+($5, 100)) OVER (PARTITION BY $7 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
LogicalUnion(all=[true])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalProject(JOB=[$0], EXPR$1=[SUM($2) OVER (PARTITION BY $1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
LogicalUnion(all=[true])
LogicalProject(JOB=[$2], DEPTNO=[$7], +=[+($5, 100)])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(JOB=[$2], DEPTNO=[$7], +=[+($5, 100)])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
Expand Down Expand Up @@ -9484,6 +9513,36 @@ LogicalProject(NAME=[$0], ENAME=[$2])
Uncollect
LogicalProject(EMPLOYEES=[$cor1.EMPLOYEES])
LogicalValues(tuples=[[{ 0 }]])
]]>
</Resource>
</TestCase>
<TestCase name="testProjectCorrelateTransposeWithOver">
<Resource name="sql">
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. Somehow there's a duplicate key in RelOptRulesTest.xml. Maybe it's a merge error, I don't know. Can someone add code to DiffRepository to make it fail when reading a .xml file if there are duplicate keys.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@julianhyde I would like to do it. : )

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened an issue(CALCITE-3009) for it.

<![CDATA[select sum(t1.deptno + 1) over (partition by t1.name),
count(t2.empno) over ()
from DEPT_NESTED as t1, unnest(t1.employees) as t2
]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalProject(EXPR$0=[SUM(+($0, 1)) OVER (PARTITION BY $1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)], EXPR$1=[COUNT($4) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{3}])
LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
Uncollect
LogicalProject(EMPLOYEES=[$cor0.EMPLOYEES])
LogicalValues(tuples=[[{ 0 }]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalProject(EXPR$0=[SUM($2) OVER (PARTITION BY $0 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)], EXPR$1=[COUNT($3) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
LogicalCorrelate(correlation=[$cor1], joinType=[inner], requiredColumns=[{1}])
LogicalProject(NAME=[$1], EMPLOYEES=[$3], +=[+($0, 1)])
LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
LogicalProject(EMPNO=[$0])
Uncollect
LogicalProject(EMPLOYEES=[$cor1.EMPLOYEES])
LogicalValues(tuples=[[{ 0 }]])
]]>
</Resource>
</TestCase>
Expand Down