Skip to content

Commit

Permalink
[CALCITE-1984] Incorrect rewriting with materialized views using DISTINCT in aggregate functions
Browse files Browse the repository at this point in the history
  • Loading branch information
jcamachor committed Nov 9, 2017
1 parent 77a3549 commit a147d1a
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -1053,9 +1053,17 @@ protected MaterializedViewAggregateRule(RelOptRuleOperand operand,
rexBuilder.makeInputRef(relBuilder.peek(),
aggregate.getGroupCount() + i)));
}
RelNode prevNode = relBuilder.peek();
RelNode result = relBuilder
.aggregate(relBuilder.groupKey(groupSet, null), aggregateCalls)
.build();
if (prevNode == result && groupSet.size() != result.getRowType().getFieldCount()) {
// Aggregate was not inserted but we need to prune columns
result = relBuilder
.push(result)
.project(relBuilder.fields(groupSet.asList()))
.build();
}
if (topProject != null) {
// Top project
return topProject.copy(topProject.getTraitSet(), ImmutableList.of(result));
Expand Down Expand Up @@ -1271,10 +1279,18 @@ protected MaterializedViewAggregateRule(RelOptRuleOperand operand,
rewritingMapping.set(targetIdx, sourceIdx);
}
}
RelNode prevNode = result;
result = relBuilder
.push(result)
.aggregate(relBuilder.groupKey(groupSet, null), aggregateCalls)
.build();
if (prevNode == result && groupSet.size() != result.getRowType().getFieldCount()) {
// Aggregate was not inserted but we need to prune columns
result = relBuilder
.push(result)
.project(relBuilder.fields(groupSet.asList()))
.build();
}
// We introduce a project on top, as group by columns order is lost
List<RexNode> projects = new ArrayList<>();
Mapping inverseMapping = rewritingMapping.inverse();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1582,6 +1582,19 @@ private void checkSatisfiable(RexNode e, String s) {
HR_FKUK_MODEL);
}

/** Runs {@code checkNoMaterialize} on two statements over the same join and
 * the same grouping columns: the first aggregates with
 * {@code count(distinct "salary")}, the second with a plain
 * {@code count("salary")}.
 *
 * <p>NOTE(review): the local names assume the first argument is the
 * materialized view definition and the second is the query; confirm against
 * {@code checkNoMaterialize}. */
@Test public void testJoinAggregateMaterializationAggregateFuncs13() {
  final String materialize =
      "select \"dependents\".\"empid\", \"emps\".\"deptno\", count(distinct \"salary\") as s\n"
          + "from \"emps\"\n"
          + "join \"dependents\" on (\"emps\".\"empid\" = \"dependents\".\"empid\")\n"
          + "group by \"dependents\".\"empid\", \"emps\".\"deptno\"";
  final String query =
      "select \"emps\".\"deptno\", count(\"salary\") as s\n"
          + "from \"emps\"\n"
          + "join \"dependents\" on (\"emps\".\"empid\" = \"dependents\".\"empid\")\n"
          + "group by \"dependents\".\"empid\", \"emps\".\"deptno\"";
  checkNoMaterialize(materialize, query, HR_FKUK_MODEL);
}

@Test public void testJoinMaterialization4() {
checkMaterialize(
"select \"empid\" \"deptno\" from \"emps\"\n"
Expand Down Expand Up @@ -1995,6 +2008,80 @@ public List<Object> apply(JsonBuilder builder) {
}
}

/** Runs {@code checkMaterialize} with a statement grouping on
 * (deptno, empid, salary) and a statement computing
 * {@code count(distinct "empid")} per deptno, and checks that the result
 * contains a single aggregate directly over a scan of {@code m0}.
 *
 * <p>The column empid is already unique, so the COUNT in the expected
 * rewriting carries no DISTINCT. */
@Test public void testAggregateMaterializationOnCountDistinctQuery1() {
  final String materialize =
      "select \"deptno\", \"empid\", \"salary\"\n"
          + "from \"emps\"\n"
          + "group by \"deptno\", \"empid\", \"salary\"";
  final String query =
      "select \"deptno\", count(distinct \"empid\") as c from (\n"
          + "select \"deptno\", \"empid\"\n"
          + "from \"emps\"\n"
          + "group by \"deptno\", \"empid\")\n"
          + "group by \"deptno\"";
  // empid sits at position 1 of the first statement's select list, hence COUNT($1)
  final String expectedPlan =
      "EnumerableAggregate(group=[{0}], C=[COUNT($1)])\n"
          + "  EnumerableTableScan(table=[[hr, m0]]";
  checkMaterialize(materialize, query, HR_FKUK_MODEL,
      CalciteAssert.checkResultContains(expectedPlan));
}

/** Same scenario as the preceding count-distinct test, except that the first
 * statement lists its columns as (deptno, salary, empid), so the expected
 * plan counts column {@code $2} instead of {@code $1}.
 *
 * <p>The column empid is already unique, so the COUNT in the expected
 * rewriting carries no DISTINCT. */
@Test public void testAggregateMaterializationOnCountDistinctQuery2() {
  final String materialize =
      "select \"deptno\", \"salary\", \"empid\"\n"
          + "from \"emps\"\n"
          + "group by \"deptno\", \"salary\", \"empid\"";
  final String query =
      "select \"deptno\", count(distinct \"empid\") as c from (\n"
          + "select \"deptno\", \"empid\"\n"
          + "from \"emps\"\n"
          + "group by \"deptno\", \"empid\")\n"
          + "group by \"deptno\"";
  // empid is the third column of the first statement, hence COUNT($2)
  final String expectedPlan =
      "EnumerableAggregate(group=[{0}], C=[COUNT($2)])\n"
          + "  EnumerableTableScan(table=[[hr, m0]]";
  checkMaterialize(materialize, query, HR_FKUK_MODEL,
      CalciteAssert.checkResultContains(expectedPlan));
}

/** Runs {@code checkMaterialize} with a statement grouping on
 * (deptno, empid, salary) and a statement computing
 * {@code count(distinct "salary")} per deptno.
 *
 * <p>The column salary is not unique, so a different rewriting is expected:
 * an inner aggregate on columns {0, 2} over the scan of {@code m0}, with the
 * COUNT applied on top of it. */
@Test public void testAggregateMaterializationOnCountDistinctQuery3() {
  final String materialize =
      "select \"deptno\", \"empid\", \"salary\"\n"
          + "from \"emps\"\n"
          + "group by \"deptno\", \"empid\", \"salary\"";
  final String query =
      "select \"deptno\", count(distinct \"salary\") from (\n"
          + "select \"deptno\", \"salary\"\n"
          + "from \"emps\"\n"
          + "group by \"deptno\", \"salary\")\n"
          + "group by \"deptno\"";
  // Two stacked aggregates are expected on top of the scan
  final String expectedPlan =
      "EnumerableAggregate(group=[{0}], EXPR$1=[COUNT($1)])\n"
          + "  EnumerableAggregate(group=[{0, 2}])\n"
          + "    EnumerableTableScan(table=[[hr, m0]]";
  checkMaterialize(materialize, query, HR_FKUK_MODEL,
      CalciteAssert.checkResultContains(expectedPlan));
}

/** Runs {@code checkMaterialize} with a statement grouping on
 * (deptno, salary, empid) and a statement computing a plain
 * {@code count("salary")} per deptno over a (deptno, salary) grouping.
 *
 * <p>Although there is no DISTINCT in the COUNT, this is equivalent to the
 * query of the preceding count-distinct test; the expected plan again has
 * two stacked aggregates over the scan of {@code m0}. */
@Test public void testAggregateMaterializationOnCountDistinctQuery4() {
  final String materialize =
      "select \"deptno\", \"salary\", \"empid\"\n"
          + "from \"emps\"\n"
          + "group by \"deptno\", \"salary\", \"empid\"";
  final String query =
      "select \"deptno\", count(\"salary\") from (\n"
          + "select \"deptno\", \"salary\"\n"
          + "from \"emps\"\n"
          + "group by \"deptno\", \"salary\")\n"
          + "group by \"deptno\"";
  // The inner aggregate groups on {0, 1}; the outer COUNT takes no argument
  final String expectedPlan =
      "EnumerableAggregate(group=[{0}], EXPR$1=[COUNT()])\n"
          + "  EnumerableAggregate(group=[{0, 1}])\n"
          + "    EnumerableTableScan(table=[[hr, m0]]";
  checkMaterialize(materialize, query, HR_FKUK_MODEL,
      CalciteAssert.checkResultContains(expectedPlan));
}

@Test public void testMaterializationSubstitution() {
String q = "select *\n"
+ "from (select * from \"emps\" where \"empid\" < 300)\n"
Expand Down Expand Up @@ -2127,7 +2214,7 @@ public static class HrFKUKSchema {
new Employee(100, 10, "Bill", 10000, 1000),
new Employee(200, 20, "Eric", 8000, 500),
new Employee(150, 10, "Sebastian", 7000, null),
new Employee(110, 10, "Theodore", 11500, 250),
new Employee(110, 10, "Theodore", 10000, 250),
};
public final Department[] depts = {
new Department(10, "Sales", Arrays.asList(emps[0], emps[2], emps[3]),
Expand Down

0 comments on commit a147d1a

Please sign in to comment.