Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.logical.LogicalAggregate;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.LogicVisitor;
import org.apache.calcite.rex.RexCorrelVariable;
Expand All @@ -39,6 +40,7 @@
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlAggFunction;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.fun.SqlCountAggFunction;
import org.apache.calcite.sql.fun.SqlQuantifyOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql2rel.RelDecorrelator;
Expand Down Expand Up @@ -309,8 +311,22 @@ private RexNode rewriteExists(RexSubQuery e, Set<CorrelationId> variablesSet,
}

builder.as("dt");
boolean generateNullsOnRight = true;
if (e.rel instanceof LogicalAggregate) {
// SELECT f0, count(*) FROM t GROUP BY () will always return 1 row.
// That means, the RHS never generates null.
final LogicalAggregate aggregate = (LogicalAggregate) e.rel;
if (aggregate.getGroupSet().isEmpty()
&& aggregate.getAggCallList().stream()
.anyMatch(c -> c.getAggregation() instanceof SqlCountAggFunction)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't need to be COUNT: any aggregate function with an empty group set will always generate exactly one row.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but only the COUNT aggregate never outputs nulls.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should be able to trust the aggregate function to derive its own nullability.

For example, SUM knows that even when applied to a not-null column x, if the groupSet is empty it may return null. COUNT knows that it never returns null.

So maybe just look at the rowType of Aggregate, and look at whether the columns are NOT NULL.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, Julian, let me have a try :)

generateNullsOnRight = false;
}
}

builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
builder.join(
generateNullsOnRight ? JoinRelType.LEFT : JoinRelType.INNER,
builder.literal(true),
variablesSet);

return builder.isNotNull(Util.last(builder.fields()));
}
Expand Down
19 changes: 14 additions & 5 deletions core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.Values;
import org.apache.calcite.rel.logical.LogicalAggregate;
import org.apache.calcite.rel.logical.LogicalCorrelate;
import org.apache.calcite.rel.logical.LogicalFilter;
import org.apache.calcite.rel.logical.LogicalJoin;
import org.apache.calcite.rel.logical.LogicalProject;
import org.apache.calcite.rel.logical.LogicalSnapshot;
import org.apache.calcite.rel.logical.LogicalTableFunctionScan;
import org.apache.calcite.rel.metadata.RelMdUtil;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.rules.FilterCorrelateRule;
Expand Down Expand Up @@ -432,10 +432,6 @@ public Frame decorrelateRel(Sort rel) {
return register(rel, newSort, frame.oldToNewOutputs, frame.corDefOutputs);
}

/**
 * Visits a {@link Values} node.
 *
 * <p>NOTE(review): {@code null} presumably signals to the caller that no
 * rewritten frame was produced for this node; confirm against the dispatcher.
 *
 * @param rel the Values node being visited
 * @return always {@code null}
 */
public Frame decorrelateRel(Values rel) {
// There are no inputs, so rel does not need to be changed.
return null;
}

public Frame decorrelateRel(LogicalAggregate rel) {
return decorrelateRel((Aggregate) rel);
Expand Down Expand Up @@ -1017,6 +1013,13 @@ public Frame decorrelateRel(LogicalSnapshot rel) {
return decorrelateRel((RelNode) rel);
}

/**
 * Visits a {@link LogicalTableFunctionScan}.
 *
 * <p>If the scan's call contains a correlation, no frame is produced;
 * otherwise the node is handed to the generic {@code RelNode} overload.
 *
 * @param rel the table function scan being visited
 * @return {@code null} when the call contains a correlation, otherwise the
 *     result of the {@code RelNode} overload
 */
public Frame decorrelateRel(LogicalTableFunctionScan rel) {
  final boolean correlated = RexUtil.containsCorrelation(rel.getCall());
  // A table function whose call is correlated is not rewritten here.
  return correlated ? null : decorrelateRel((RelNode) rel);
}

/**
 * Visits a {@link LogicalFilter} by forwarding it to the {@code Filter}
 * overload.
 *
 * @param rel the logical filter being visited
 * @return whatever the {@code Filter} overload returns
 */
public Frame decorrelateRel(LogicalFilter rel) {
  // Widen the static type so that the Filter overload is selected.
  final Filter filter = rel;
  return decorrelateRel(filter);
}
Expand Down Expand Up @@ -1385,6 +1388,12 @@ private RelNode aggregateCorrelatorOutput(
pair.left,
projectPulledAboveLeftCorrelator,
isCount);
// Fix the nullability.
if (projectPulledAboveLeftCorrelator) {
newProjExpr = relBuilder.getRexBuilder().makeAbstractCast(
relBuilder.getTypeFactory().createTypeWithNullability(newProjExpr.getType(), true),
newProjExpr);
}
newProjects.add(Pair.of(newProjExpr, pair.right));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3369,10 +3369,15 @@ private void checkLiteral2(String expression, String expected) {
+ " lateral (select d.\"department_id\" + 1 as d_plusOne"
+ " from (values(true)))";

final String expected = "SELECT \"$cor0\".\"department_id\", \"$cor0\".\"D_PLUSONE\"\n"
+ "FROM \"foodmart\".\"department\" AS \"$cor0\",\n"
+ "LATERAL (SELECT \"$cor0\".\"department_id\" + 1 AS \"D_PLUSONE\"\n"
+ "FROM (VALUES (TRUE)) AS \"t\" (\"EXPR$0\")) AS \"t0\"";
final String expected = "SELECT \"department\".\"department_id\", \"t2\".\"D_PLUSONE\"\n"
+ "FROM \"foodmart\".\"department\"\n"
+ "INNER JOIN (SELECT \"t1\".\"department_id\" + 1 AS \"D_PLUSONE\","
+ " \"t1\".\"department_id\"\n"
+ "FROM (VALUES (TRUE)) AS \"t\" (\"EXPR$0\"),\n"
+ "(SELECT \"department_id\"\n"
+ "FROM \"foodmart\".\"department\"\n"
+ "GROUP BY \"department_id\") AS \"t1\") AS \"t2\" "
+ "ON \"department\".\"department_id\" = \"t2\".\"department_id\"";
sql(sql).ok(expected);
}

Expand Down
24 changes: 24 additions & 0 deletions core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5618,6 +5618,30 @@ private Sql checkSubQuery(String sql) {
checkSubQuery(sql).withLateDecorrelation(true).check();
}

/** Runs {@code checkSubQuery} with late decorrelation enabled on an
 * EXISTS sub-query whose body is a correlated {@code count(t2.id)}
 * aggregate without a GROUP BY clause. */
@Test public void testDecorrelateExists2() throws Exception {
final String sql = "select \n"
+ " exists (select count(t2.id) \n"
+ " from (values(1), (2)) t2(id) where t2.id = t1.id)\n"
+ " from (values(3), (4)) t1(id)";
checkSubQuery(sql).withLateDecorrelation(true).check();
}

/** Runs {@code checkSubQuery} with late decorrelation enabled on an
 * EXISTS sub-query whose body is a correlated {@code min(t2.id)}
 * aggregate without a GROUP BY clause. */
@Test public void testDecorrelateExists3() throws Exception {
final String sql = "select \n"
+ " exists (select min(t2.id) \n"
+ " from (values(1), (2)) t2(id) where t2.id = t1.id)\n"
+ " from (values(3), (4)) t1(id)";
checkSubQuery(sql).withLateDecorrelation(true).check();
}

/** Runs {@code checkSubQuery} with late decorrelation enabled on a scalar
 * sub-query in the SELECT list that computes a correlated
 * {@code count(t2.id)} without a GROUP BY clause. */
@Test public void testDecorrelateScalarSubQuery() throws Exception {
final String sql = "select \n"
+ " (select count(t2.id) \n"
+ " from (values(1), (2)) t2(id) where t2.id = t1.id)\n"
+ " from (values(1), (2)) t1(id)";
checkSubQuery(sql).withLateDecorrelation(true).check();
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-1511">[CALCITE-1511]
* AssertionError while decorrelating query with two EXISTS
Expand Down
117 changes: 117 additions & 0 deletions core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,123 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
LogicalProject(i=[true])
LogicalFilter(condition=[=($cor0.DEPTNO, $7)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testDecorrelateExists2">
<Resource name="sql">
<![CDATA[select
exists (select count(t2.id)
from (values(1), (2)) t2(id) where t2.id = t1.id)
from (values(3), (4)) t1(id)]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalProject(EXPR$0=[EXISTS({
LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalFilter(condition=[=($0, $cor0.ID)])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
})])
LogicalValues(tuples=[[{ 3 }, { 4 }]])
]]>
</Resource>
<Resource name="planMid">
<![CDATA[
LogicalProject(EXPR$0=[true])
LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])
LogicalValues(tuples=[[{ 3 }, { 4 }]])
LogicalProject(i=[true])
LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalFilter(condition=[=($0, $cor0.ID)])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalProject(EXPR$0=[true])
LogicalJoin(condition=[=($0, $2)], joinType=[inner])
LogicalValues(tuples=[[{ 3 }, { 4 }]])
LogicalProject(i=[true], ID=[$0])
LogicalAggregate(group=[{0}], EXPR$0=[COUNT()])
LogicalProject(ID=[$0])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
]]>
</Resource>
</TestCase>
<TestCase name="testDecorrelateExists3">
<Resource name="sql">
<![CDATA[select
exists (select min(t2.id)
from (values(1), (2)) t2(id) where t2.id = t1.id)
from (values(3), (4)) t1(id)]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalProject(EXPR$0=[EXISTS({
LogicalAggregate(group=[{}], EXPR$0=[MIN($0)])
LogicalFilter(condition=[=($0, $cor0.ID)])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
})])
LogicalValues(tuples=[[{ 3 }, { 4 }]])
]]>
</Resource>
<Resource name="planMid">
<![CDATA[
LogicalProject(EXPR$0=[IS NOT NULL($1)])
LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0}])
LogicalValues(tuples=[[{ 3 }, { 4 }]])
LogicalProject(i=[true])
LogicalAggregate(group=[{}], EXPR$0=[MIN($0)])
LogicalFilter(condition=[=($0, $cor0.ID)])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalProject(EXPR$0=[true])
LogicalJoin(condition=[=($0, $1)], joinType=[left])
LogicalValues(tuples=[[{ 3 }, { 4 }]])
LogicalAggregate(group=[{0}], EXPR$0=[MIN($0)])
LogicalProject(ID=[$0])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
]]>
</Resource>
</TestCase>
<TestCase name="testDecorrelateScalarSubQuery">
<Resource name="sql">
<![CDATA[select
(select count(t2.id)
from (values(1), (2)) t2(id) where t2.id = t1.id)
from (values(1), (2)) t1(id)]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalProject(EXPR$0=[$SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalFilter(condition=[=($0, $cor0.ID)])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
})])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
]]>
</Resource>
<Resource name="planMid">
<![CDATA[
LogicalProject(EXPR$0=[$1])
LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0}])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalFilter(condition=[=($0, $cor0.ID)])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalProject(EXPR$0=[CASE(IS NULL($2), 0:BIGINT, $2)])
LogicalJoin(condition=[=($0, $1)], joinType=[left])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
LogicalAggregate(group=[{0}], EXPR$0=[COUNT()])
LogicalProject(ID=[$0])
LogicalValues(tuples=[[{ 1 }, { 2 }]])
]]>
</Resource>
</TestCase>
Expand Down
Loading