Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](nereids)support uncorrelated subquery in join condition #26672

Merged
merged 2 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.BinaryOperator;
import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
import org.apache.doris.nereids.trees.expressions.Exists;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.InSubquery;
Expand All @@ -45,6 +44,7 @@
import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.logical.LogicalSort;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
Expand Down Expand Up @@ -180,16 +180,23 @@ public List<Rule> buildRules() {
Collectors.toList()));
List<Expression> subqueryConjuncts = joinConjuncts.get(true);
if (subqueryConjuncts == null || subqueryConjuncts.stream()
.anyMatch(expr -> !isValidSubqueryConjunct(expr, join.left()))) {
.anyMatch(expr -> !isValidSubqueryConjunct(expr))) {
return join;
}

List<RelatedInfo> relatedInfoList = collectRelatedInfo(
subqueryConjuncts, join.left(), join.right());
if (relatedInfoList.stream().anyMatch(info -> info == RelatedInfo.UnSupported)) {
return join;
}

ImmutableList<Set<SubqueryExpr>> subqueryExprsList = subqueryConjuncts.stream()
.<Set<SubqueryExpr>>map(e -> e.collect(SubqueryExpr.class::isInstance))
.collect(ImmutableList.toImmutableList());
ImmutableList.Builder<Expression> newConjuncts = new ImmutableList.Builder<>();
LogicalPlan applyPlan = null;
LogicalPlan applyPlan;
LogicalPlan leftChildPlan = (LogicalPlan) join.left();
LogicalPlan rightChildPlan = (LogicalPlan) join.right();

// Subquery traversal with the conjunct of and as the granularity.
for (int i = 0; i < subqueryExprsList.size(); ++i) {
Expand All @@ -207,44 +214,97 @@ public List<Rule> buildRules() {

applyPlan = subqueryToApply(
subqueryExprs.stream().collect(ImmutableList.toImmutableList()),
leftChildPlan, context.getSubqueryToMarkJoinSlot(),
relatedInfoList.get(i) == RelatedInfo.RelatedToLeft ? leftChildPlan : rightChildPlan,
context.getSubqueryToMarkJoinSlot(),
ctx.cascadesContext, Optional.of(conjunct), false);
leftChildPlan = applyPlan;
if (relatedInfoList.get(i) == RelatedInfo.RelatedToLeft) {
leftChildPlan = applyPlan;
} else {
rightChildPlan = applyPlan;
}
newConjuncts.add(conjunct);
}
List<Expression> simpleConjuncts = joinConjuncts.get(false);
if (simpleConjuncts != null) {
newConjuncts.addAll(simpleConjuncts);
}
Plan newJoin = join.withConjunctsChildren(join.getHashJoinConjuncts(),
newConjuncts.build(), applyPlan, join.right());
newConjuncts.build(), leftChildPlan, rightChildPlan);
return newJoin;
}))
);
}

private static boolean isValidSubqueryConjunct(Expression expression, Plan leftChild) {
// the subquery must be uncorrelated subquery or only correlated to the left child
// currently only support the following 4 simple scenarios
// 1. col ComparisonPredicate subquery
// 2. col in (subquery)
// 3. exists (subquery)
// 4. col1 ComparisonPredicate subquery or xxx (no more subquery)
List<Slot> slots = leftChild.getOutput();
if (expression instanceof ComparisonPredicate && expression.child(1) instanceof ScalarSubquery) {
ScalarSubquery subquery = (ScalarSubquery) expression.child(1);
return slots.containsAll(subquery.getCorrelateSlots());
} else if (expression instanceof InSubquery) {
return slots.containsAll(((InSubquery) expression).getCorrelateSlots());
} else if (expression instanceof Exists) {
return slots.containsAll(((Exists) expression).getCorrelateSlots());
} else {
private static boolean isValidSubqueryConjunct(Expression expression) {
// only support 1 subquery expr in the expression
// don't support expression like subquery1 or subquery2
return expression.collectToList(SubqueryExpr.class::isInstance).size() == 1;
}

private enum RelatedInfo {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RelatedInfo 这个名字看不出是什么意思。correlatedType 是不是合适一些?

// both subquery and its output don't related to any child. like (select sum(t.a) from t) > 1
Unrelated,
// either subquery or its output only related to left child. like bellow:
// tableLeft.a in (select t.a from t)
// 3 in (select t.b from t where t.a = tableLeft.a)
// tableLeft.a > (select sum(t.a) from t where tableLeft.b = t.b)
RelatedToLeft,
// like above, but related to right child
RelatedToRight,
// subquery related to both left and child is not supported:
// tableLeft.a > (select sum(t.a) from t where t.b = tableRight.b)
UnSupported
}

private ImmutableList<RelatedInfo> collectRelatedInfo(List<Expression> subqueryConjuncts,
Plan leftChild, Plan rightChild) {
int size = subqueryConjuncts.size();
ImmutableList.Builder<RelatedInfo> correlatedInfoList = new ImmutableList.Builder<>();
Set<Slot> leftOutputSlots = leftChild.getOutputSet();
Set<Slot> rightOutputSlots = rightChild.getOutputSet();
for (int i = 0; i < size; ++i) {
Expression expression = subqueryConjuncts.get(i);
List<SubqueryExpr> subqueryExprs = expression.collectToList(SubqueryExpr.class::isInstance);
RelatedInfo relatedInfo = RelatedInfo.UnSupported;
if (subqueryExprs.size() == 1) {
return slots.containsAll(subqueryExprs.get(0).getCorrelateSlots());
SubqueryExpr subqueryExpr = subqueryExprs.get(0);
List<Slot> correlatedSlots = subqueryExpr.getCorrelateSlots();
if (subqueryExpr instanceof ScalarSubquery) {
Set<Slot> inputSlots = expression.getInputSlots();
if (correlatedSlots.isEmpty() && inputSlots.isEmpty()) {
relatedInfo = RelatedInfo.Unrelated;
} else if (leftOutputSlots.containsAll(inputSlots)
&& leftOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToLeft;
} else if (rightOutputSlots.containsAll(inputSlots)
&& rightOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToRight;
}
} else if (subqueryExpr instanceof InSubquery) {
InSubquery inSubquery = (InSubquery) subqueryExpr;
Set<Slot> compareSlots = inSubquery.getCompareExpr().getInputSlots();
if (compareSlots.isEmpty()) {
relatedInfo = RelatedInfo.UnSupported;
} else if (leftOutputSlots.containsAll(compareSlots)
&& leftOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToLeft;
} else if (rightOutputSlots.containsAll(compareSlots)
&& rightOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToRight;
}
} else if (subqueryExpr instanceof Exists) {
if (correlatedSlots.isEmpty()) {
relatedInfo = RelatedInfo.Unrelated;
} else if (leftOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToLeft;
} else if (rightOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToRight;
}
}
}
correlatedInfoList.add(relatedInfo);
}
return false;
return correlatedInfoList.build();
}

private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, LogicalPlan childPlan,
Expand All @@ -270,10 +330,17 @@ private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, LogicalPla
private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot) {
return exists instanceof Exists
&& exists.getQueryPlan()
.anyMatch(planTreeNode -> planTreeNode instanceof LogicalAggregate
&& ((LogicalAggregate<?>) planTreeNode).getGroupByExpressions().isEmpty())
&& !subqueryToMarkJoinSlot.get(exists).isPresent();
&& !subqueryToMarkJoinSlot.get(exists).isPresent()
&& hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
}

private boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
if (plan instanceof LogicalAggregate) {
return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
} else if (plan instanceof LogicalProject || plan instanceof LogicalSort) {
return hasTopLevelAggWithoutGroupBy(plan.child(0));
}
return false;
}

private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -486,3 +486,6 @@ true
-- !cir_5218_exists_ok_6 --
0

-- !doris_7643 --
3 3

56 changes: 56 additions & 0 deletions regression-test/suites/nereids_p0/subquery/test_subquery.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,62 @@ suite("test_subquery") {
select * from nereids_test_query_db.baseall where k1 = (select k1 from nereids_test_query_db.baseall order by k1 desc limit 1)
"""

sql """DROP TABLE IF EXISTS table_1000_undef_undef"""
sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""
sql """CREATE TABLE `table_1000_undef_undef` (
`pk` int(11) NULL,
`col_bigint_undef_signed` bigint(20) NULL,
`col_bigint_undef_signed2` bigint(20) NULL
) ENGINE=OLAP
DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, `col_bigint_undef_signed2`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`pk`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
); """

sql """ CREATE TABLE `table_1000_undef_undef2` (
`pk` int(11) NULL,
`col_bigint_undef_signed` bigint(20) NULL,
`col_bigint_undef_signed2` bigint(20) NULL
) ENGINE=OLAP
DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, `col_bigint_undef_signed2`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`pk`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);"""
explain {
sql """
SELECT `col_bigint_undef_signed` '00:39:36' , `col_bigint_undef_signed` '11:19:45', `col_bigint_undef_signed` '11:55:37', `col_bigint_undef_signed2` '19:01:23'
FROM table_1000_undef_undef2
WHERE EXISTS
(SELECT `col_bigint_undef_signed` '17:38:13' , `col_bigint_undef_signed2` '17:36:21'
FROM table_1000_undef_undef2
WHERE `col_bigint_undef_signed2` NOT IN
(SELECT `col_bigint_undef_signed`
FROM table_1000_undef_undef2
WHERE `col_bigint_undef_signed2` <
(SELECT AVG(`col_bigint_undef_signed`)
FROM table_1000_undef_undef2
WHERE `col_bigint_undef_signed2` < 2)) ) ;
"""
contains("VAGGREGATE")
}

sql """DROP TABLE IF EXISTS table_1000_undef_undef"""
sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""

sql """drop table if exists test_one_row_relation;"""
sql """
CREATE TABLE `test_one_row_relation` (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,34 @@ suite ("sub_query_correlated") {
exception "Unsupported correlated subquery with grouping and/or aggregation";
}

qt_doris_7643 """
SELECT sub_query_correlated_subquery6.*
FROM sub_query_correlated_subquery6
JOIN sub_query_correlated_subquery7
ON sub_query_correlated_subquery6.k2 = sub_query_correlated_subquery7.k3
AND EXISTS
(SELECT sub_query_correlated_subquery8.k1
FROM sub_query_correlated_subquery8 )
AND sub_query_correlated_subquery6.k2 IN
(SELECT sub_query_correlated_subquery8.k2
FROM sub_query_correlated_subquery8 )
AND sub_query_correlated_subquery6.k1 IN
(SELECT sub_query_correlated_subquery8.k2
FROM sub_query_correlated_subquery8
WHERE sub_query_correlated_subquery6.k2 = sub_query_correlated_subquery8.k2 )
AND sub_query_correlated_subquery7.k3 IN
(SELECT sub_query_correlated_subquery8.k1
FROM sub_query_correlated_subquery8 )
AND 10 >
(SELECT min(sub_query_correlated_subquery8.k2)
FROM sub_query_correlated_subquery8 )
AND sub_query_correlated_subquery7.k3 IN
(SELECT sub_query_correlated_subquery8.k2
FROM sub_query_correlated_subquery8
WHERE sub_query_correlated_subquery7.v1 = sub_query_correlated_subquery8.k2 )
ORDER BY sub_query_correlated_subquery6.k1, sub_query_correlated_subquery6.k2;
"""

// order_qt_doris_6937_2 """
// select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 10 order by k1, k2;
// """
Expand Down
2 changes: 1 addition & 1 deletion regression-test/suites/query_p0/join/test_join.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,7 @@ suite("test_join", "query,p0") {
qt_join_on_predicate7"""SELECT t2.k1,t2.k2,t3.k1,t3.k2 FROM baseall t2 LEFT JOIN test t3 ON t2.k2=t3.k2 WHERE t2.k1 = 4 OR (t2.k1 > 4 AND t3.k1 IS NULL) order by 1, 2, 3, 4"""

test {
sql "select a.k1 from baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;"
sql "select /*+ SET_VAR(enable_nereids_planner=false) */ a.k1 from baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;"
exception "Not support OnClause contain Subquery"
}

Expand Down