Skip to content

Commit

Permalink
[fix](nereids) support uncorrelated subquery in join condition (#26893)
Browse files Browse the repository at this point in the history
pick from master #26672 
commit id: 17b1108
  • Loading branch information
starocean999 committed Nov 16, 2023
1 parent 4d24350 commit 59d0176
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.BinaryOperator;
import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
import org.apache.doris.nereids.trees.expressions.Exists;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.InSubquery;
Expand All @@ -45,6 +44,7 @@
import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.logical.LogicalSort;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
Expand Down Expand Up @@ -180,16 +180,23 @@ public List<Rule> buildRules() {
Collectors.toList()));
List<Expression> subqueryConjuncts = joinConjuncts.get(true);
if (subqueryConjuncts == null || subqueryConjuncts.stream()
.anyMatch(expr -> !isValidSubqueryConjunct(expr, join.left()))) {
.anyMatch(expr -> !isValidSubqueryConjunct(expr))) {
return join;
}

List<RelatedInfo> relatedInfoList = collectRelatedInfo(
subqueryConjuncts, join.left(), join.right());
if (relatedInfoList.stream().anyMatch(info -> info == RelatedInfo.UnSupported)) {
return join;
}

ImmutableList<Set<SubqueryExpr>> subqueryExprsList = subqueryConjuncts.stream()
.<Set<SubqueryExpr>>map(e -> e.collect(SubqueryExpr.class::isInstance))
.collect(ImmutableList.toImmutableList());
ImmutableList.Builder<Expression> newConjuncts = new ImmutableList.Builder<>();
LogicalPlan applyPlan = null;
LogicalPlan applyPlan;
LogicalPlan leftChildPlan = (LogicalPlan) join.left();
LogicalPlan rightChildPlan = (LogicalPlan) join.right();

// Subquery traversal with the conjunct of and as the granularity.
for (int i = 0; i < subqueryExprsList.size(); ++i) {
Expand All @@ -207,44 +214,97 @@ public List<Rule> buildRules() {

applyPlan = subqueryToApply(
subqueryExprs.stream().collect(ImmutableList.toImmutableList()),
leftChildPlan, context.getSubqueryToMarkJoinSlot(),
relatedInfoList.get(i) == RelatedInfo.RelatedToLeft ? leftChildPlan : rightChildPlan,
context.getSubqueryToMarkJoinSlot(),
ctx.cascadesContext, Optional.of(conjunct), false);
leftChildPlan = applyPlan;
if (relatedInfoList.get(i) == RelatedInfo.RelatedToLeft) {
leftChildPlan = applyPlan;
} else {
rightChildPlan = applyPlan;
}
newConjuncts.add(conjunct);
}
List<Expression> simpleConjuncts = joinConjuncts.get(false);
if (simpleConjuncts != null) {
newConjuncts.addAll(simpleConjuncts);
}
Plan newJoin = join.withConjunctsChildren(join.getHashJoinConjuncts(),
newConjuncts.build(), applyPlan, join.right());
newConjuncts.build(), leftChildPlan, rightChildPlan);
return newJoin;
}))
);
}

private static boolean isValidSubqueryConjunct(Expression expression, Plan leftChild) {
// the subquery must be uncorrelated subquery or only correlated to the left child
// currently only support the following 4 simple scenarios
// 1. col ComparisonPredicate subquery
// 2. col in (subquery)
// 3. exists (subquery)
// 4. col1 ComparisonPredicate subquery or xxx (no more subquery)
List<Slot> slots = leftChild.getOutput();
if (expression instanceof ComparisonPredicate && expression.child(1) instanceof ScalarSubquery) {
ScalarSubquery subquery = (ScalarSubquery) expression.child(1);
return slots.containsAll(subquery.getCorrelateSlots());
} else if (expression instanceof InSubquery) {
return slots.containsAll(((InSubquery) expression).getCorrelateSlots());
} else if (expression instanceof Exists) {
return slots.containsAll(((Exists) expression).getCorrelateSlots());
} else {
private static boolean isValidSubqueryConjunct(Expression expression) {
// only support 1 subquery expr in the expression
// don't support expression like subquery1 or subquery2
return expression.collectToList(SubqueryExpr.class::isInstance).size() == 1;
}

private enum RelatedInfo {
// both subquery and its output don't related to any child. like (select sum(t.a) from t) > 1
Unrelated,
// either subquery or its output only related to left child. like bellow:
// tableLeft.a in (select t.a from t)
// 3 in (select t.b from t where t.a = tableLeft.a)
// tableLeft.a > (select sum(t.a) from t where tableLeft.b = t.b)
RelatedToLeft,
// like above, but related to right child
RelatedToRight,
// subquery related to both left and child is not supported:
// tableLeft.a > (select sum(t.a) from t where t.b = tableRight.b)
UnSupported
}

private ImmutableList<RelatedInfo> collectRelatedInfo(List<Expression> subqueryConjuncts,
Plan leftChild, Plan rightChild) {
int size = subqueryConjuncts.size();
ImmutableList.Builder<RelatedInfo> correlatedInfoList = new ImmutableList.Builder<>();
Set<Slot> leftOutputSlots = leftChild.getOutputSet();
Set<Slot> rightOutputSlots = rightChild.getOutputSet();
for (int i = 0; i < size; ++i) {
Expression expression = subqueryConjuncts.get(i);
List<SubqueryExpr> subqueryExprs = expression.collectToList(SubqueryExpr.class::isInstance);
RelatedInfo relatedInfo = RelatedInfo.UnSupported;
if (subqueryExprs.size() == 1) {
return slots.containsAll(subqueryExprs.get(0).getCorrelateSlots());
SubqueryExpr subqueryExpr = subqueryExprs.get(0);
List<Slot> correlatedSlots = subqueryExpr.getCorrelateSlots();
if (subqueryExpr instanceof ScalarSubquery) {
Set<Slot> inputSlots = expression.getInputSlots();
if (correlatedSlots.isEmpty() && inputSlots.isEmpty()) {
relatedInfo = RelatedInfo.Unrelated;
} else if (leftOutputSlots.containsAll(inputSlots)
&& leftOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToLeft;
} else if (rightOutputSlots.containsAll(inputSlots)
&& rightOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToRight;
}
} else if (subqueryExpr instanceof InSubquery) {
InSubquery inSubquery = (InSubquery) subqueryExpr;
Set<Slot> compareSlots = inSubquery.getCompareExpr().getInputSlots();
if (compareSlots.isEmpty()) {
relatedInfo = RelatedInfo.UnSupported;
} else if (leftOutputSlots.containsAll(compareSlots)
&& leftOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToLeft;
} else if (rightOutputSlots.containsAll(compareSlots)
&& rightOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToRight;
}
} else if (subqueryExpr instanceof Exists) {
if (correlatedSlots.isEmpty()) {
relatedInfo = RelatedInfo.Unrelated;
} else if (leftOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToLeft;
} else if (rightOutputSlots.containsAll(correlatedSlots)) {
relatedInfo = RelatedInfo.RelatedToRight;
}
}
}
correlatedInfoList.add(relatedInfo);
}
return false;
return correlatedInfoList.build();
}

private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, LogicalPlan childPlan,
Expand All @@ -270,10 +330,17 @@ private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, LogicalPla
private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot) {
return exists instanceof Exists
&& exists.getQueryPlan()
.anyMatch(planTreeNode -> planTreeNode instanceof LogicalAggregate
&& ((LogicalAggregate<?>) planTreeNode).getGroupByExpressions().isEmpty())
&& !subqueryToMarkJoinSlot.get(exists).isPresent();
&& !subqueryToMarkJoinSlot.get(exists).isPresent()
&& hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
}

private boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
if (plan instanceof LogicalAggregate) {
return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
} else if (plan instanceof LogicalProject || plan instanceof LogicalSort) {
return hasTopLevelAggWithoutGroupBy(plan.child(0));
}
return false;
}

private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -486,3 +486,6 @@ true
-- !cir_5218_exists_ok_6 --
0

-- !doris_7643 --
3 3

56 changes: 56 additions & 0 deletions regression-test/suites/nereids_p0/subquery/test_subquery.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,62 @@ suite("test_subquery") {
select * from nereids_test_query_db.baseall where k1 = (select k1 from nereids_test_query_db.baseall order by k1 desc limit 1)
"""

sql """DROP TABLE IF EXISTS table_1000_undef_undef"""
sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""
sql """CREATE TABLE `table_1000_undef_undef` (
`pk` int(11) NULL,
`col_bigint_undef_signed` bigint(20) NULL,
`col_bigint_undef_signed2` bigint(20) NULL
) ENGINE=OLAP
DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, `col_bigint_undef_signed2`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`pk`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
); """

sql """ CREATE TABLE `table_1000_undef_undef2` (
`pk` int(11) NULL,
`col_bigint_undef_signed` bigint(20) NULL,
`col_bigint_undef_signed2` bigint(20) NULL
) ENGINE=OLAP
DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, `col_bigint_undef_signed2`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`pk`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);"""
explain {
sql """
SELECT `col_bigint_undef_signed` '00:39:36' , `col_bigint_undef_signed` '11:19:45', `col_bigint_undef_signed` '11:55:37', `col_bigint_undef_signed2` '19:01:23'
FROM table_1000_undef_undef2
WHERE EXISTS
(SELECT `col_bigint_undef_signed` '17:38:13' , `col_bigint_undef_signed2` '17:36:21'
FROM table_1000_undef_undef2
WHERE `col_bigint_undef_signed2` NOT IN
(SELECT `col_bigint_undef_signed`
FROM table_1000_undef_undef2
WHERE `col_bigint_undef_signed2` <
(SELECT AVG(`col_bigint_undef_signed`)
FROM table_1000_undef_undef2
WHERE `col_bigint_undef_signed2` < 2)) ) ;
"""
contains("VAGGREGATE")
}

sql """DROP TABLE IF EXISTS table_1000_undef_undef"""
sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""

sql """drop table if exists test_one_row_relation;"""
sql """
CREATE TABLE `test_one_row_relation` (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,34 @@ suite ("sub_query_correlated") {
exception "Unsupported correlated subquery with grouping and/or aggregation";
}

qt_doris_7643 """
SELECT sub_query_correlated_subquery6.*
FROM sub_query_correlated_subquery6
JOIN sub_query_correlated_subquery7
ON sub_query_correlated_subquery6.k2 = sub_query_correlated_subquery7.k3
AND EXISTS
(SELECT sub_query_correlated_subquery8.k1
FROM sub_query_correlated_subquery8 )
AND sub_query_correlated_subquery6.k2 IN
(SELECT sub_query_correlated_subquery8.k2
FROM sub_query_correlated_subquery8 )
AND sub_query_correlated_subquery6.k1 IN
(SELECT sub_query_correlated_subquery8.k2
FROM sub_query_correlated_subquery8
WHERE sub_query_correlated_subquery6.k2 = sub_query_correlated_subquery8.k2 )
AND sub_query_correlated_subquery7.k3 IN
(SELECT sub_query_correlated_subquery8.k1
FROM sub_query_correlated_subquery8 )
AND 10 >
(SELECT min(sub_query_correlated_subquery8.k2)
FROM sub_query_correlated_subquery8 )
AND sub_query_correlated_subquery7.k3 IN
(SELECT sub_query_correlated_subquery8.k2
FROM sub_query_correlated_subquery8
WHERE sub_query_correlated_subquery7.v1 = sub_query_correlated_subquery8.k2 )
ORDER BY sub_query_correlated_subquery6.k1, sub_query_correlated_subquery6.k2;
"""

// order_qt_doris_6937_2 """
// select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 10 order by k1, k2;
// """
Expand Down
2 changes: 1 addition & 1 deletion regression-test/suites/query_p0/join/test_join.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,7 @@ suite("test_join", "query,p0") {
qt_join_on_predicate7"""SELECT t2.k1,t2.k2,t3.k1,t3.k2 FROM baseall t2 LEFT JOIN test t3 ON t2.k2=t3.k2 WHERE t2.k1 = 4 OR (t2.k1 > 4 AND t3.k1 IS NULL) order by 1, 2, 3, 4"""

test {
sql "select a.k1 from baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;"
sql "select /*+ SET_VAR(enable_nereids_planner=false) */ a.k1 from baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;"
exception "Not support OnClause contain Subquery"
}

Expand Down

0 comments on commit 59d0176

Please sign in to comment.