From 30b0e84b9b9c08487d81f6e2bb90156c776243da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E5=81=A5?= Date: Thu, 7 Mar 2024 19:50:02 +0800 Subject: [PATCH 1/2] [feature](Nereids) support make miss slot as null alias when converting anti join (#31854) transform project(A.*, B.slot) - filter(B.slot is null) - LeftOuterJoin(A, B) to project(A.*, null as B.slot) - LeftAntiJoin(A, B) --- .../rewrite/ConvertOuterJoinToAntiJoin.java | 76 ++++++++----------- .../ConvertOuterJoinToAntiJoinTest.java | 2 +- .../transform_outer_join_to_anti.groovy | 20 +++++ 3 files changed, 54 insertions(+), 44 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ConvertOuterJoinToAntiJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ConvertOuterJoinToAntiJoin.java index 74bd7e29142d03..ff1436252e3f1e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ConvertOuterJoinToAntiJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ConvertOuterJoinToAntiJoin.java @@ -19,8 +19,10 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; @@ -28,8 +30,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.util.TypeUtils; -import com.google.common.collect.ImmutableSet; - +import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -45,23 +46,15 @@ public class ConvertOuterJoinToAntiJoin extends 
OneRewriteRuleFactory { @Override public Rule build() { - return logicalProject(logicalFilter(logicalJoin() - .when(join -> join.getJoinType().isOuterJoin()))) + return logicalFilter(logicalJoin() + .when(join -> join.getJoinType().isOuterJoin())) .then(this::toAntiJoin) .toRule(RuleType.CONVERT_OUTER_JOIN_TO_ANTI); } - private Plan toAntiJoin(LogicalProject>> project) { - LogicalFilter> filter = project.child(); + private Plan toAntiJoin(LogicalFilter> filter) { LogicalJoin join = filter.child(); - boolean leftOutput = join.left().getOutputSet().containsAll(project.getInputSlots()); - boolean rightOutput = join.right().getOutputSet().containsAll(project.getInputSlots()); - - if (!leftOutput && !rightOutput) { - return null; - } - Set alwaysNullSlots = filter.getConjuncts().stream() .filter(p -> TypeUtils.isNull(p).isPresent()) .flatMap(p -> p.getInputSlots().stream()) @@ -73,36 +66,33 @@ private Plan toAntiJoin(LogicalProject>> p .filter(s -> alwaysNullSlots.contains(s) && !s.nullable()) .collect(Collectors.toSet()); - Plan res = project; - if (join.getJoinType().isLeftOuterJoin() && !rightAlwaysNullSlots.isEmpty() && leftOutput) { - // When there is right slot always null, we can turn left outer join to left anti join - Set predicates = filter.getExpressions().stream() - .filter(p -> !(TypeUtils.isNull(p).isPresent() - && rightAlwaysNullSlots.containsAll(p.getInputSlots()))) - .collect(ImmutableSet.toImmutableSet()); - boolean containRightSlot = predicates.stream() - .flatMap(p -> p.getInputSlots().stream()) - .anyMatch(join.right().getOutputSet()::contains); - if (!containRightSlot) { - res = join.withJoinType(JoinType.LEFT_ANTI_JOIN); - res = predicates.isEmpty() ? 
res : filter.withConjuncts(predicates).withChildren(res); - res = project.withChildren(res); - } + Plan newJoin = null; + if (join.getJoinType().isLeftOuterJoin() && !rightAlwaysNullSlots.isEmpty()) { + newJoin = join.withJoinType(JoinType.LEFT_ANTI_JOIN); } - if (join.getJoinType().isRightOuterJoin() && !leftAlwaysNullSlots.isEmpty() && rightOutput) { - Set predicates = filter.getExpressions().stream() - .filter(p -> !(TypeUtils.isNull(p).isPresent() - && leftAlwaysNullSlots.containsAll(p.getInputSlots()))) - .collect(ImmutableSet.toImmutableSet()); - boolean containLeftSlot = predicates.stream() - .flatMap(p -> p.getInputSlots().stream()) - .anyMatch(join.left().getOutputSet()::contains); - if (!containLeftSlot) { - res = join.withJoinType(JoinType.RIGHT_ANTI_JOIN); - res = predicates.isEmpty() ? res : filter.withConjuncts(predicates).withChildren(res); - res = project.withChildren(res); - } + if (join.getJoinType().isRightOuterJoin() && !leftAlwaysNullSlots.isEmpty()) { + newJoin = join.withJoinType(JoinType.RIGHT_ANTI_JOIN); + } + if (newJoin == null) { + return null; + } + + if (!newJoin.getOutputSet().containsAll(filter.getInputSlots())) { + // if there are slots that don't belong to join output, we use null alias to replace them + // such as: + // project(A.id, null as B.id) + // - (A left anti join B) + Set joinOutput = newJoin.getOutputSet(); + List projects = filter.getOutput().stream() + .map(s -> { + if (joinOutput.contains(s)) { + return s; + } else { + return new Alias(s.getExprId(), new NullLiteral(s.getDataType()), s.getName()); + } + }).collect(Collectors.toList()); + newJoin = new LogicalProject<>(projects, newJoin); } - return res; + return filter.withChildren(newJoin); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/ConvertOuterJoinToAntiJoinTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/ConvertOuterJoinToAntiJoinTest.java index 20b36d3272e97d..1159fc2a7cec6d 100644 --- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/ConvertOuterJoinToAntiJoinTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/ConvertOuterJoinToAntiJoinTest.java @@ -111,7 +111,7 @@ void testEliminateLeftWithRightPredicate() { .applyTopDown(new InferFilterNotNull()) .applyTopDown(new ConvertOuterJoinToAntiJoin()) .printlnTree() - .matches(logicalJoin().when(join -> join.getJoinType().isLeftOuterJoin())); + .matches(logicalJoin().when(join -> join.getJoinType().isLeftAntiJoin())); } @Test diff --git a/regression-test/suites/nereids_syntax_p0/transform_outer_join_to_anti.groovy b/regression-test/suites/nereids_syntax_p0/transform_outer_join_to_anti.groovy index 06f87359d92d09..3628063f43e9d3 100644 --- a/regression-test/suites/nereids_syntax_p0/transform_outer_join_to_anti.groovy +++ b/regression-test/suites/nereids_syntax_p0/transform_outer_join_to_anti.groovy @@ -62,5 +62,25 @@ suite("transform_outer_join_to_anti") { sql("select eliminate_outer_join_B.* from eliminate_outer_join_A right outer join eliminate_outer_join_B on eliminate_outer_join_B.b = eliminate_outer_join_A.a where eliminate_outer_join_A.null_a is null") contains "OUTER JOIN" } + + explain { + sql("select eliminate_outer_join_A.* from eliminate_outer_join_A left outer join eliminate_outer_join_B on eliminate_outer_join_B.b = eliminate_outer_join_A.a where eliminate_outer_join_B.b is null or eliminate_outer_join_A.null_a is null") + contains "OUTER JOIN" + } + + explain { + sql("select * from eliminate_outer_join_A left outer join eliminate_outer_join_B on eliminate_outer_join_B.b = eliminate_outer_join_A.a where eliminate_outer_join_B.b is null and eliminate_outer_join_A.null_a is null") + contains "ANTI JOIN" + } + + explain { + sql("select * from eliminate_outer_join_A left outer join eliminate_outer_join_B on eliminate_outer_join_B.b = eliminate_outer_join_A.a where eliminate_outer_join_B.b is null and eliminate_outer_join_B.null_b is null") + contains 
"ANTI JOIN" + } + + explain { + sql("select * from eliminate_outer_join_A right outer join eliminate_outer_join_B on eliminate_outer_join_B.b = eliminate_outer_join_A.a where eliminate_outer_join_A.a is null and eliminate_outer_join_B.null_b is null and eliminate_outer_join_A.null_a is null") + contains "ANTI JOIN" + } } From 9b005cab09deb6949c5fe0ab59e40cc45f82bd8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E5=81=A5?= Date: Fri, 15 Mar 2024 19:21:26 +0800 Subject: [PATCH 2/2] [fix](Nereids): don't pushdown project when project contains both side of join (#32214) --- .../join/PushdownProjectThroughSemiJoin.java | 19 +++++++++++ .../PushdownProjectThroughSemiJoinTest.java | 32 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughSemiJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughSemiJoin.java index 121994082363a5..7da6445d6988f9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughSemiJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughSemiJoin.java @@ -38,6 +38,7 @@ /** * Rule for pushdown project through left-semi/anti join * Just push down project inside join to avoid to push the top of Join-Cluster. + * Note this rule is only used to push down project between join for join ordering. *
  *     Join                     Join
  *      |                        |
@@ -61,6 +62,9 @@ public List buildRules() {
                     .whenNot(j -> j.left().child().hasJoinHint())
                     .then(topJoin -> {
                         LogicalProject> project = topJoin.left();
+                        if (projectBothJoinSide(project)) {
+                            return null;
+                        }
                         Plan newLeft = pushdownProject(project);
                         return topJoin.withChildren(newLeft, topJoin.right());
                     }).toRule(RuleType.PUSHDOWN_PROJECT_THROUGH_SEMI_JOIN_LEFT),
@@ -72,12 +76,27 @@ public List buildRules() {
                     .whenNot(j -> j.right().child().hasJoinHint())
                     .then(topJoin -> {
                         LogicalProject> project = topJoin.right();
+                        if (projectBothJoinSide(project)) {
+                            return null;
+                        }
                         Plan newRight = pushdownProject(project);
                         return topJoin.withChildren(topJoin.left(), newRight);
                     }).toRule(RuleType.PUSHDOWN_PROJECT_THROUGH_SEMI_JOIN_RIGHT)
                 );
     }
 
+    private boolean projectBothJoinSide(LogicalProject> project) {
+        // Returns true when the project's output set contains at least one output slot of the
+        // join's left child and at least one output slot of the join's right child.
+        // A project that spans both sides of the join can't be pushed down, for example:
+        //  Project(l, null as r) on top of L(l) left anti join R(r)
+        LogicalJoin join = project.child();
+        Set projectOutput = project.getOutputSet();
+        boolean containLeft = join.left().getOutput().stream().anyMatch(projectOutput::contains);
+        boolean containRight = join.right().getOutput().stream().anyMatch(projectOutput::contains);
+        return containRight && containLeft;
+    }
+
     private Plan pushdownProject(LogicalProject> project) {
         LogicalJoin join = project.child();
         Set conditionLeftSlots = CBOUtils.joinChildConditionSlots(join, true);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughSemiJoinTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughSemiJoinTest.java
index 862580208e6489..74c85c60204043 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughSemiJoinTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughSemiJoinTest.java
@@ -22,6 +22,7 @@
 import org.apache.doris.nereids.trees.expressions.Alias;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
 import org.apache.doris.nereids.trees.plans.JoinType;
 import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
@@ -133,4 +134,35 @@ void pushComplexProject() {
                         )
                 );
     }
+
+    @Test
+    void testProjectLiteral() {
+        List<NamedExpression> projectExprs = ImmutableList.of(
+                new Alias(new Add(scan1.getOutput().get(0), Literal.of(1)), "alias"),
+                new Alias(scan2.getOutput().get(0).getExprId(), new NullLiteral(), scan2.getOutput().get(0).getName())
+        );
+        // left semi join: project an expression over scan1 plus a NULL alias reusing a scan2 slot's ExprId
+        LogicalPlan plan = new LogicalPlanBuilder(scan1)
+                .join(scan2, JoinType.LEFT_SEMI_JOIN, Pair.of(1, 1))
+                .projectExprs(projectExprs)
+                .join(scan3, JoinType.INNER_JOIN, Pair.of(1, 1))
+                .build();
+        PlanChecker.from(MemoTestUtils.createConnectContext(), plan)
+                .applyExploration(PushdownProjectThroughSemiJoin.INSTANCE.buildRules())
+                .nonMatch(logicalJoin(logicalJoin(logicalProject(), group()), group()));
+
+        projectExprs = ImmutableList.of(
+                new Alias(new Add(scan2.getOutput().get(0), Literal.of(1)), "alias"),
+                new Alias(scan1.getOutput().get(0).getExprId(), new NullLiteral(), scan1.getOutput().get(0).getName())
+        );
+        // right semi join: mirrored case, an expression over scan2 plus a NULL alias reusing a scan1 slot's ExprId
+        plan = new LogicalPlanBuilder(scan1)
+                .join(scan2, JoinType.RIGHT_SEMI_JOIN, Pair.of(1, 1))
+                .projectExprs(projectExprs)
+                .join(scan3, JoinType.INNER_JOIN, Pair.of(1, 1))
+                .build();
+        PlanChecker.from(MemoTestUtils.createConnectContext(), plan)
+                .applyExploration(PushdownProjectThroughSemiJoin.INSTANCE.buildRules())
+                .nonMatch(logicalJoin(logicalJoin(logicalProject(), group()), group()));
+    }
 }