From 23f4588e7562b3fc24aef81b61c216fbdcc03a57 Mon Sep 17 00:00:00 2001 From: foxtail463 Date: Mon, 11 May 2026 17:45:54 +0800 Subject: [PATCH] [fix](mv) Align extra-join elimination safety check (#62527) Problem Summary: Extra-join elimination rejected some safe LEFT JOIN rewrites too early. When the right side was guaranteed unique, a LEFT JOIN could still be safely eliminated even if its ON clause contained right-side `otherJoinConjuncts`, because those predicates only decide whether the nullable side matches and do not filter preserved-side rows. Example: -- MV SELECT t1.id, t1.name FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND t2.id = 1 -- Query SELECT id, name FROM t1 If `t2.id` is unique and no output or residual filter depends on `t2`, eliminating `t2` is safe. This PR relaxes the MV comparison path for that LEFT JOIN case, while still rejecting eliminations when unmatched residual filters depend on the eliminated side. For example, filters above the join such as `WHERE t2.id IS NULL`, or INNER JOIN filters such as `WHERE lineitem.l_shipdate = '2024-01-01'`, cannot be dropped because they affect which preserved-side rows survive. To make that safety check precise, `FilterEdge` now records the actual input nodes of its predicates during HyperGraph construction, including projected aliases and slot-free predicates. Co-authored-by: yangtao555 Co-authored-by: Dongyang Li --- .../jobs/joinorder/hypergraph/HyperGraph.java | 8 +- .../joinorder/hypergraph/edge/FilterEdge.java | 11 +- .../exploration/mv/HyperGraphComparator.java | 51 ++++--- .../exploration/mv/EliminateJoinTest.java | 129 ++++++++++++++++++ .../join_elim_filter_edge.out | 5 + .../join_elim_filter_edge.groovy | 79 +++++++++++ 6 files changed, 264 insertions(+), 19 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.out create mode 100644 regression-test/suites/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperGraph.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperGraph.java index 99d3d9d9282bad..192864bb50b40b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperGraph.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperGraph.java @@ -574,8 +574,14 @@ private BitSet addJoin(LogicalJoin join, } private BitSet addFilter(LogicalFilter filter, Pair childEdgeNodes) { + // Record the nodes actually used by the filter predicates when building the graph. + // slotToNodeMap already follows project aliases through addAlias(), so filters on alias + // slots still point back to the original base nodes. Slot-free predicates, e.g. 1 = 0, + // affect the whole child subtree and must not be treated as unrelated to every node. + long inputNodes = filter.getInputSlots().isEmpty() + ? childEdgeNodes.second : calNodeMap(filter.getInputSlots()); FilterEdge edge = new FilterEdge(filter, filterEdges.size(), childEdgeNodes.first, childEdgeNodes.second, - childEdgeNodes.second); + childEdgeNodes.second, inputNodes); filterEdges.add(edge); BitSet bitSet = new BitSet(); bitSet.set(edge.getIndex()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/FilterEdge.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/FilterEdge.java index 52c94abe3a16c9..813edaa7910b56 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/FilterEdge.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/FilterEdge.java @@ -31,11 +31,13 @@ */ public class FilterEdge extends Edge { private final LogicalFilter filter; + private final long inputNodes; public FilterEdge(LogicalFilter filter, int index, - BitSet childEdges, long subTreeNodes, long childRequireNodes) { + BitSet childEdges, long subTreeNodes, long childRequireNodes, long inputNodes) { super(index, childEdges, new BitSet(), subTreeNodes, childRequireNodes, 0L); this.filter = filter; + this.inputNodes = inputNodes; } @Override @@ -48,7 +50,12 @@ public List getExpressions() { return filter.getExpressions(); } + public long getInputNodes() { + return inputNodes; + } + public FilterEdge clear() { - return new FilterEdge(filter, getIndex(), getLeftChildEdges(), getSubTreeNodes(), getLeftRequiredNodes()); + return new FilterEdge(filter, getIndex(), getLeftChildEdges(), getSubTreeNodes(), getLeftRequiredNodes(), + inputNodes); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java index 11f2e7c409da33..39c2cc4213c865 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java @@ -165,7 +165,7 @@ private ComparisonResult isLogicCompatible() { .forEach(e -> pullUpViewExprWithEdge.put(e, e.getExpressions())); Sets.difference(getViewFilterEdgeSet(), Sets.newHashSet(queryToViewFilterEdge.values())) .stream() - .filter(e -> !LongBitmap.isOverlap(e.getReferenceNodes(), shouldEliminateViewNodesMap)) + .filter(e -> !LongBitmap.isOverlap(e.getInputNodes(), shouldEliminateViewNodesMap)) .forEach(e -> pullUpViewExprWithEdge.put(e, e.getExpressions())); return buildComparisonRes(); @@ -197,6 +197,16 @@ private boolean canEliminatePrimaryByForeign(long primaryNodes, long foreignNode return JoinUtils.canEliminateByFk(joinEdge.getJoin(), primary, foreign); } + private boolean canEliminateViewByLeft(JoinEdge joinEdge, Plan rightPlan) { + // MV comparison can validate residual predicates separately. Keep this relaxation local instead of + // changing JoinUtils.canEliminateByLeft(), which is shared by standalone rewrite rules. + Pair, Set> njHashKeys = joinEdge.getJoin().extractNullRejectHashKeys(); + if (njHashKeys == null) { + return false; + } + return rightPlan.getLogicalProperties().getTrait().isUnique(njHashKeys.second); + } + private boolean canEliminateViewEdge(JoinEdge joinEdge) { // eliminate by unique if (joinEdge.getJoinType().isLeftOuterJoin() && joinEdge.isRightSimple()) { @@ -205,12 +215,11 @@ private boolean canEliminateViewEdge(JoinEdge joinEdge) { if (LongBitmap.getCardinality(eliminatedRight) != 1) { return false; } - Plan rigthPlan = constructViewPlan(joinEdge.getRightExtendedNodes(), joinEdge.getRightInputSlots()); - if (rigthPlan == null) { + Plan rightPlan = constructViewPlan(joinEdge.getRightExtendedNodes(), joinEdge.getRightInputSlots()); + if (rightPlan == null) { return false; } - boolean couldEliminateByLeft = JoinUtils.canEliminateByLeft(joinEdge.getJoin(), - rigthPlan.getLogicalProperties().getTrait()); + boolean couldEliminateByLeft = canEliminateViewByLeft(joinEdge, rightPlan); // if eliminated successfully, should refresh the eliminateViewNodesMap if (couldEliminateByLeft) { this.reservedShouldEliminatedViewNodes = @@ -254,21 +263,31 @@ private boolean canEliminateViewEdge(JoinEdge joinEdge) { } private boolean tryEliminateNodesAndEdge() { - boolean hasFilterEdgeAbove = viewHyperGraph.getFilterEdges().stream() - .filter(e -> LongBitmap.getCardinality(e.getReferenceNodes()) == 1) - .anyMatch(e -> LongBitmap.isSubset(e.getReferenceNodes(), shouldEliminateViewNodesMap)); - if (hasFilterEdgeAbove) { - // If there is some filter edge above the eliminated node, we should rebuild a plan - // Right now, just reject it. - return false; - } long allCanEliminateNodes = 0; for (JoinEdge joinEdge : viewHyperGraph.getJoinEdges()) { long canEliminateSideNodes = getCanEliminateSideNodes(joinEdge); allCanEliminateNodes = LongBitmap.or(allCanEliminateNodes, canEliminateSideNodes); - if (LongBitmap.isOverlap(canEliminateSideNodes, reservedShouldEliminatedViewNodes) - && !canEliminateViewEdge(joinEdge)) { - return false; + long shouldEliminateNodesOnCurrentEdge = + LongBitmap.newBitmapIntersect(canEliminateSideNodes, reservedShouldEliminatedViewNodes); + if (LongBitmap.isOverlap(canEliminateSideNodes, reservedShouldEliminatedViewNodes)) { + // For FilterEdge, leftChildEdges records join edges below the filter child. Non-empty means + // the filter is above a join result, e.g. WHERE after LEFT JOIN, and may filter preserved rows. + // Empty means the filter stays inside a base/nullable-side subtree, which is safe for LOJ. + boolean hasUnsafeFilterEdgeOnCurrentEliminatedNode = + LongBitmap.getCardinality(shouldEliminateNodesOnCurrentEdge) > 0 + && viewHyperGraph.getFilterEdges().stream() + .filter(e -> e.getExpressions().stream().anyMatch(expr -> !ExpressionUtils.isInferred(expr))) + .filter(e -> LongBitmap.isOverlap( + e.getInputNodes(), shouldEliminateNodesOnCurrentEdge)) + .anyMatch(e -> joinEdge.getJoinType().isInnerJoin() || !e.getLeftChildEdges().isEmpty()); + // Inner join elimination is unsafe with any filter on the removed side. For left join, + // filters inside the nullable side are OK, but filters above the join can filter left rows. + if (hasUnsafeFilterEdgeOnCurrentEliminatedNode) { + return false; + } + if (!canEliminateViewEdge(joinEdge)) { + return false; + } } } // check all can eliminateNodes contains all should eliminate nodes, to avoid some nodes can not be eliminated diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/EliminateJoinTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/EliminateJoinTest.java index 2bec767d46f041..20697e78912d20 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/EliminateJoinTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/EliminateJoinTest.java @@ -100,6 +100,105 @@ void testLOJWithUK() throws Exception { dropConstraint("alter table T2 drop constraint uk"); } + @Test + void testLOJWithUKAndOtherJoinConjuncts() throws Exception { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext c1 = createCascadesContext( + "select * from T1", + connectContext + ); + Plan p1 = PlanChecker.from(c1) + .analyze() + .rewrite() + .getPlan().child(0); + addConstraint("alter table T2 add constraint uk_other_join_conjunct unique (id)"); + try { + CascadesContext c2 = createCascadesContext( + "select * from T1 left outer join T2 " + + "on T1.id = T2.id and T2.id = 1", + connectContext + ); + Plan p2 = PlanChecker.from(c2) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + HyperGraph h1 = HyperGraph.builderForMv(p1).build(); + HyperGraph h2 = HyperGraph.builderForMv(p2).build(); + ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1)); + Assertions.assertFalse(res.isInvalid()); + Assertions.assertTrue(res.getViewExpressions().isEmpty()); + } finally { + dropConstraint("alter table T2 drop constraint uk_other_join_conjunct"); + } + } + + @Test + void testLOJWithUKAndFilterOnEliminatedNode() throws Exception { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext c1 = createCascadesContext( + "select T1.id from T1", + connectContext + ); + Plan p1 = PlanChecker.from(c1) + .analyze() + .rewrite() + .getPlan().child(0); + addConstraint("alter table T2 add constraint uk_loj_filter unique (id)"); + try { + CascadesContext c2 = createCascadesContext( + "select T1.id from T1 left outer join T2 " + + "on T1.id = T2.id where T2.id is null", + connectContext + ); + Plan p2 = PlanChecker.from(c2) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + HyperGraph h1 = HyperGraph.builderForMv(p1).build(); + HyperGraph h2 = HyperGraph.builderForMv(p2).build(); + ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1)); + Assertions.assertTrue(res.isInvalid()); + } finally { + dropConstraint("alter table T2 drop constraint uk_loj_filter"); + } + } + + @Test + void testInnerJoinWithPKFKAndSlotFreeFilterOnEliminatedNode() throws Exception { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext c1 = createCascadesContext( + "select * from T1", + connectContext + ); + Plan p1 = PlanChecker.from(c1) + .analyze() + .rewrite() + .getPlan().child(0); + addConstraint("alter table T2 add constraint pk_slot_free_filter primary key (id)"); + addConstraint("alter table T1 add constraint fk_slot_free_filter foreign key (id) references T2(id)"); + try { + CascadesContext c2 = createCascadesContext( + "select * from T1 inner join (select * from T2 where 1 = 0) T2 " + + "on T1.id = T2.id", + connectContext + ); + Plan p2 = PlanChecker.from(c2) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + HyperGraph h1 = HyperGraph.builderForMv(p1).build(); + HyperGraph h2 = HyperGraph.builderForMv(p2).build(); + ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1)); + Assertions.assertTrue(res.isInvalid()); + } finally { + dropConstraint("alter table T1 drop constraint fk_slot_free_filter"); + dropConstraint("alter table T2 drop constraint pk_slot_free_filter"); + } + } + @Test void testLOJWithPKFK() throws Exception { connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); @@ -143,6 +242,36 @@ void testLOJWithPKFK() throws Exception { dropConstraint("alter table T2 drop constraint pk"); } + @Test + void testInnerJoinWithPKFKAndMultiNodeResidualFilter() throws Exception { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext c1 = createCascadesContext( + "select * from T1", + connectContext + ); + Plan p1 = PlanChecker.from(c1) + .analyze() + .rewrite() + .getPlan().child(0); + addConstraint("alter table T2 add constraint pk primary key (id)"); + addConstraint("alter table T1 add constraint fk foreign key (id) references T2(id)"); + CascadesContext c2 = createCascadesContext( + "select * from T1 inner join T2 " + + "on T1.id = T2.id where T1.score > T2.score", + connectContext + ); + Plan p2 = PlanChecker.from(c2) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + HyperGraph h1 = HyperGraph.builderForMv(p1).build(); + HyperGraph h2 = HyperGraph.builderForMv(p2).build(); + ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1)); + Assertions.assertTrue(res.isInvalid()); + dropConstraint("alter table T2 drop constraint pk"); + } + @Disabled @Test void testLOJWithPKFKAndUK1() throws Exception { diff --git a/regression-test/data/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.out b/regression-test/data/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.out new file mode 100644 index 00000000000000..37037d7bad2818 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !join_elim_filter_edge -- +1 Alice +2 Bob +3 Charlie diff --git a/regression-test/suites/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.groovy b/regression-test/suites/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.groovy new file mode 100644 index 00000000000000..57f5c3abdb9cbb --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.groovy @@ -0,0 +1,79 @@ +package mv.join_elim_p_f_key +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("join_elim_filter_edge") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "SET enable_materialized_view_rewrite=true" + sql "SET enable_nereids_timeout = false" + + sql """DROP MATERIALIZED VIEW IF EXISTS mv_join_elim_filter_edge""" + sql """DROP TABLE IF EXISTS join_elim_filter_edge_t1""" + sql """DROP TABLE IF EXISTS join_elim_filter_edge_t2""" + + sql """CREATE TABLE join_elim_filter_edge_t1 ( + id INT, + name VARCHAR(50) + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1");""" + + sql """CREATE TABLE join_elim_filter_edge_t2 ( + id INT, + value INT + ) + UNIQUE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true" + );""" + + sql """ALTER TABLE join_elim_filter_edge_t2 ADD CONSTRAINT uk_id UNIQUE (id)""" + + sql """INSERT INTO join_elim_filter_edge_t1 VALUES + (1, 'Alice'), + (2, 'Bob'), + (3, 'Charlie');""" + + sql """INSERT INTO join_elim_filter_edge_t2 VALUES + (1, 10), + (2, -5), + (4, 20);""" + + sql """ANALYZE TABLE join_elim_filter_edge_t1 WITH SYNC;""" + sql """ANALYZE TABLE join_elim_filter_edge_t2 WITH SYNC;""" + sql """ALTER TABLE join_elim_filter_edge_t1 MODIFY COLUMN name SET STATS ('row_count'='3');""" + sql """ALTER TABLE join_elim_filter_edge_t2 MODIFY COLUMN value SET STATS ('row_count'='3');""" + + create_async_mv(db, "mv_join_elim_filter_edge", """ + SELECT + t1.id, + t1.name + FROM join_elim_filter_edge_t1 t1 + LEFT JOIN join_elim_filter_edge_t2 t2 + ON t1.id = t2.id AND t2.id = 1 + """) + + def querySql = """SELECT id, name FROM join_elim_filter_edge_t1""" + mv_rewrite_success_without_check_chosen(querySql, "mv_join_elim_filter_edge") + order_qt_join_elim_filter_edge """SELECT id, name FROM join_elim_filter_edge_t1 ORDER BY 1, 2""" +}