From bb824f35fbe7439b5335db22576c399f1766c26b Mon Sep 17 00:00:00 2001 From: seawinde Date: Sat, 9 May 2026 16:02:54 +0800 Subject: [PATCH] [fix](fe) Add null reject compensation for join rewrite ### What problem does this PR solve? Issue Number: N/A Related PR: #62492 Problem Summary: INNER JoinEdge null-reject inference can validate rewriting an INNER JOIN query by an OUTER JOIN materialized view without adding the required non-null compensation predicate. The rewritten plan can keep null-padded rows from the MV side that should be rejected by the original query. Root cause: In AbstractMaterializedViewRule.predicatesCompensate(), the previous check treated INNER JoinEdge null-reject inference as proof that an OUTER JOIN MV rewrite was valid, but the proof was not materialized as a real IS NOT NULL predicate in the rewritten query. Change Summary: | File | Change Description | |------|--------------------| | AbstractMaterializedViewRule.java | Split predicate-based null-reject proof from INNER JoinEdge proof and add query-based IS NOT NULL compensation when only JoinEdge proof covers required MV nullable sides. Fail rewrite if no safe MV output slot can carry the compensation predicate. | | NullRejectInferenceTest.java | Add unit coverage for LEFT/FULL OUTER JOIN MV rewrites that require INNER JoinEdge null-reject compensation on both sides. | | inner_join_null_reject_compensation.groovy | Add regression coverage with unmatched OUTER JOIN MV rows, including the LEFT JOIN MV to INNER JOIN query repro with nullable join keys. | Design rationale: Existing query predicates already flow through normal predicate compensation, so they do not need extra filters. INNER JoinEdge proof is only logical evidence; when it is needed to reject null-generated MV rows, the rewrite must add a real IS NOT NULL predicate on an MV output slot. If no such slot is available, the rewrite is rejected conservatively. 
### Release note Fixed an issue where OUTER JOIN materialized view rewrite could return extra null-padded rows for INNER JOIN queries. ### Check List (For Author) - Test - [x] Regression test - [x] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason Unit tests / checks: - Added NullRejectInferenceTest coverage for null-reject compensation on both sides when an INNER JOIN query is rewritten by a LEFT/FULL OUTER JOIN MV. - Ran git diff --check. - Tried ./run-fe-ut.sh --run org.apache.doris.nereids.rules.exploration.mv.NullRejectInferenceTest, but FE core compilation failed before the tests ran because the generated cloud proto classes are missing Cloud.CreateMetaSyncPointRequest/Response (referenced in MetaServiceClient and MetaServiceProxy). Regression test: - Added inner_join_null_reject_compensation.groovy for FULL/LEFT OUTER JOIN MV rewrites with unmatched null-padded rows. - Not run locally; the local FE UT build is currently blocked by the cloud proto compilation issue above. - Behavior changed: - [x] Yes. OUTER JOIN MV rewrite now adds real IS NOT NULL compensation when INNER JoinEdge null-reject inference is required, or rejects the rewrite if no safe MV output slot can carry that predicate. - [ ] No. - Does this need documentation? - [x] No. - [ ] Yes. 
### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../mv/AbstractMaterializedViewRule.java | 171 +++++++++++--- .../mv/NullRejectInferenceTest.java | 120 +++++++++- .../mv/dimension/dimension_self_conn.groovy | 5 +- ...inner_join_null_reject_compensation.groovy | 217 ++++++++++++++++++ 4 files changed, 474 insertions(+), 39 deletions(-) create mode 100644 regression-test/suites/nereids_rules_p0/mv/join_infer_derive/inner_join_null_reject_compensation.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java index a293e94d77502e..ef94ad458bf5b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.rules.exploration.mv; import org.apache.doris.catalog.MTMV; +import org.apache.doris.catalog.TableIf; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Id; import org.apache.doris.common.Pair; @@ -44,7 +45,9 @@ import org.apache.doris.nereids.rules.rewrite.MergeProjectable; import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.IsNull; import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import 
org.apache.doris.nereids.trees.expressions.functions.scalar.DateTrunc; @@ -821,21 +824,28 @@ protected SplitPredicate predicatesCompensate( Set> requireNoNullableViewSlot = comparisonResult.getViewNoNullableSlot(); // check query is use the null reject slot which view comparison need if (!requireNoNullableViewSlot.isEmpty()) { + // Required null-reject slots are recorded on the view side. Map query slots to view slots + // before checking whether query predicates or INNER JoinEdges can reject those null rows. SlotMapping queryToViewMapping = viewToQuerySlotMapping.inverse(); - // try to use - boolean valid = containsNullRejectSlot(requireNoNullableViewSlot, - queryStructInfo.getPredicates().getPulledUpPredicates(), queryToViewMapping, queryStructInfo, - viewStructInfo, cascadesContext); - if (!valid) { + Optional> queryBasedNullRejectCompensationPredicates = + getQueryBasedNullRejectCompensationPredicates( + requireNoNullableViewSlot, + queryStructInfo.getPredicates().getPulledUpPredicates(), queryToViewMapping, + queryStructInfo, viewStructInfo, viewToQuerySlotMapping, cascadesContext); + if (!queryBasedNullRejectCompensationPredicates.isPresent()) { queryStructInfo = queryStructInfo.withPredicates(queryStructInfo.getPredicates() .mergePulledUpPredicates(comparisonResult.getQueryAllPulledUpExpressions())); - valid = containsNullRejectSlot(requireNoNullableViewSlot, - queryStructInfo.getPredicates().getPulledUpPredicates(), queryToViewMapping, - queryStructInfo, viewStructInfo, cascadesContext); + queryBasedNullRejectCompensationPredicates = getQueryBasedNullRejectCompensationPredicates( + requireNoNullableViewSlot, queryStructInfo.getPredicates().getPulledUpPredicates(), + queryToViewMapping, queryStructInfo, viewStructInfo, viewToQuerySlotMapping, cascadesContext); } - if (!valid) { + if (!queryBasedNullRejectCompensationPredicates.isPresent()) { return SplitPredicate.INVALID_INSTANCE; } + if (!queryBasedNullRejectCompensationPredicates.get().isEmpty()) { + 
queryStructInfo = queryStructInfo.withPredicates(queryStructInfo.getPredicates() + .mergePulledUpPredicates(queryBasedNullRejectCompensationPredicates.get())); + } } // compensate couldNot PulledUp Conjunctions Map couldNotPulledUpCompensateConjunctions = @@ -863,45 +873,106 @@ protected SplitPredicate predicatesCompensate( } /** - * Check the queryPredicates contains the required nullable slot + * Check whether query-side null-reject evidence covers each required view-side slot set. + * + *

The check is view-based because the required null-reject slots come from the MV join graph. + * The returned compensation predicates are query-based because they will be merged into queryStructInfo. + * + *

Return meanings: + * Optional.empty(): no valid proof, or no safe output slot can carry the compensation predicate. + * Optional.of(emptySet()): existing query predicates already provide the required null-reject. + * Optional.of(nonEmptySet): INNER JoinEdge proof must be materialized as these IS NOT NULL predicates. */ - private boolean containsNullRejectSlot(Set> requireNoNullableViewSlot, + private Optional> getQueryBasedNullRejectCompensationPredicates( + Set> requireNoNullableViewSlot, Set queryPredicates, SlotMapping queryToViewMapping, StructInfo queryStructInfo, StructInfo viewStructInfo, + SlotMapping viewToQueryMapping, CascadesContext cascadesContext) { - Set queryNullRejectSlots = new HashSet<>(); + Set predicateNullRejectViewSlots = getViewBasedNullRejectSlots( + getPredicateNullRejectSlots(queryPredicates, cascadesContext), queryToViewMapping, queryStructInfo); + Set innerJoinNullRejectViewSlots = getViewBasedNullRejectSlots( + getInnerJoinNullRejectSlots(queryStructInfo, cascadesContext), queryToViewMapping, queryStructInfo); + Set allNullRejectViewSlots = new HashSet<>(predicateNullRejectViewSlots); + allNullRejectViewSlots.addAll(innerJoinNullRejectViewSlots); + if (allNullRejectViewSlots.isEmpty()) { + return Optional.empty(); + } + Set viewOutputSlots = viewStructInfo.getPlanOutputShuttledExpressions().stream() + .filter(Slot.class::isInstance) + .map(Slot.class::cast) + .collect(Collectors.toSet()); + Map viewToQuerySlotReferenceMap = viewToQueryMapping.toSlotReferenceMap(); + Set compensationPredicates = new HashSet<>(); + for (Set requiredViewSlots : getShuttledRequireNoNullableViewSlots( + requireNoNullableViewSlot, viewStructInfo)) { + if (Sets.intersection(requiredViewSlots, allNullRejectViewSlots).isEmpty()) { + return Optional.empty(); + } + if (!Sets.intersection(requiredViewSlots, predicateNullRejectViewSlots).isEmpty()) { + continue; + } + Optional compensationViewSlot = findCompensationViewSlot( + requiredViewSlots, viewOutputSlots, 
innerJoinNullRejectViewSlots); + if (!compensationViewSlot.isPresent()) { + return Optional.empty(); + } + Slot querySlot = viewToQuerySlotReferenceMap.get(compensationViewSlot.get()); + if (querySlot == null) { + return Optional.empty(); + } + compensationPredicates.add(new Not(new IsNull(querySlot), false)); + } + return Optional.of(compensationPredicates); + } + + private Set getPredicateNullRejectSlots(Set queryPredicates, CascadesContext cascadesContext) { + Set nullRejectSlots = new HashSet<>(); for (Expression queryPredicate : queryPredicates) { - Optional explicitNotNullSlot = TypeUtils.isNotNull(queryPredicate); - explicitNotNullSlot.ifPresent(queryNullRejectSlots::add); + TypeUtils.isNotNull(queryPredicate).ifPresent(nullRejectSlots::add); } - Set queryNullRejectPredicates = ExpressionUtils.inferNotNull(queryPredicates, cascadesContext); - for (Expression queryNullRejectPredicate : queryNullRejectPredicates) { - Optional notNullSlot = TypeUtils.isNotNull(queryNullRejectPredicate); - notNullSlot.ifPresent(queryNullRejectSlots::add); + for (Expression inferredNotNull : ExpressionUtils.inferNotNull(queryPredicates, cascadesContext)) { + TypeUtils.isNotNull(inferredNotNull).ifPresent(nullRejectSlots::add); } + return nullRejectSlots; + } + + private Set getInnerJoinNullRejectSlots(StructInfo queryStructInfo, CascadesContext cascadesContext) { + Set nullRejectSlots = new HashSet<>(); // INNER JOIN conditions guarantee NOT NULL on join-key slots. - // After EliminateOuterJoin converts LEFT→INNER, the JoinEdge objects in the HyperGraph + // After EliminateOuterJoin converts LEFT to INNER, the JoinEdge objects in the HyperGraph // retain the INNER type even though EliminateNotNull removes filter-level NOT NULL predicates. 
for (JoinEdge joinEdge : queryStructInfo.getHyperGraph().getJoinEdges()) { if (joinEdge.getJoinType().isInnerJoin()) { - queryNullRejectSlots.addAll(ExpressionUtils.inferNotNullSlots( + nullRejectSlots.addAll(ExpressionUtils.inferNotNullSlots( ImmutableSet.copyOf(joinEdge.getExpressions()), cascadesContext)); } } - if (queryNullRejectSlots.isEmpty()) { - return false; + return nullRejectSlots; + } + + private Set getViewBasedNullRejectSlots(Set queryNullRejectSlots, + SlotMapping queryToViewMapping, StructInfo queryStructInfo) { + Set viewBasedSlots = new HashSet<>(); + for (Slot queryNullRejectSlot : queryNullRejectSlots) { + Expression shuttledQuerySlot = ExpressionUtils.shuttleExpressionWithLineage( + queryNullRejectSlot, queryStructInfo.getTopPlan()); + if (!(shuttledQuerySlot instanceof Slot)) { + continue; + } + Expression viewSlot = ExpressionUtils.replace(shuttledQuerySlot, + queryToViewMapping.toSlotReferenceMap()); + if (viewSlot instanceof Slot) { + viewBasedSlots.add((Slot) viewSlot); + } } - Set queryUsedNeedRejectNullSlotsViewBased = ExpressionUtils.shuttleExpressionWithLineage( - new ArrayList<>(queryNullRejectSlots), queryStructInfo.getTopPlan()).stream() - .filter(Slot.class::isInstance) - .map(Slot.class::cast) - .map(slot -> ExpressionUtils.replace(slot, queryToViewMapping.toSlotReferenceMap())) - .filter(Slot.class::isInstance) - .map(Slot.class::cast) - .collect(Collectors.toSet()); - // view slot need shuttle to use table slot, avoid alias influence + return viewBasedSlots; + } + + private Set> getShuttledRequireNoNullableViewSlots(Set> requireNoNullableViewSlot, + StructInfo viewStructInfo) { Set> shuttledRequireNoNullableViewSlot = new HashSet<>(); for (Set requireNullableSlots : requireNoNullableViewSlot) { shuttledRequireNoNullableViewSlot.add( @@ -909,9 +980,41 @@ private boolean containsNullRejectSlot(Set> requireNoNullableViewSlot, viewStructInfo.getTopPlan()).stream().map(Slot.class::cast) .collect(Collectors.toSet())); } - // query 
pulledUp predicates should have null reject predicates and contains any require noNullable slot - return shuttledRequireNoNullableViewSlot.stream().noneMatch(viewRequiredNullSlotSet -> - Sets.intersection(viewRequiredNullSlotSet, queryUsedNeedRejectNullSlotsViewBased).isEmpty()); + return shuttledRequireNoNullableViewSlot; + } + + private Optional findCompensationViewSlot(Set requiredViewSlots, Set viewOutputSlots, + Set innerJoinNullRejectViewSlots) { + Set outputRequiredSlots = Sets.intersection(requiredViewSlots, viewOutputSlots); + Optional compensationViewSlot = outputRequiredSlots.stream() + .filter(innerJoinNullRejectViewSlots::contains) + .findFirst(); + if (compensationViewSlot.isPresent()) { + return compensationViewSlot; + } + return outputRequiredSlots.stream() + .filter(slot -> isOriginalNonNullableSlotOnInnerJoinProofTable(slot, innerJoinNullRejectViewSlots)) + .findFirst(); + } + + private boolean isOriginalNonNullableSlotOnInnerJoinProofTable(Slot slot, Set innerJoinNullRejectViewSlots) { + if (!(slot instanceof SlotReference)) { + return false; + } + SlotReference slotReference = (SlotReference) slot; + if (!slotReference.getOriginalColumn().map(column -> !column.isAllowNull()).orElse(!slot.nullable())) { + return false; + } + Optional originalTable = slotReference.getOriginalTable(); + if (!originalTable.isPresent()) { + return false; + } + return innerJoinNullRejectViewSlots.stream() + .filter(SlotReference.class::isInstance) + .map(SlotReference.class::cast) + .map(SlotReference::getOriginalTable) + .anyMatch(referenceTable -> referenceTable.isPresent() + && referenceTable.get().equals(originalTable.get())); } /** diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/NullRejectInferenceTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/NullRejectInferenceTest.java index 21cbf0ccf948aa..5fd7628096b5b5 100644 --- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/NullRejectInferenceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/NullRejectInferenceTest.java @@ -24,6 +24,10 @@ import org.apache.doris.nereids.rules.exploration.mv.mapping.RelationMapping; import org.apache.doris.nereids.rules.exploration.mv.mapping.SlotMapping; import org.apache.doris.nereids.sqltest.SqlTestBase; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.IsNull; +import org.apache.doris.nereids.trees.expressions.Not; +import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.util.PlanChecker; @@ -41,8 +45,10 @@ class NullRejectInferenceTest extends SqlTestBase { void testTwoHopNullRejectFromInnerJoinConditions() { connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); CascadesContext queryContext = createCascadesContext( - "select T1.id from T1 inner join T2 on T1.id = T2.id " - + "inner join T3 on T2.id = T3.id where T3.score = 1", + "select lineitem.l_orderkey, supplier.s_name, nation.n_name from lineitem " + + "inner join supplier on lineitem.l_suppkey = supplier.s_suppkey " + + "inner join nation on supplier.s_nationkey = nation.n_nationkey " + + "where nation.n_name = 'CHINA'", connectContext ); Plan queryPlan = PlanChecker.from(queryContext) @@ -52,8 +58,9 @@ void testTwoHopNullRejectFromInnerJoinConditions() { .getAllPlan().get(0).child(0); CascadesContext viewContext = createCascadesContext( - "select T1.id from T1 left outer join T2 on T1.id = T2.id " - + "left outer join T3 on T2.id = T3.id", + "select lineitem.l_orderkey, supplier.s_name, nation.n_name from lineitem " + + "left outer join supplier on lineitem.l_suppkey = supplier.s_suppkey " + + "left outer join nation on supplier.s_nationkey = nation.n_nationkey", connectContext ); 
Plan viewPlan = PlanChecker.from(viewContext) @@ -79,6 +86,111 @@ void testTwoHopNullRejectFromInnerJoinConditions() { SplitPredicate compensatePredicates = TEST_RULE.predicatesCompensateForTest( queryStructInfo, viewStructInfo, viewToQuery, comparisonResult, queryContext); Assertions.assertFalse(compensatePredicates.isInvalid()); + Assertions.assertTrue(compensatePredicates.toList().stream() + .anyMatch(expression -> isNotNullOnSlot(expression, "s_name"))); + } + + @Test + void testNullRejectCompensationForInnerJoinFullJoinRewrite() { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext queryContext = createCascadesContext( + "select lineitem.l_shipdate, orders.o_orderdate from lineitem " + + "inner join orders on lineitem.l_orderkey = orders.o_orderkey " + + "where orders.o_orderdate = '2023-10-17'", + connectContext + ); + Plan queryPlan = PlanChecker.from(queryContext) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + + CascadesContext viewContext = createCascadesContext( + "select lineitem.l_shipdate, orders.o_orderdate from lineitem " + + "full outer join orders on lineitem.l_orderkey = orders.o_orderkey", + connectContext + ); + Plan viewPlan = PlanChecker.from(viewContext) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + + StructInfo queryStructInfo = StructInfo.of(queryPlan, queryPlan, queryContext); + StructInfo viewStructInfo = StructInfo.of(viewPlan, viewPlan, viewContext); + RelationMapping relationMapping = RelationMapping.generate( + queryStructInfo.getRelations(), viewStructInfo.getRelations(), 8).get(0); + SlotMapping queryToView = SlotMapping.generate(relationMapping); + SlotMapping viewToQuery = queryToView.inverse(); + LogicalCompatibilityContext compatibilityContext = LogicalCompatibilityContext.from( + relationMapping, viewToQuery, queryStructInfo, 
viewStructInfo); + ComparisonResult comparisonResult = StructInfo.isGraphLogicalEquals( + queryStructInfo, viewStructInfo, compatibilityContext); + + Assertions.assertFalse(comparisonResult.isInvalid()); + Assertions.assertFalse(comparisonResult.getViewNoNullableSlot().isEmpty()); + + SplitPredicate compensatePredicates = TEST_RULE.predicatesCompensateForTest( + queryStructInfo, viewStructInfo, viewToQuery, comparisonResult, queryContext); + Assertions.assertFalse(compensatePredicates.isInvalid()); + Assertions.assertTrue(compensatePredicates.toList().stream() + .anyMatch(expression -> isNotNullOnSlot(expression, "l_shipdate"))); + } + + @Test + void testNullRejectCompensationForInnerJoinFullJoinRewriteOnRightSide() { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext queryContext = createCascadesContext( + "select lineitem.l_shipdate, orders.o_orderdate from lineitem " + + "inner join orders on lineitem.l_orderkey = orders.o_orderkey " + + "where lineitem.l_shipdate = '2023-10-17'", + connectContext + ); + Plan queryPlan = PlanChecker.from(queryContext) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + + CascadesContext viewContext = createCascadesContext( + "select lineitem.l_shipdate, orders.o_orderdate from lineitem " + + "full outer join orders on lineitem.l_orderkey = orders.o_orderkey", + connectContext + ); + Plan viewPlan = PlanChecker.from(viewContext) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + + StructInfo queryStructInfo = StructInfo.of(queryPlan, queryPlan, queryContext); + StructInfo viewStructInfo = StructInfo.of(viewPlan, viewPlan, viewContext); + RelationMapping relationMapping = RelationMapping.generate( + queryStructInfo.getRelations(), viewStructInfo.getRelations(), 8).get(0); + SlotMapping queryToView = SlotMapping.generate(relationMapping); 
+ SlotMapping viewToQuery = queryToView.inverse(); + LogicalCompatibilityContext compatibilityContext = LogicalCompatibilityContext.from( + relationMapping, viewToQuery, queryStructInfo, viewStructInfo); + ComparisonResult comparisonResult = StructInfo.isGraphLogicalEquals( + queryStructInfo, viewStructInfo, compatibilityContext); + + Assertions.assertFalse(comparisonResult.isInvalid()); + Assertions.assertFalse(comparisonResult.getViewNoNullableSlot().isEmpty()); + + SplitPredicate compensatePredicates = TEST_RULE.predicatesCompensateForTest( + queryStructInfo, viewStructInfo, viewToQuery, comparisonResult, queryContext); + Assertions.assertFalse(compensatePredicates.isInvalid()); + Assertions.assertTrue(compensatePredicates.toList().stream() + .anyMatch(expression -> isNotNullOnSlot(expression, "o_orderdate"))); + } + + private static boolean isNotNullOnSlot(Expression expression, String slotName) { + if (!(expression instanceof Not) || ((Not) expression).isGeneratedIsNotNull() + || !(((Not) expression).child() instanceof IsNull)) { + return false; + } + Expression slot = ((IsNull) ((Not) expression).child()).child(); + return slot instanceof SlotReference && slotName.equals(((SlotReference) slot).getName()); } private static class TestMaterializedViewRule extends AbstractMaterializedViewRule { diff --git a/regression-test/suites/nereids_rules_p0/mv/dimension/dimension_self_conn.groovy b/regression-test/suites/nereids_rules_p0/mv/dimension/dimension_self_conn.groovy index 6bac965910f7e9..e63203ada1cfa9 100644 --- a/regression-test/suites/nereids_rules_p0/mv/dimension/dimension_self_conn.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/dimension/dimension_self_conn.groovy @@ -308,7 +308,10 @@ suite("partition_mv_rewrite_dimension_self_conn") { } else { for (int j = 0; j < join_type_stmt_list.size(); j++) { logger.info("j:" + j) - if (i == j || (j == 1 && i in [0, 2, 3])) { + // INNER query can use an OUTER JOIN MV only when the MV output has a safe slot + 
// to filter null-generated rows. Here only RIGHT JOIN MV exposes a non-nullable + // left-side slot for that compensation. + if (i == j || (j == 1 && i == 2)) { mv_rewrite_success(join_type_stmt_list[j], join_type_self_conn_mv) compare_res(join_type_stmt_list[j] + " order by 1,2,3") } else { diff --git a/regression-test/suites/nereids_rules_p0/mv/join_infer_derive/inner_join_null_reject_compensation.groovy b/regression-test/suites/nereids_rules_p0/mv/join_infer_derive/inner_join_null_reject_compensation.groovy new file mode 100644 index 00000000000000..a678dd461c872a --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/join_infer_derive/inner_join_null_reject_compensation.groovy @@ -0,0 +1,217 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("inner_join_null_reject_compensation") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "set runtime_filter_mode=OFF" + sql "set enable_nereids_planner=true" + sql "set enable_fallback_to_original_planner=false" + sql "set enable_materialized_view_rewrite=true" + sql "set pre_materialized_view_rewrite_strategy=FORCE_IN_RBO" + sql "set enable_nereids_timeout=false" + + sql """drop materialized view if exists mv_inner_join_null_reject_compensation""" + sql """drop materialized view if exists mv_repro_left_join""" + sql """drop materialized view if exists mv_repro_left_join_missing_right_output""" + sql """drop materialized view if exists mv_repro_left_join_nullable_right_output""" + sql """drop table if exists mv_repro_a""" + sql """drop table if exists mv_repro_b""" + sql """drop table if exists orders_inner_join_null_reject""" + sql """drop table if exists lineitem_inner_join_null_reject""" + + sql """ + create table lineitem_inner_join_null_reject ( + l_orderkey int not null, + l_shipdate date not null, + l_suppkey int not null + ) + duplicate key(l_orderkey) + distributed by hash(l_orderkey) buckets 1 + properties ( + "replication_num" = "1" + ) + """ + + sql """ + create table orders_inner_join_null_reject ( + o_orderkey int not null, + o_orderdate date not null + ) + duplicate key(o_orderkey) + distributed by hash(o_orderkey) buckets 1 + properties ( + "replication_num" = "1" + ) + """ + + sql """ + create table mv_repro_a ( + id int null, + k int null + ) + duplicate key(id) + distributed by hash(id) buckets 1 + properties ( + "replication_num" = "1" + ) + """ + + sql """ + create table mv_repro_b ( + k int null, + v int null + ) + duplicate key(k) + distributed by hash(k) buckets 1 + properties ( + "replication_num" = "1" + ) + """ + + sql """ + insert into lineitem_inner_join_null_reject values + (1, '2023-10-17', 10), + (999, '2023-10-17', 20) + """ + + sql """ + insert into orders_inner_join_null_reject values + 
(1, '2023-10-17'), + (888, '2023-10-17') + """ + + sql """ + insert into mv_repro_a values + (1, 10), + (2, 20) + """ + + sql """ + insert into mv_repro_b values + (10, 100) + """ + + sql """analyze table lineitem_inner_join_null_reject with sync""" + sql """analyze table orders_inner_join_null_reject with sync""" + sql """analyze table mv_repro_a with sync""" + sql """analyze table mv_repro_b with sync""" + + def withUseMvHint = { def stmt, def mvName -> + stmt.replaceFirst("(?i)\\bselect\\b", "select /*+ use_mv(${mvName}) */") + } + + def compare_res_with_forced_mv = { def stmt, def mvName -> + def stmtWithUseMvHint = withUseMvHint(stmt, mvName) + sql "set enable_materialized_view_rewrite=false" + def origin_res = sql stmt + logger.info("origin_res: " + origin_res) + sql "set enable_materialized_view_rewrite=true" + mv_rewrite_success(stmtWithUseMvHint, mvName) + def mv_origin_res = sql stmtWithUseMvHint + logger.info("mv_origin_res: " + mv_origin_res) + assertTrue((mv_origin_res == [] && origin_res == []) || (mv_origin_res.size() == origin_res.size())) + for (int row = 0; row < mv_origin_res.size(); row++) { + assertTrue(mv_origin_res[row].size() == origin_res[row].size()) + for (int col = 0; col < mv_origin_res[row].size(); col++) { + assertTrue(mv_origin_res[row][col] == origin_res[row][col]) + } + } + } + + def mvName = "mv_inner_join_null_reject_compensation" + def mvSql = """ + select l.l_shipdate, l.l_suppkey, o.o_orderdate + from lineitem_inner_join_null_reject l + full outer join orders_inner_join_null_reject o + on l.l_orderkey = o.o_orderkey + """ + + create_async_mv(db, mvName, mvSql) + + def queryNeedLeftSideCompensation = """ + select l.l_shipdate, l.l_suppkey, o.o_orderdate + from lineitem_inner_join_null_reject l + inner join orders_inner_join_null_reject o + on l.l_orderkey = o.o_orderkey + where o.o_orderdate = '2023-10-17' + order by 1, 2, 3 + """ + + def queryNeedRightSideCompensation = """ + select l.l_shipdate, l.l_suppkey, o.o_orderdate + 
from lineitem_inner_join_null_reject l + inner join orders_inner_join_null_reject o + on l.l_orderkey = o.o_orderkey + where l.l_shipdate = '2023-10-17' + order by 1, 2, 3 + """ + + compare_res_with_forced_mv(queryNeedLeftSideCompensation, mvName) + compare_res_with_forced_mv(queryNeedRightSideCompensation, mvName) + + def leftJoinMvName = "mv_repro_left_join" + def leftJoinMvSql = """ + select + a.id as a_id, + a.k as a_k, + b.k as b_k, + b.v as b_v + from mv_repro_a a + left join mv_repro_b b + on a.k = b.k + """ + + create_async_mv(db, leftJoinMvName, leftJoinMvSql) + + def innerJoinQueryOnLeftJoinMv = """ + select a.id + from mv_repro_a a + inner join mv_repro_b b + on a.k = b.k + order by 1 + """ + + compare_res_with_forced_mv(innerJoinQueryOnLeftJoinMv, leftJoinMvName) + + def leftJoinMvWithoutRightOutputName = "mv_repro_left_join_missing_right_output" + def leftJoinMvWithoutRightOutputSql = """ + select + a.id as a_id, + a.k as a_k + from mv_repro_a a + left join mv_repro_b b + on a.k = b.k + """ + + create_async_mv(db, leftJoinMvWithoutRightOutputName, leftJoinMvWithoutRightOutputSql) + mv_rewrite_fail(innerJoinQueryOnLeftJoinMv, leftJoinMvWithoutRightOutputName) + + def leftJoinMvWithNullableRightOutputName = "mv_repro_left_join_nullable_right_output" + def leftJoinMvWithNullableRightOutputSql = """ + select + a.id as a_id, + a.k as a_k, + b.v as b_v + from mv_repro_a a + left join mv_repro_b b + on a.k = b.k + """ + + create_async_mv(db, leftJoinMvWithNullableRightOutputName, leftJoinMvWithNullableRightOutputSql) + mv_rewrite_fail(innerJoinQueryOnLeftJoinMv, leftJoinMvWithNullableRightOutputName) +}