From 1faf7e2dc3e47bdea6a9b72130986799d1f590cd Mon Sep 17 00:00:00 2001 From: englefly Date: Fri, 24 Apr 2026 22:26:24 +0800 Subject: [PATCH 1/5] [fix](fe) Prune empty and cascading CTE plans Issue Number: None Related PR: None Problem Summary: Fix optimizer-side CTE pruning so empty-relation producers, zero-consumer anchors, and cascading inline opportunities are normalized to a fixpoint before memoization, and add regression coverage for empty and cascading CTE elimination. None - Test: No need to test (user requested to skip compile and test) - Behavior changed: Yes (empty and cascade-pruned CTEs are normalized before memoization) - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> [fix](fe) Fix cascading CTE normalize fixpoint Issue Number: close #xxx Related PR: #xxx Problem Summary: CTE normalization used Plan.equals() as the fixpoint check. Some logical plan nodes do not compare children in equals(), so CTE rewrites under a kept parent node could be missed and cascading inline/prune normalization could stop too early. Fix cascading CTE pruning/inline normalization when child CTE changes occur under logical nodes whose equals() does not compare children. - Test: Regression test / FE checkstyle - ./run-regression-test.sh --run -d query_p0/cte -s test_cbo_cte_inline_prune - cd fe && mvn checkstyle:check -pl fe-core - Behavior changed: Yes. CTE normalization now uses explicit rewrite state instead of shallow Plan.equals() for fixpoint detection. - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> [fix](regression) Update repeat CTE shape expectation Issue Number: close #xxx Related PR: #xxx Problem Summary: CTE normalization now inlines the single-consumer CTE in the repeat output slot shape case, so the expected shape output needed to be updated while query results stay unchanged. None - Test: Regression test - ./run-regression-test.sh --run -d query_p0/repeat -s test_repeat_output_slot - Behavior changed: No. Test expectation only. - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> fix-shape --- .../nereids/jobs/executor/Optimizer.java | 28 +++++- .../nereids/rules/rewrite/CTEInliner.java | 86 +++++++++++++++- .../org/apache/doris/qe/SessionVariable.java | 2 +- .../agg_with_unique_function.out | 8 +- .../repeat/test_repeat_output_slot.out | 28 +++--- .../cte/test_cbo_cte_inline_prune.groovy | 97 +++++++++++++++++++ 6 files changed, 220 insertions(+), 29 deletions(-) create mode 100644 regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java index 9d7f95970c4048..aeee5856ea6e14 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java @@ -66,6 +66,7 @@ public Optimizer(CascadesContext cascadesContext) { */ public void execute() { MoreFieldsThread.keepFunctionSignature(() -> { + cascadesContext.setRewritePlan(normalizeCtePlan(cascadesContext.getRewritePlan())); // generate inlined CTE alternative for CBO comparison Plan cboInlinedPlan = generateCTEInlineAlternative(); // init memo @@ -197,7 +198,7 @@ private Plan generateFullCTEInline() { CTEInliner cteInliner = new CTEInliner(cascadesContext.getStatementContext()); Plan inlinedPlan = cteInliner.generateInlinedPlan(rewritePlan); if (inlinedPlan != null) { - return rewriteInlinedPlan(inlinedPlan); + return normalizeCtePlan(rewriteInlinedPlan(inlinedPlan)); } return null; } @@ -212,7 +213,7 @@ private Plan generateSelectiveCTEInline() { if (inlinedPlan != null) { inlinedPlan = rewriteInlinedPlan(inlinedPlan); if (inlinedPlan.anyMatch(p -> p instanceof LogicalEmptyRelation)) { - inlinedPlan = eliminateEmptyRelation(inlinedPlan); + inlinedPlan = normalizeCtePlan(inlinedPlan); cascadesContext.setRewritePlan(inlinedPlan); return null; } @@ -220,6 +221,29 @@ private Plan generateSelectiveCTEInline() { return null; } + private Plan normalizeCtePlan(Plan plan) { + Plan currentPlan = plan; + while (true) { + CTEInliner cteInliner = new CTEInliner(cascadesContext.getStatementContext()); + CTEInliner.InlineResult inlineResult = cteInliner.inlineByCurrentConsumerCount(currentPlan); + Plan normalizedPlan = inlineResult.getPlan(); + boolean changed = inlineResult.isChanged(); + if (normalizedPlan.anyMatch(p -> p instanceof LogicalEmptyRelation)) { + String beforeEliminate = normalizedPlan.treeString(); + normalizedPlan = eliminateEmptyRelation(normalizedPlan); + changed = changed || !beforeEliminate.equals(normalizedPlan.treeString()); + } + // Do not use Plan.equals() as a fixpoint check here. Some logical nodes, + // e.g. LogicalCTEAnchor and LogicalSubQueryAlias, intentionally ignore + // children in equals(), so a child CTE rewrite under a kept parent may be + // missed and block cascading consumer-count-based inlining. + if (!changed) { + return normalizedPlan; + } + currentPlan = normalizedPlan; + } + } + private Plan eliminateEmptyRelation(Plan plan) { CascadesContext ctx = CascadesContext.initContext( cascadesContext.getStatementContext(), plan, PhysicalProperties.ANY); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CTEInliner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CTEInliner.java index 47ec88c01596cc..36e082ecb33bd4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CTEInliner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CTEInliner.java @@ -31,6 +31,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor; import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer; import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer; +import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalUnion; @@ -41,8 +42,10 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; /** * Generate an inlined alternative plan for CTE optimization. @@ -65,6 +68,7 @@ public class CTEInliner extends DefaultPlanRewriter { private final StatementContext statementContext; // Map from CTEId to the CTE producer node (extracted from CTEAnchor.left()) private final Map> cteProducers = new HashMap<>(); + private final Set cteIdsToRemove = new HashSet<>(); private final boolean unionAllOnly; public CTEInliner(StatementContext statementContext) { @@ -81,6 +85,7 @@ public CTEInliner(StatementContext statementContext, boolean unionAllOnly) { * Returns null if no CTEs can be inlined. */ public Plan generateInlinedPlan(Plan plan) { + clearRewriteCandidates(); // First pass: collect all CTE producers that can be inlined collectCTEProducers(plan); @@ -92,6 +97,44 @@ public Plan generateInlinedPlan(Plan plan) { return plan.accept(this, null); } + /** + * Recursively remove unused CTE anchors and inline CTEs whose live consumer count + * is small enough after rewrite rules change the plan shape. + */ + public InlineResult inlineByCurrentConsumerCount(Plan plan) { + Plan currentPlan = plan; + boolean changed = false; + while (collectConsumerDrivenCandidates(currentPlan)) { + changed = true; + currentPlan = currentPlan.accept(this, null); + } + return new InlineResult(currentPlan, changed); + } + + /** Result of one consumer-count-driven CTE normalization round. */ + public static class InlineResult { + private final Plan plan; + private final boolean changed; + + public InlineResult(Plan plan, boolean changed) { + this.plan = plan; + this.changed = changed; + } + + public Plan getPlan() { + return plan; + } + + public boolean isChanged() { + return changed; + } + } + + private void clearRewriteCandidates() { + cteProducers.clear(); + cteIdsToRemove.clear(); + } + private void collectCTEProducers(Plan plan) { plan.foreach(p -> { if (p instanceof LogicalCTEAnchor) { @@ -113,6 +156,40 @@ private void collectCTEProducers(Plan plan) { }); } + private boolean collectConsumerDrivenCandidates(Plan plan) { + clearRewriteCandidates(); + Map> allCteProducers = new HashMap<>(); + Map cteConsumerCounts = new HashMap<>(); + plan.foreach(p -> { + if (p instanceof LogicalCTEAnchor) { + LogicalCTEAnchor anchor = (LogicalCTEAnchor) p; + allCteProducers.put(anchor.getCteId(), (LogicalCTEProducer) anchor.left()); + } else if (p instanceof LogicalCTEConsumer) { + LogicalCTEConsumer consumer = (LogicalCTEConsumer) p; + cteConsumerCounts.merge(consumer.getCteId(), 1, Integer::sum); + } + }); + + int threshold = statementContext.getConnectContext().getSessionVariable().inlineCTEReferencedThreshold; + for (Map.Entry> entry : allCteProducers.entrySet()) { + CTEId cteId = entry.getKey(); + LogicalCTEProducer producer = entry.getValue(); + int consumerCount = cteConsumerCounts.getOrDefault(cteId, 0); + if (consumerCount == 0) { + cteIdsToRemove.add(cteId); + } else if (producer.child() instanceof LogicalEmptyRelation + || (consumerCount <= threshold && canInline(producer))) { + cteProducers.put(cteId, producer); + } + } + return !cteProducers.isEmpty() || !cteIdsToRemove.isEmpty(); + } + + private boolean canInline(LogicalCTEProducer producer) { + return !statementContext.isForceMaterializeCTE(producer.getCteId()) + && !containsNondeterministicFunction(producer); + } + private boolean containsNondeterministicFunction(LogicalCTEProducer producer) { List nondeterministicFunctions = new ArrayList<>(); producer.accept(NondeterministicFunctionCollector.INSTANCE, nondeterministicFunctions); @@ -127,13 +204,14 @@ private boolean containsUnionAll(LogicalCTEProducer producer) { @Override public Plan visitLogicalCTEAnchor(LogicalCTEAnchor cteAnchor, Void context) { CTEId cteId = cteAnchor.getCteId(); - if (cteProducers.containsKey(cteId)) { - // Inline: skip anchor and producer, process the right (consumer) subtree + if (cteProducers.containsKey(cteId) || cteIdsToRemove.contains(cteId)) { + // Inline or remove: skip anchor and producer, process the right (consumer) subtree return cteAnchor.right().accept(this, null); } else { - // Force materialize: keep the structure, only process the right subtree + // Keep the structure and continue trimming nested CTEs in both children. + Plan left = cteAnchor.left().accept(this, null); Plan right = cteAnchor.right().accept(this, null); - return cteAnchor.withChildren(cteAnchor.left(), right); + return cteAnchor.withChildren(left, right); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 3884d42b80cfdb..9553b2545ad55a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2576,7 +2576,7 @@ public static boolean isEagerAggregationOnJoin() { @VarAttrDef.VarAttr(name = ENABLE_ORDERED_SCAN_RANGE_LOCATIONS) public boolean enableOrderedScanRangeLocations = false; - @VarAttrDef.VarAttr(name = CTE_INLINE_MODE, alias = "cbo_cte_inline_mode", description = { + @VarAttrDef.VarAttr(name = CTE_INLINE_MODE, description = { "CTE内联模式。<0:禁用; =0:仅当CTE体含UNION ALL且filter可消除部分分支时内联; >=1:CBO比较物化与内联", "CTE inline mode. <0: disable; =0: only inline when CTE body contains UNION ALL " + "and consumer filters can eliminate some union branches; " diff --git a/regression-test/data/nereids_rules_p0/unique_function/agg_with_unique_function.out b/regression-test/data/nereids_rules_p0/unique_function/agg_with_unique_function.out index dfe9a28c4bb4ad..adc172f81d6a20 100644 --- a/regression-test/data/nereids_rules_p0/unique_function/agg_with_unique_function.out +++ b/regression-test/data/nereids_rules_p0/unique_function/agg_with_unique_function.out @@ -264,9 +264,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----PhysicalProject[a + random(), a + random() + 0, abs(a + random()), sum(a + random() + 0), sum(a + random())] ------PhysicalQuickSort[MERGE_SORT] --------PhysicalQuickSort[LOCAL_SORT] -----------PhysicalProject[(a + random() + 1.0) AS `(a + random() + 1.0)`, a + random(), a + random() + 0, abs(a + random()) AS `abs(a + random())`, sum(a + random() + 0), sum(a + random())] -------------PhysicalUnion ---------------PhysicalEmptyRelation ---------------filter((.a + random() + 0 > 0.01)) -----------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------PhysicalProject[(a + random() + 1.0) AS `(a + random() + 1.0)`, a + random() + 0 AS `a + random() + 0`, a + random() AS `a + random()`, abs(a + random()) AS `abs(a + random())`, sum(a + random() + 0) AS `sum(a + random() + 0)`, sum(a + random()) AS `sum(a + random())`] +------------filter((.a + random() + 0 > 0.01)) +--------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/query_p0/repeat/test_repeat_output_slot.out b/regression-test/data/query_p0/repeat/test_repeat_output_slot.out index f8ab9595435c91..8019e6bf2b53ff 100644 --- a/regression-test/data/query_p0/repeat/test_repeat_output_slot.out +++ b/regression-test/data/query_p0/repeat/test_repeat_output_slot.out @@ -39,22 +39,17 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) 100000 -- !sql_2_shape -- -PhysicalCteAnchor ( cteId=CTEId#0 ) ---PhysicalCteProducer ( cteId=CTEId#0 ) -----hashAgg[GLOBAL] -------hashAgg[LOCAL] ---------PhysicalProject -----------PhysicalOlapScan[tbl_test_repeat_output_slot] ---PhysicalResultSink -----PhysicalProject -------PhysicalUnion ---------PhysicalProject -----------filter((GROUPING_PREFIX_col_varchar_50__undef_signed__index_inverted_col_datetime_6__undef_signed_col_varchar_50__undef_signed > 0)) -------------hashAgg[GLOBAL] ---------------hashAgg[LOCAL] -----------------PhysicalRepeat -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------PhysicalEmptyRelation +PhysicalResultSink +--PhysicalProject +----filter((GROUPING_PREFIX_col_varchar_50__undef_signed__index_inverted_col_datetime_6__undef_signed_col_varchar_50__undef_signed > 0)) +------hashAgg[GLOBAL] +--------hashAgg[LOCAL] +----------PhysicalRepeat +------------PhysicalProject +--------------hashAgg[GLOBAL] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------PhysicalOlapScan[tbl_test_repeat_output_slot] -- !sql_2_result -- \N ALL 1 6 \N \N \N @@ -64,4 +59,3 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) 2020-01-04T00:00 ALL 1 6 \N \N b 2020-01-04T00:00 ALL 1 6 \N \N b 2020-01-04T00:00 ALL 1 7 \N \N \N - diff --git a/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy b/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy new file mode 100644 index 00000000000000..dae71db9c318a5 --- /dev/null +++ b/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_cbo_cte_inline_prune") { + sql "DROP TABLE IF EXISTS cte_cbo_inline_tbl" + sql """ + CREATE TABLE cte_cbo_inline_tbl ( + id INT, + val INT + ) ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_num" = "1") + """ + sql "INSERT INTO cte_cbo_inline_tbl VALUES (1, 10), (2, 20), (3, 30)" + + sql "SET cte_inline_mode=1" + sql "SET inline_cte_referenced_threshold=1" + + explain { + sql """ + shape plan + WITH cte_base AS ( + SELECT id, val, 1 AS tag FROM cte_cbo_inline_tbl + ) + SELECT * FROM cte_base WHERE tag = 2 + UNION ALL + SELECT * FROM cte_base WHERE tag = 3 + """ + contains("PhysicalEmptyRelation") + notContains("PhysicalCteProducer") + } + + explain { + sql """ + shape plan + WITH cte_base AS ( + SELECT id, val, 1 AS tag FROM cte_cbo_inline_tbl + ), + cte_keep AS ( + SELECT id, val FROM cte_base WHERE tag = 1 + ), + cte_drop AS ( + SELECT id, val FROM cte_base WHERE tag = 2 + ) + SELECT * FROM cte_keep + UNION ALL + SELECT * FROM cte_keep + UNION ALL + SELECT * FROM cte_drop WHERE 1 = 0 + """ + multiContains("PhysicalCteProducer", 0) + } + + sql "SET cte_inline_mode=0" + explain { + sql """ + shape plan + WITH cte_base AS ( + SELECT id, val, 1 AS tag FROM cte_cbo_inline_tbl + ), + cte_keep AS ( + SELECT id, val FROM cte_base WHERE tag = 1 + ), + cte_drop AS ( + SELECT id, val FROM cte_base WHERE tag = 2 + ), + cte_outer AS ( + SELECT * FROM cte_keep + UNION ALL + SELECT * FROM cte_drop WHERE 1 = 0 + ) + SELECT * FROM cte_outer + UNION ALL + SELECT * FROM cte_outer + """ + // cte_outer is still referenced twice and remains materialized, but + // eliminating cte_drop makes cte_base have only one live consumer. + // The fixpoint check must revisit the kept outer CTE anchor and inline + // cte_base; otherwise two producers would remain. + multiContains("PhysicalCteProducer", 1) + } +} From 833fc19b502172dbf0d7961cd4aa6d3d1d36247d Mon Sep 17 00:00:00 2001 From: englefly Date: Thu, 14 May 2026 17:08:11 +0800 Subject: [PATCH 2/5] fix-514 --- .../nereids/jobs/executor/Optimizer.java | 51 ++++++++++++------- .../cte/test_cbo_cte_inline_prune.groovy | 29 +++++++++++ 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java index aeee5856ea6e14..77857c27ff597b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java @@ -36,6 +36,8 @@ import org.apache.doris.nereids.rules.rewrite.MergeProjectable; import org.apache.doris.nereids.rules.rewrite.PushDownExpressionsInHashCondition; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor; +import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer; import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation; import org.apache.doris.nereids.util.MoreFieldsThread; import org.apache.doris.qe.ConnectContext; @@ -66,7 +68,10 @@ public Optimizer(CascadesContext cascadesContext) { */ public void execute() { MoreFieldsThread.keepFunctionSignature(() -> { - cascadesContext.setRewritePlan(normalizeCtePlan(cascadesContext.getRewritePlan())); + Plan rewritePlan = cascadesContext.getRewritePlan(); + if (containsCte(rewritePlan)) { + cascadesContext.setRewritePlan(normalizeCtePlan(rewritePlan)); + } // generate inlined CTE alternative for CBO comparison Plan cboInlinedPlan = generateCTEInlineAlternative(); // init memo @@ -196,9 +201,9 @@ private Plan generateCTEInlineAlternative() { private Plan generateFullCTEInline() { Plan rewritePlan = cascadesContext.getRewritePlan(); CTEInliner cteInliner = new CTEInliner(cascadesContext.getStatementContext()); - Plan inlinedPlan = cteInliner.generateInlinedPlan(rewritePlan); - if (inlinedPlan != null) { - return normalizeCtePlan(rewriteInlinedPlan(inlinedPlan)); + Plan pushedDownInlinedPlan = generateFilterPushedDownInlinedPlan(cteInliner, rewritePlan); + if (pushedDownInlinedPlan != null) { + return normalizeCtePlan(pushedDownInlinedPlan); } return null; } @@ -209,12 +214,11 @@ private Plan generateFullCTEInline() { private Plan generateSelectiveCTEInline() { Plan rewritePlan = cascadesContext.getRewritePlan(); CTEInliner cteInliner = new CTEInliner(cascadesContext.getStatementContext(), true); - Plan inlinedPlan = cteInliner.generateInlinedPlan(rewritePlan); - if (inlinedPlan != null) { - inlinedPlan = rewriteInlinedPlan(inlinedPlan); - if (inlinedPlan.anyMatch(p -> p instanceof LogicalEmptyRelation)) { - inlinedPlan = normalizeCtePlan(inlinedPlan); - cascadesContext.setRewritePlan(inlinedPlan); + Plan pushedDownInlinedPlan = generateFilterPushedDownInlinedPlan(cteInliner, rewritePlan); + if (pushedDownInlinedPlan != null) { + if (pushedDownInlinedPlan.anyMatch(p -> p instanceof LogicalEmptyRelation)) { + pushedDownInlinedPlan = normalizeCtePlan(pushedDownInlinedPlan); + cascadesContext.setRewritePlan(pushedDownInlinedPlan); return null; } } @@ -224,30 +228,31 @@ private Plan generateSelectiveCTEInline() { private Plan normalizeCtePlan(Plan plan) { Plan currentPlan = plan; while (true) { + if (currentPlan.anyMatch(p -> p instanceof LogicalEmptyRelation)) { + currentPlan = eliminateEmptyRelation(currentPlan); + } CTEInliner cteInliner = new CTEInliner(cascadesContext.getStatementContext()); CTEInliner.InlineResult inlineResult = cteInliner.inlineByCurrentConsumerCount(currentPlan); Plan normalizedPlan = inlineResult.getPlan(); - boolean changed = inlineResult.isChanged(); - if (normalizedPlan.anyMatch(p -> p instanceof LogicalEmptyRelation)) { - String beforeEliminate = normalizedPlan.treeString(); - normalizedPlan = eliminateEmptyRelation(normalizedPlan); - changed = changed || !beforeEliminate.equals(normalizedPlan.treeString()); - } // Do not use Plan.equals() as a fixpoint check here. Some logical nodes, // e.g. LogicalCTEAnchor and LogicalSubQueryAlias, intentionally ignore // children in equals(), so a child CTE rewrite under a kept parent may be // missed and block cascading consumer-count-based inlining. - if (!changed) { + if (!inlineResult.isChanged()) { return normalizedPlan; } currentPlan = normalizedPlan; } } + private boolean containsCte(Plan plan) { + return plan.anyMatch(p -> p instanceof LogicalCTEAnchor || p instanceof LogicalCTEConsumer); + } + private Plan eliminateEmptyRelation(Plan plan) { CascadesContext ctx = CascadesContext.initContext( cascadesContext.getStatementContext(), plan, PhysicalProperties.ANY); - // Use getCteChildrenRewriter for the same reason as rewriteInlinedPlan: + // Use getCteChildrenRewriter for the same reason as pushDownFilterAndPruneInlinedPlan: // getWholeTreeRewriterWithCustomJobs would invoke RewriteCteChildren which // reads stale rewrittenCteConsumer cache from the main Rewriter phase, // reverting the inlined CTE subtrees back to the original structure. @@ -258,6 +263,14 @@ private Plan eliminateEmptyRelation(Plan plan) { return ctx.getRewritePlan(); } + private Plan generateFilterPushedDownInlinedPlan(CTEInliner cteInliner, Plan rewritePlan) { + Plan inlinedPlan = cteInliner.generateInlinedPlan(rewritePlan); + if (inlinedPlan == null) { + return null; + } + return pushDownFilterAndPruneInlinedPlan(inlinedPlan); + } + /** * Run filter pushdown and column pruning on the inlined plan using a temporary * CascadesContext. @@ -270,7 +283,7 @@ private Plan eliminateEmptyRelation(Plan plan) { * phase. That cached outer query still contains LogicalCTEConsumer nodes for the inlined CTE, * preventing the filter from ever reaching the inlined union body. */ - private Plan rewriteInlinedPlan(Plan inlinedPlan) { + private Plan pushDownFilterAndPruneInlinedPlan(Plan inlinedPlan) { CascadesContext inlinedContext = CascadesContext.initContext( cascadesContext.getStatementContext(), inlinedPlan, PhysicalProperties.ANY); Rewriter.getCteChildrenRewriter(inlinedContext, ImmutableList.of( diff --git a/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy b/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy index dae71db9c318a5..95cd41576c8dbb 100644 --- a/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy +++ b/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy @@ -28,9 +28,38 @@ suite("test_cbo_cte_inline_prune") { """ sql "INSERT INTO cte_cbo_inline_tbl VALUES (1, 10), (2, 20), (3, 30)" + sql "DROP TABLE IF EXISTS cte_cbo_empty_array_tbl" + sql """ + CREATE TABLE cte_cbo_empty_array_tbl ( + id INT, + vals ARRAY + ) ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_num" = "1") + """ + sql "SET cte_inline_mode=1" sql "SET inline_cte_referenced_threshold=1" + explain { + sql "SELECT count(*) FROM cte_cbo_empty_array_tbl" + contains("VEMPTYSET") + } + + explain { + sql """ + WITH cte_keep AS ( + SELECT id FROM cte_cbo_inline_tbl + ) + SELECT count(*) FROM cte_cbo_empty_array_tbl + UNION ALL + SELECT count(*) FROM cte_keep + """ + contains("VUNION") + contains("constant exprs") + } + explain { sql """ shape plan From 576ac17c2f9f56851b2490a00bee950cdcd436ff Mon Sep 17 00:00:00 2001 From: englefly Date: Thu, 14 May 2026 22:29:10 +0800 Subject: [PATCH 3/5] add comments for regression case --- .../suites/query_p0/cte/test_cbo_cte_inline_prune.groovy | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy b/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy index 95cd41576c8dbb..dfa3f0b40faa4a 100644 --- a/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy +++ b/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy @@ -47,6 +47,9 @@ suite("test_cbo_cte_inline_prune") { contains("VEMPTYSET") } + // cte_cbo_empty_array_tbl is empty table. after eliminateEmptyRelation, + // "SELECT count(*) FROM cte_cbo_empty_array_tbl" becomes "SELECT 0". + // This test verifies that the constant exprs are still visible to VUNION and can be used for pruning. explain { sql """ WITH cte_keep AS ( @@ -57,7 +60,7 @@ suite("test_cbo_cte_inline_prune") { SELECT count(*) FROM cte_keep """ contains("VUNION") - contains("constant exprs") + contains("constant exprs") } explain { From c4f99fd805561cb8603e6e8c48b3a8f23d9f4d3f Mon Sep 17 00:00:00 2001 From: englefly Date: Fri, 15 May 2026 11:16:33 +0800 Subject: [PATCH 4/5] unstable case removed --- .../cte/test_cbo_cte_inline_prune.groovy | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy b/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy index dfa3f0b40faa4a..ffd4f2acae459c 100644 --- a/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy +++ b/regression-test/suites/query_p0/cte/test_cbo_cte_inline_prune.groovy @@ -47,22 +47,6 @@ suite("test_cbo_cte_inline_prune") { contains("VEMPTYSET") } - // cte_cbo_empty_array_tbl is empty table. after eliminateEmptyRelation, - // "SELECT count(*) FROM cte_cbo_empty_array_tbl" becomes "SELECT 0". - // This test verifies that the constant exprs are still visible to VUNION and can be used for pruning. - explain { - sql """ - WITH cte_keep AS ( - SELECT id FROM cte_cbo_inline_tbl - ) - SELECT count(*) FROM cte_cbo_empty_array_tbl - UNION ALL - SELECT count(*) FROM cte_keep - """ - contains("VUNION") - contains("constant exprs") - } - explain { sql """ shape plan From d8144892927beb269476ce00f20c5ea8cc19a5a7 Mon Sep 17 00:00:00 2001 From: englefly Date: Mon, 18 May 2026 15:27:22 +0800 Subject: [PATCH 5/5] [fix](fe) Refresh CTE context after normalization ### What problem does this PR solve? Issue Number: None Related PR: None Problem Summary: CTE normalization can remove empty branches or inline/remove anchors after StatementContext has already recorded producer and consumer metadata. Refresh the CTE context from the normalized rewrite plan before memo construction so CBO costing and runtime-filter pushdown use the live CTE shape. ### Release note None ### Check List (For Author) - Test: Unit Test - org.apache.doris.nereids.rules.rewrite.CTEInlineTest#refreshCteConsumersAfterNormalizeEliminatesEmptyBranch - Maven FE validate - Behavior changed: No - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../doris/nereids/StatementContext.java | 11 ++++++ .../nereids/jobs/executor/Optimizer.java | 21 +++++++++- .../rules/rewrite/ClearContextStatus.java | 5 +-- .../nereids/rules/rewrite/CTEInlineTest.java | 38 +++++++++++++++++++ 4 files changed, 70 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 3dbf1fc02c7819..2af64752cc47aa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -667,6 +667,17 @@ public Map getRewrittenCteConsumer() { return rewrittenCteConsumer; } + /** Clear CTE-related rewrite and memo state before rebuilding it from a new plan tree. */ + public void clearCteEnvironment() { + cteIdToConsumers.clear(); + cteIdToOutputIds.clear(); + cteIdToProducer.clear(); + consumerIdToFilters.clear(); + cteIdToConsumerGroup.clear(); + rewrittenCteProducer.clear(); + rewrittenCteConsumer.clear(); + } + /** * Snapshot current CTE-related environment for temporary rewrite/optimization. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java index 77857c27ff597b..b66a6a30c6074a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.jobs.executor; import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.StatementContext; import org.apache.doris.nereids.jobs.cascades.DeriveStatsJob; import org.apache.doris.nereids.jobs.cascades.OptimizeGroupJob; import org.apache.doris.nereids.jobs.joinorder.JoinOrderJob; @@ -38,6 +39,7 @@ import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor; import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer; +import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer; import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation; import org.apache.doris.nereids.util.MoreFieldsThread; import org.apache.doris.qe.ConnectContext; @@ -70,7 +72,9 @@ public void execute() { MoreFieldsThread.keepFunctionSignature(() -> { Plan rewritePlan = cascadesContext.getRewritePlan(); if (containsCte(rewritePlan)) { - cascadesContext.setRewritePlan(normalizeCtePlan(rewritePlan)); + Plan normalizedPlan = normalizeCtePlan(rewritePlan); + cascadesContext.setRewritePlan(normalizedPlan); + refreshCteContext(normalizedPlan); } // generate inlined CTE alternative for CBO comparison Plan cboInlinedPlan = generateCTEInlineAlternative(); @@ -219,6 +223,7 @@ private Plan generateSelectiveCTEInline() { if (pushedDownInlinedPlan.anyMatch(p -> p instanceof LogicalEmptyRelation)) { pushedDownInlinedPlan = normalizeCtePlan(pushedDownInlinedPlan); cascadesContext.setRewritePlan(pushedDownInlinedPlan); + refreshCteContext(pushedDownInlinedPlan); return null; } } @@ -249,6 +254,20 @@ private boolean containsCte(Plan plan) { return plan.anyMatch(p -> p instanceof LogicalCTEAnchor || p instanceof LogicalCTEConsumer); } + private void refreshCteContext(Plan plan) { + StatementContext statementContext = cascadesContext.getStatementContext(); + statementContext.clearCteEnvironment(); + plan.foreach(p -> { + if (p instanceof LogicalCTEAnchor) { + LogicalCTEAnchor anchor = (LogicalCTEAnchor) p; + statementContext.setCteProducer(anchor.getCteId(), (LogicalCTEProducer) anchor.left()); + } else if (p instanceof LogicalCTEConsumer) { + cascadesContext.putCTEIdToConsumer((LogicalCTEConsumer) p); + } + return false; + }); + } + private Plan eliminateEmptyRelation(Plan plan) { CascadesContext ctx = CascadesContext.initContext( cascadesContext.getStatementContext(), plan, PhysicalProperties.ANY); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ClearContextStatus.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ClearContextStatus.java index cb5dcf5526a4d5..72cb6328d4a86c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ClearContextStatus.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ClearContextStatus.java @@ -31,10 +31,7 @@ public class ClearContextStatus implements CustomRewriter { @Override public Plan rewriteRoot(Plan plan, JobContext jobContext) { - jobContext.getCascadesContext().getStatementContext().getRewrittenCteConsumer().clear(); - jobContext.getCascadesContext().getStatementContext().getRewrittenCteProducer().clear(); - jobContext.getCascadesContext().getStatementContext().getCteIdToOutputIds().clear(); - jobContext.getCascadesContext().getStatementContext().getConsumerIdToFilters().clear(); + jobContext.getCascadesContext().getStatementContext().clearCteEnvironment(); return plan; } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/CTEInlineTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/CTEInlineTest.java index e9767ef524bd75..7ae9aa1e99569f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/CTEInlineTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/CTEInlineTest.java @@ -21,7 +21,9 @@ import org.apache.doris.nereids.StatementContext; import org.apache.doris.nereids.parser.NereidsParser; import org.apache.doris.nereids.properties.PhysicalProperties; +import org.apache.doris.nereids.trees.expressions.CTEId; import org.apache.doris.nereids.trees.plans.commands.ExplainCommand; +import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.util.MemoPatternMatchSupported; import org.apache.doris.nereids.util.MemoTestUtils; @@ -29,13 +31,24 @@ import org.apache.doris.qe.OriginStatement; import org.apache.doris.utframe.TestWithFeService; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.util.Map; +import java.util.Set; + public class CTEInlineTest extends TestWithFeService implements MemoPatternMatchSupported { @Override protected void runBeforeAll() throws Exception { createDatabase("test"); connectContext.setDatabase("test"); + createTable("CREATE TABLE cte_inline_tbl (\n" + + " id int NULL,\n" + + " val int NULL\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(id)\n" + + "DISTRIBUTED BY HASH(id) BUCKETS 1\n" + + "PROPERTIES (\"replication_num\" = \"1\")"); } @Test @@ -81,4 +94,29 @@ public void recCteInline() { ).when(cte -> cte.getCteName().equals("yy")) ); } + + @Test + public void refreshCteConsumersAfterNormalizeEliminatesEmptyBranch() { + int oldCteInlineMode = connectContext.getSessionVariable().cteInlineMode; + int oldInlineCteReferencedThreshold = connectContext.getSessionVariable().inlineCTEReferencedThreshold; + connectContext.getSessionVariable().cteInlineMode = 0; + connectContext.getSessionVariable().inlineCTEReferencedThreshold = 1; + connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION"); + String sql = "with cte as (select id, val from cte_inline_tbl) " + + "select * from cte where id = 1 " + + "union all select * from cte where id = 2 " + + "union all select * from cte where 1 = 0"; + try { + PlanChecker.from(connectContext).checkPlannerResult(sql, planner -> { + Map> consumers = + planner.getCascadesContext().getStatementContext().getCteIdToConsumers(); + Assertions.assertEquals(1, consumers.size()); + Assertions.assertEquals(2, consumers.values().iterator().next().size()); + }); + } finally { + connectContext.getSessionVariable().cteInlineMode = oldCteInlineMode; + connectContext.getSessionVariable().inlineCTEReferencedThreshold = oldInlineCteReferencedThreshold; + connectContext.getSessionVariable().setDisableNereidsRules(""); + } + } }