diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java index 071c450a220102..1b3bac4cd4d582 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.cost.Cost; import org.apache.doris.nereids.cost.CostCalculator; import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.memo.Group; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.DistributionSpecHash.ShuffleType; import org.apache.doris.nereids.stats.StatsCalculator; @@ -225,9 +226,24 @@ private boolean requireGather(PhysicalProperties requiredChildProperty) { * no matter x.ndv is high or not, it is not worthwhile to shuffle A and B by x * and hence we forbid one phase agg */ private boolean banAggUnionAll(PhysicalHashAggregate aggregate) { - return aggregate.getAggMode() == AggMode.INPUT_TO_RESULT - && children.get(0).getPlan() instanceof PhysicalUnion - && !((PhysicalUnion) children.get(0).getPlan()).isDistinct(); + if (aggregate.getAggMode() == AggMode.INPUT_TO_RESULT && children.get(0).getPlan() instanceof PhysicalUnion + && !((PhysicalUnion) children.get(0).getPlan()).isDistinct()) { + GroupExpression gExprUnion = children.get(0); + List groups = gExprUnion.children(); + Pair> pair = gExprUnion.getLowestCostTable().get(requiredProperties.get(0)); + int i = 0; + // If none of the union inputs have PhysicalDistribute, allow one-phase aggregation + for (Group group : groups) { + GroupExpression groupExpression = group.getBestPlan(pair.second.get(i)); + i++; + if (groupExpression != null && groupExpression.getPlan() instanceof PhysicalDistribute) { + return true; + } + } + + return false; + } + return false; } @Override diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateUnionPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateUnionPlanTest.java new file mode 100644 index 00000000000000..96c8ca2d76ef5f --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateUnionPlanTest.java @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.trees.plans.AggMode; +import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion; +import org.apache.doris.nereids.util.MatchingUtils; +import org.apache.doris.nereids.util.MemoPatternMatchSupported; +import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.utframe.TestWithFeService; + +import org.junit.jupiter.api.Test; + +/** + * Tests for banAggUnionAll fix in ChildrenPropertiesRegulator. + */ +public class AggregateUnionPlanTest extends TestWithFeService implements MemoPatternMatchSupported { + + @Override + protected void runBeforeAll() throws Exception { + createDatabase("test_agg_union"); + connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION"); + + // Tables with RANDOM distribution: no PhysicalDistribute needed in union inputs + createTable("CREATE TABLE test_agg_union.t1_random (" + + " a INT NULL, b INT NULL" + + ") ENGINE=OLAP DUPLICATE KEY(a, b)" + + " DISTRIBUTED BY RANDOM BUCKETS AUTO" + + " PROPERTIES ('replication_allocation' = 'tag.location.default: 1');"); + createTable("CREATE TABLE test_agg_union.t2_random (" + + " a INT NULL, b INT NULL" + + ") ENGINE=OLAP DUPLICATE KEY(a, b)" + + " DISTRIBUTED BY RANDOM BUCKETS AUTO" + + " PROPERTIES ('replication_allocation' = 'tag.location.default: 1');"); + + // Tables with HASH distribution: same key, no PhysicalDistribute needed when + // group-by matches the distribution key + createTable("CREATE TABLE test_agg_union.t1_hash (" + + " a INT NULL, b INT NULL" + + ") ENGINE=OLAP DUPLICATE KEY(a, b)" + + " DISTRIBUTED BY HASH(a) BUCKETS 3" + + " PROPERTIES ('replication_allocation' = 'tag.location.default: 1');"); + createTable("CREATE TABLE test_agg_union.t2_hash (" + + " a INT NULL, b INT NULL" + + ") ENGINE=OLAP DUPLICATE KEY(a, b)" + + " DISTRIBUTED BY HASH(a) BUCKETS 3" + + " PROPERTIES ('replication_allocation' = 'tag.location.default: 1');"); + } + + @Test + public void testAggUnionRandomDistributeUseOnePhase() { + // Reproduces the exact bug scenario described in the issue + String sql = "SELECT a, b FROM test_agg_union.t1_random GROUP BY a, b" + + " UNION" + + " SELECT a, b FROM test_agg_union.t2_random GROUP BY a, b"; + + PlanChecker.from(connectContext).checkExplain(sql, planner -> { + // The outer dedup agg should be one-phase (INPUT_TO_RESULT): + // - Before fix: BUFFER_TO_RESULT (two-phase, with redundant LOCAL agg) + // - After fix: INPUT_TO_RESULT (one-phase, no redundant LOCAL agg) + MatchingUtils.assertMatches(planner.getOptimizedPlan(), + physicalResultSink( + physicalHashAggregate(any()) + .when(agg -> agg.getAggMode() == AggMode.INPUT_TO_RESULT) + .when(agg -> agg.child(0) instanceof PhysicalUnion + || hasUnionDescendant(agg.child(0))) + )); + }); + } + + @Test + public void testOuterAggOverUnionAllRandomUsesTwoPhase() { + String sql = "SELECT a, b FROM" + + " (SELECT a, b FROM test_agg_union.t1_random" + + " UNION ALL" + + " SELECT a, b FROM test_agg_union.t2_random) t" + + " GROUP BY a, b"; + + PlanChecker.from(connectContext).checkExplain(sql, planner -> { + MatchingUtils.assertMatches(planner.getOptimizedPlan(), + physicalResultSink( + physicalHashAggregate(any()) + .when(agg -> agg.getAggMode() == AggMode.BUFFER_TO_RESULT) + )); + }); + } + + /** + * Walk up through PhysicalProject nodes to find if a PhysicalUnion sits below. + * Used to handle optional project nodes that the optimizer may insert between + * the dedup agg and the union. + */ + private boolean hasUnionDescendant(org.apache.doris.nereids.trees.plans.Plan plan) { + if (plan instanceof PhysicalUnion) { + return true; + } + if (plan instanceof org.apache.doris.nereids.trees.plans.physical.PhysicalProject) { + return hasUnionDescendant(plan.child(0)); + } + return false; + } +} diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query75.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query75.out index 7a6c63c2385f24..7df97e1416a021 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query75.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query75.out @@ -6,65 +6,64 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------PhysicalDistribute[DistributionSpecHash] --------hashAgg[LOCAL] ----------hashAgg[GLOBAL] -------------hashAgg[LOCAL] ---------------PhysicalUnion -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_returns] -------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_returns] -------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ws_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[web_returns] -------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] +------------PhysicalUnion +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_returns] +----------------------------PhysicalProject +------------------------------filter((item.i_category = 'Home')) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(d_year IN (1998, 1999)) +----------------------------PhysicalOlapScan[date_dim] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_returns] +----------------------------PhysicalProject +------------------------------filter((item.i_category = 'Home')) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(d_year IN (1998, 1999)) +----------------------------PhysicalOlapScan[date_dim] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ws_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_returns] +----------------------------PhysicalProject +------------------------------filter((item.i_category = 'Home')) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(d_year IN (1998, 1999)) +----------------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query75.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query75.out index 7a6c63c2385f24..7df97e1416a021 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query75.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query75.out @@ -6,65 +6,64 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------PhysicalDistribute[DistributionSpecHash] --------hashAgg[LOCAL] ----------hashAgg[GLOBAL] -------------hashAgg[LOCAL] ---------------PhysicalUnion -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_returns] -------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_returns] -------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ws_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[web_returns] -------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] +------------PhysicalUnion +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_returns] +----------------------------PhysicalProject +------------------------------filter((item.i_category = 'Home')) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(d_year IN (1998, 1999)) +----------------------------PhysicalOlapScan[date_dim] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_returns] +----------------------------PhysicalProject +------------------------------filter((item.i_category = 'Home')) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(d_year IN (1998, 1999)) +----------------------------PhysicalOlapScan[date_dim] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ws_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_returns] +----------------------------PhysicalProject +------------------------------filter((item.i_category = 'Home')) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(d_year IN (1998, 1999)) +----------------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query75.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query75.out index b45f54f94a72ed..8bc4e87f63ec57 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query75.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query75.out @@ -6,65 +6,64 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------PhysicalDistribute[DistributionSpecHash] --------hashAgg[LOCAL] ----------hashAgg[GLOBAL] -------------hashAgg[LOCAL] ---------------PhysicalUnion -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[LEFT_OUTER_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_returns] -------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Sports')) -----------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------filter(d_year IN (2000, 2001)) -------------------------------PhysicalOlapScan[date_dim] -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[LEFT_OUTER_JOIN shuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_returns] -------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Sports')) -----------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------filter(d_year IN (2000, 2001)) -------------------------------PhysicalOlapScan[date_dim] -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ws_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[LEFT_OUTER_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[web_returns] -------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Sports')) -----------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------filter(d_year IN (2000, 2001)) -------------------------------PhysicalOlapScan[date_dim] +------------PhysicalUnion +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_OUTER_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_returns] +----------------------------PhysicalProject +------------------------------filter((item.i_category = 'Sports')) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(d_year IN (2000, 2001)) +----------------------------PhysicalOlapScan[date_dim] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_OUTER_JOIN shuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_returns] +----------------------------PhysicalProject +------------------------------filter((item.i_category = 'Sports')) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(d_year IN (2000, 2001)) +----------------------------PhysicalOlapScan[date_dim] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ws_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_OUTER_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_returns] +----------------------------PhysicalProject +------------------------------filter((item.i_category = 'Sports')) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(d_year IN (2000, 2001)) +----------------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather]