Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.nereids.cost.Cost;
import org.apache.doris.nereids.cost.CostCalculator;
import org.apache.doris.nereids.jobs.JobContext;
import org.apache.doris.nereids.memo.Group;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.DistributionSpecHash.ShuffleType;
import org.apache.doris.nereids.stats.StatsCalculator;
Expand Down Expand Up @@ -225,9 +226,24 @@ private boolean requireGather(PhysicalProperties requiredChildProperty) {
* no matter x.ndv is high or not, it is not worthwhile to shuffle A and B by x
* and hence we forbid one phase agg */
private boolean banAggUnionAll(PhysicalHashAggregate<? extends Plan> aggregate) {
return aggregate.getAggMode() == AggMode.INPUT_TO_RESULT
&& children.get(0).getPlan() instanceof PhysicalUnion
&& !((PhysicalUnion) children.get(0).getPlan()).isDistinct();
if (aggregate.getAggMode() == AggMode.INPUT_TO_RESULT && children.get(0).getPlan() instanceof PhysicalUnion
&& !((PhysicalUnion) children.get(0).getPlan()).isDistinct()) {
GroupExpression gExprUnion = children.get(0);
List<Group> groups = gExprUnion.children();
Pair<Cost, List<PhysicalProperties>> pair = gExprUnion.getLowestCostTable().get(requiredProperties.get(0));
int i = 0;
// If none of the union inputs have PhysicalDistribute, allow one-phase aggregation
for (Group group : groups) {
GroupExpression groupExpression = group.getBestPlan(pair.second.get(i));
i++;
if (groupExpression != null && groupExpression.getPlan() instanceof PhysicalDistribute) {
return true;
}
}

return false;
}
return false;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.nereids.trees.plans.AggMode;
import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion;
import org.apache.doris.nereids.util.MatchingUtils;
import org.apache.doris.nereids.util.MemoPatternMatchSupported;
import org.apache.doris.nereids.util.PlanChecker;
import org.apache.doris.utframe.TestWithFeService;

import org.junit.jupiter.api.Test;

/**
* Tests for banAggUnionAll fix in ChildrenPropertiesRegulator.
*/
public class AggregateUnionPlanTest extends TestWithFeService implements MemoPatternMatchSupported {

@Override
protected void runBeforeAll() throws Exception {
createDatabase("test_agg_union");
connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION");

// Tables with RANDOM distribution: no PhysicalDistribute needed in union inputs
createTable("CREATE TABLE test_agg_union.t1_random ("
+ " a INT NULL, b INT NULL"
+ ") ENGINE=OLAP DUPLICATE KEY(a, b)"
+ " DISTRIBUTED BY RANDOM BUCKETS AUTO"
+ " PROPERTIES ('replication_allocation' = 'tag.location.default: 1');");
createTable("CREATE TABLE test_agg_union.t2_random ("
+ " a INT NULL, b INT NULL"
+ ") ENGINE=OLAP DUPLICATE KEY(a, b)"
+ " DISTRIBUTED BY RANDOM BUCKETS AUTO"
+ " PROPERTIES ('replication_allocation' = 'tag.location.default: 1');");

// Tables with HASH distribution: same key, no PhysicalDistribute needed when
// group-by matches the distribution key
createTable("CREATE TABLE test_agg_union.t1_hash ("
+ " a INT NULL, b INT NULL"
+ ") ENGINE=OLAP DUPLICATE KEY(a, b)"
+ " DISTRIBUTED BY HASH(a) BUCKETS 3"
+ " PROPERTIES ('replication_allocation' = 'tag.location.default: 1');");
createTable("CREATE TABLE test_agg_union.t2_hash ("
+ " a INT NULL, b INT NULL"
+ ") ENGINE=OLAP DUPLICATE KEY(a, b)"
+ " DISTRIBUTED BY HASH(a) BUCKETS 3"
+ " PROPERTIES ('replication_allocation' = 'tag.location.default: 1');");
}

@Test
public void testAggUnionRandomDistributeUseOnePhase() {
// Reproduces the exact bug scenario described in the issue
String sql = "SELECT a, b FROM test_agg_union.t1_random GROUP BY a, b"
+ " UNION"
+ " SELECT a, b FROM test_agg_union.t2_random GROUP BY a, b";

PlanChecker.from(connectContext).checkExplain(sql, planner -> {
// The outer dedup agg should be one-phase (INPUT_TO_RESULT):
// - Before fix: BUFFER_TO_RESULT (two-phase, with redundant LOCAL agg)
// - After fix: INPUT_TO_RESULT (one-phase, no redundant LOCAL agg)
MatchingUtils.assertMatches(planner.getOptimizedPlan(),
physicalResultSink(
physicalHashAggregate(any())
.when(agg -> agg.getAggMode() == AggMode.INPUT_TO_RESULT)
.when(agg -> agg.child(0) instanceof PhysicalUnion
|| hasUnionDescendant(agg.child(0)))
));
});
}

@Test
public void testOuterAggOverUnionAllRandomUsesTwoPhase() {
String sql = "SELECT a, b FROM"
+ " (SELECT a, b FROM test_agg_union.t1_random"
+ " UNION ALL"
+ " SELECT a, b FROM test_agg_union.t2_random) t"
+ " GROUP BY a, b";

PlanChecker.from(connectContext).checkExplain(sql, planner -> {
MatchingUtils.assertMatches(planner.getOptimizedPlan(),
physicalResultSink(
physicalHashAggregate(any())
.when(agg -> agg.getAggMode() == AggMode.BUFFER_TO_RESULT)
));
});
}

/**
* Walk up through PhysicalProject nodes to find if a PhysicalUnion sits below.
* Used to handle optional project nodes that the optimizer may insert between
* the dedup agg and the union.
*/
private boolean hasUnionDescendant(org.apache.doris.nereids.trees.plans.Plan plan) {
if (plan instanceof PhysicalUnion) {
return true;
}
if (plan instanceof org.apache.doris.nereids.trees.plans.physical.PhysicalProject) {
return hasUnionDescendant(plan.child(0));
}
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,65 +6,64 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecHash]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------hashAgg[GLOBAL]
------------------PhysicalDistribute[DistributionSpecHash]
--------------------hashAgg[LOCAL]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk]
------------------------------PhysicalProject
--------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=()
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[catalog_returns]
------------------------------PhysicalProject
--------------------------------filter((item.i_category = 'Home'))
----------------------------------PhysicalOlapScan[item]
--------------------------PhysicalProject
----------------------------filter(d_year IN (1998, 1999))
------------------------------PhysicalOlapScan[date_dim]
----------------hashAgg[GLOBAL]
------------------PhysicalDistribute[DistributionSpecHash]
--------------------hashAgg[LOCAL]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
------------------------------PhysicalProject
--------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=()
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[store_returns]
------------------------------PhysicalProject
--------------------------------filter((item.i_category = 'Home'))
----------------------------------PhysicalOlapScan[item]
--------------------------PhysicalProject
----------------------------filter(d_year IN (1998, 1999))
------------------------------PhysicalOlapScan[date_dim]
----------------hashAgg[GLOBAL]
------------------PhysicalDistribute[DistributionSpecHash]
--------------------hashAgg[LOCAL]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ws_item_sk]
------------------------------PhysicalProject
--------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=()
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[web_returns]
------------------------------PhysicalProject
--------------------------------filter((item.i_category = 'Home'))
----------------------------------PhysicalOlapScan[item]
--------------------------PhysicalProject
----------------------------filter(d_year IN (1998, 1999))
------------------------------PhysicalOlapScan[date_dim]
------------PhysicalUnion
--------------hashAgg[GLOBAL]
----------------PhysicalDistribute[DistributionSpecHash]
------------------hashAgg[LOCAL]
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk]
----------------------------PhysicalProject
------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=()
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_returns]
----------------------------PhysicalProject
------------------------------filter((item.i_category = 'Home'))
--------------------------------PhysicalOlapScan[item]
------------------------PhysicalProject
--------------------------filter(d_year IN (1998, 1999))
----------------------------PhysicalOlapScan[date_dim]
--------------hashAgg[GLOBAL]
----------------PhysicalDistribute[DistributionSpecHash]
------------------hashAgg[LOCAL]
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
----------------------------PhysicalProject
------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=()
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[store_returns]
----------------------------PhysicalProject
------------------------------filter((item.i_category = 'Home'))
--------------------------------PhysicalOlapScan[item]
------------------------PhysicalProject
--------------------------filter(d_year IN (1998, 1999))
----------------------------PhysicalOlapScan[date_dim]
--------------hashAgg[GLOBAL]
----------------PhysicalDistribute[DistributionSpecHash]
------------------hashAgg[LOCAL]
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ws_item_sk]
----------------------------PhysicalProject
------------------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=()
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[web_returns]
----------------------------PhysicalProject
------------------------------filter((item.i_category = 'Home'))
--------------------------------PhysicalOlapScan[item]
------------------------PhysicalProject
--------------------------filter(d_year IN (1998, 1999))
----------------------------PhysicalOlapScan[date_dim]
--PhysicalResultSink
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
Expand Down
Loading
Loading