diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 9e06f9bec7830..3fb2e991af554 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -217,7 +217,7 @@ object Union { } /** - * Logical plan for unioning two plans, without a distinct. This is UNION ALL in SQL. + * Logical plan for unioning multiple plans, without a distinct. This is UNION ALL in SQL. * * @param byName Whether resolves columns in the children by column names. * @param allowMissingCol Allows missing columns in children query plans. If it is true, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala index 34baf5b90e54e..05fc1f7958fef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala @@ -79,7 +79,15 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitScriptTransform(p: ScriptTransformation): Statistics = default(p) - override def visitUnion(p: Union): Statistics = fallback(p) + override def visitUnion(p: Union): Statistics = { + val stats = p.children.map(_.stats) + val rowCount = if (stats.exists(_.rowCount.isEmpty)) { + None + } else { + Some(stats.map(_.rowCount.get).sum) + } + Statistics(sizeInBytes = stats.map(_.sizeInBytes).sum, rowCount = rowCount) + } override def visitWindow(p: Window): Statistics = fallback(p) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 91f8fc406a43d..1d780142aede0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -141,6 +141,17 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { expectedStatsCboOff = Statistics(sizeInBytes = 120)) } + test("SPARK-34031: Union operator missing rowCount when enable CBO") { + val union = Union(plan :: plan :: plan :: Nil) + val childrenSize = union.children.size + val sizeInBytes = plan.size.get * childrenSize + val rowCount = Some(plan.rowCount * childrenSize) + checkStats( + union, + expectedStatsCboOn = Statistics(sizeInBytes = sizeInBytes, rowCount = rowCount), + expectedStatsCboOff = Statistics(sizeInBytes = sizeInBytes)) + } + /** Check estimated stats when cbo is turned on/off. */ private def checkStats( plan: LogicalPlan, diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt index 61e5ae0121819..52dfff442bf3a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt @@ -1,45 +1,43 @@ == Physical Plan == -* Sort (41) -+- Exchange (40) - +- * Project (39) - +- * SortMergeJoin Inner (38) - :- * Sort (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * HashAggregate (18) - : : +- Exchange (17) - : : +- * HashAggregate (16) - : : +- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- Union (9) - : : : :- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- * Project (8) - : : : +- * Filter (7) - : : : +- * ColumnarToRow (6) - : : : +- Scan parquet default.catalog_sales (5) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.date_dim (10) - : +- BroadcastExchange (23) - : +- * Project (22) - : +- * Filter (21) - : +- * ColumnarToRow (20) - : +- Scan parquet default.date_dim (19) - +- * Sort (37) - +- Exchange (36) - +- * Project (35) - +- * BroadcastHashJoin Inner BuildRight (34) - :- * HashAggregate (28) - : +- ReusedExchange (27) - +- BroadcastExchange (33) - +- * Project (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.date_dim (29) +* Sort (39) ++- Exchange (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.catalog_sales (5) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * HashAggregate (27) + : +- ReusedExchange (26) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.date_dim (28) (1) Scan parquet default.web_sales @@ -116,9 +114,9 @@ Results [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum (17) Exchange Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Arguments: hashpartitioning(d_week_seq#10, 5), true, [id=#27] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [id=#27] -(18) HashAggregate [codegen id : 6] +(18) HashAggregate [codegen id : 12] Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] @@ -147,82 +145,74 @@ Input [2]: [d_week_seq#42, d_year#43] Input [1]: [d_week_seq#42] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] -(24) BroadcastHashJoin [codegen id : 6] +(24) BroadcastHashJoin [codegen id : 12] Left keys [1]: [d_week_seq#10] Right keys [1]: [d_week_seq#42] Join condition: None -(25) Project [codegen id : 6] +(25) Project [codegen id : 12] Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] -(26) Sort [codegen id : 6] -Input [8]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52] -Arguments: [d_week_seq1#45 ASC NULLS FIRST], false, 0 - -(27) ReusedExchange [Reuses operator id: 17] +(26) ReusedExchange [Reuses operator id: 17] Output [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] -(28) HashAggregate [codegen id : 12] +(27) HashAggregate [codegen id : 11] Input [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#66] Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#62,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#66,17,2) AS sat_sales#41] -(29) Scan parquet default.date_dim +(28) Scan parquet default.date_dim Output [2]: [d_week_seq#67, d_year#68] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] ReadSchema: struct -(30) ColumnarToRow [codegen id : 11] +(29) ColumnarToRow [codegen id : 10] Input [2]: [d_week_seq#67, d_year#68] -(31) Filter [codegen id : 11] +(30) Filter [codegen id : 10] Input [2]: [d_week_seq#67, d_year#68] Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2002)) AND isnotnull(d_week_seq#67)) -(32) Project [codegen id : 11] +(31) Project [codegen id : 10] Output [1]: [d_week_seq#67] Input [2]: [d_week_seq#67, d_year#68] -(33) BroadcastExchange +(32) BroadcastExchange Input [1]: [d_week_seq#67] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#69] -(34) BroadcastHashJoin [codegen id : 12] +(33) BroadcastHashJoin [codegen id : 11] Left keys [1]: [d_week_seq#10] Right keys [1]: [d_week_seq#67] Join condition: None -(35) Project [codegen id : 12] +(34) Project [codegen id : 11] Output [8]: [d_week_seq#10 AS d_week_seq2#70, sun_sales#35 AS sun_sales2#71, mon_sales#36 AS mon_sales2#72, tue_sales#37 AS tue_sales2#73, wed_sales#38 AS wed_sales2#74, thu_sales#39 AS thu_sales2#75, fri_sales#40 AS fri_sales2#76, sat_sales#41 AS sat_sales2#77] Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#67] -(36) Exchange -Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -Arguments: hashpartitioning((d_week_seq2#70 - 53), 5), true, [id=#78] - -(37) Sort [codegen id : 13] +(35) BroadcastExchange Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -Arguments: [(d_week_seq2#70 - 53) ASC NULLS FIRST], false, 0 +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [id=#78] -(38) SortMergeJoin [codegen id : 14] +(36) BroadcastHashJoin [codegen id : 12] Left keys [1]: [d_week_seq1#45] Right keys [1]: [(d_week_seq2#70 - 53)] Join condition: None -(39) Project [codegen id : 14] +(37) Project [codegen id : 12] Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#71)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#79, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#72)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#80, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#73)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#81, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#74)), DecimalType(37,20), true), 2) AS round((wed_sales1 / wed_sales2), 2)#82, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#75)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#83, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#76)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#84, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#77)), DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#85] Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -(40) Exchange +(38) Exchange Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] -Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), true, [id=#86] +Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#86] -(41) Sort [codegen id : 15] +(39) Sort [codegen id : 13] Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt index 3389774c46469..424a535e14847 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt @@ -1,67 +1,61 @@ -WholeStageCodegen (15) +WholeStageCodegen (13) Sort [d_week_seq1] InputAdapter Exchange [d_week_seq1] #1 - WholeStageCodegen (14) + WholeStageCodegen (12) Project [d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] - SortMergeJoin [d_week_seq1,d_week_seq2] + BroadcastHashJoin [d_week_seq1,d_week_seq2] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq] #2 + WholeStageCodegen (4) + HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ws_sold_date_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] + WholeStageCodegen (2) + Project [cs_sold_date_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [d_week_seq] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year] InputAdapter - WholeStageCodegen (6) - Sort [d_week_seq1] + BroadcastExchange #5 + WholeStageCodegen (11) Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] BroadcastHashJoin [d_week_seq,d_week_seq] HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] InputAdapter - Exchange [d_week_seq] #2 - WholeStageCodegen (4) - HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,d_week_seq,d_day_name] - BroadcastHashJoin [sold_date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (1) - Project [ws_sold_date_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] - WholeStageCodegen (2) - Project [cs_sold_date_sk,cs_ext_sales_price] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (3) - Filter [d_date_sk,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 InputAdapter - BroadcastExchange #4 - WholeStageCodegen (5) + BroadcastExchange #6 + WholeStageCodegen (10) Project [d_week_seq] Filter [d_year,d_week_seq] ColumnarToRow InputAdapter Scan parquet default.date_dim [d_week_seq,d_year] - InputAdapter - WholeStageCodegen (13) - Sort [d_week_seq2] - InputAdapter - Exchange [d_week_seq2] #5 - WholeStageCodegen (12) - Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] - BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] - InputAdapter - ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (11) - Project [d_week_seq] - Filter [d_year,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_week_seq,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt index 55bd25c501294..5a9c4715d4b05 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt @@ -10,8 +10,8 @@ TakeOrderedAndProject (81) : +- * HashAggregate (23) : +- * Project (22) : +- * BroadcastHashJoin Inner BuildRight (21) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) : : :- Union (9) : : : :- * Project (4) : : : : +- * Filter (3) @@ -21,22 +21,22 @@ TakeOrderedAndProject (81) : : : +- * Filter (7) : : : +- * ColumnarToRow (6) : : : +- Scan parquet default.store_returns (5) - : : +- BroadcastExchange (14) - : : +- * Project (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.store (10) : +- BroadcastExchange (20) - : +- * Filter (19) - : +- * ColumnarToRow (18) - : +- Scan parquet default.store (17) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) :- * HashAggregate (46) : +- Exchange (45) : +- * HashAggregate (44) : +- * Project (43) : +- * BroadcastHashJoin Inner BuildRight (42) - : :- * Project (37) - : : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (40) + : : +- * BroadcastHashJoin Inner BuildRight (39) : : :- Union (34) : : : :- * Project (29) : : : : +- * Filter (28) @@ -46,18 +46,18 @@ TakeOrderedAndProject (81) : : : +- * Filter (32) : : : +- * ColumnarToRow (31) : : : +- Scan parquet default.catalog_returns (30) - : : +- ReusedExchange (35) - : +- BroadcastExchange (41) - : +- * Filter (40) - : +- * ColumnarToRow (39) - : +- Scan parquet default.catalog_page (38) + : : +- BroadcastExchange (38) + : : +- * Filter (37) + : : +- * ColumnarToRow (36) + : : +- Scan parquet default.catalog_page (35) + : +- ReusedExchange (41) +- * HashAggregate (75) +- Exchange (74) +- * HashAggregate (73) +- * Project (72) +- * BroadcastHashJoin Inner BuildRight (71) - :- * Project (66) - : +- * BroadcastHashJoin Inner BuildRight (65) + :- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) : :- Union (63) : : :- * Project (50) : : : +- * Filter (49) @@ -75,11 +75,11 @@ TakeOrderedAndProject (81) : : +- * Filter (58) : : +- * ColumnarToRow (57) : : +- Scan parquet default.web_sales (56) - : +- ReusedExchange (64) - +- BroadcastExchange (70) - +- * Filter (69) - +- * ColumnarToRow (68) - +- Scan parquet default.web_site (67) + : +- BroadcastExchange (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet default.web_site (64) + +- ReusedExchange (70) (1) Scan parquet default.store_sales @@ -119,81 +119,81 @@ Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_los (9) Union -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#21, d_date#22] +(10) Scan parquet default.store +Output [2]: [s_store_sk#21, s_store_id#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct (11) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] +Input [2]: [s_store_sk#21, s_store_id#22] (12) Filter [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] -Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 11192)) AND (d_date#22 <= 11206)) AND isnotnull(d_date_sk#21)) +Input [2]: [s_store_sk#21, s_store_id#22] +Condition : isnotnull(s_store_sk#21) -(13) Project [codegen id : 3] -Output [1]: [d_date_sk#21] -Input [2]: [d_date_sk#21, d_date#22] +(13) BroadcastExchange +Input [2]: [s_store_sk#21, s_store_id#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] -(14) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [date_sk#6] -Right keys [1]: [cast(d_date_sk#21 as bigint)] +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#21 as bigint)] Join condition: None -(16) Project [codegen id : 5] -Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] -Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +(15) Project [codegen id : 5] +Output [6]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [8]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#21, s_store_id#22] -(17) Scan parquet default.store -Output [2]: [s_store_sk#24, s_store_id#25] +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#24, d_date#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] -(18) ColumnarToRow [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] +(18) Filter [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] +Condition : (((isnotnull(d_date#25) AND (d_date#25 >= 11192)) AND (d_date#25 <= 11206)) AND isnotnull(d_date_sk#24)) -(19) Filter [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] -Condition : isnotnull(s_store_sk#24) +(19) Project [codegen id : 4] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_date#25] (20) BroadcastExchange -Input [2]: [s_store_sk#24, s_store_id#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [store_sk#5] -Right keys [1]: [cast(s_store_sk#24 as bigint)] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#24 as bigint)] Join condition: None (22) Project [codegen id : 5] -Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [7]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22, d_date_sk#24] (23) HashAggregate [codegen id : 5] -Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Keys [1]: [s_store_id#25] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Keys [1]: [s_store_id#22] Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] -Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Results [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] (24) Exchange -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#22, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Keys [1]: [s_store_id#25] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#22] Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#22) AS id#44] (26) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] @@ -233,44 +233,44 @@ Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, (34) Union -(35) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#50] -Right keys [1]: [d_date_sk#21] -Join condition: None - -(37) Project [codegen id : 11] -Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] -Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] - -(38) Scan parquet default.catalog_page +(35) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 10] +(36) ColumnarToRow [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -(40) Filter [codegen id : 10] +(37) Filter [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Condition : isnotnull(cp_catalog_page_sk#65) -(41) BroadcastExchange +(38) BroadcastExchange Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] -(42) BroadcastHashJoin [codegen id : 11] +(39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [page_sk#49] Right keys [1]: [cp_catalog_page_sk#65] Join condition: None +(40) Project [codegen id : 11] +Output [6]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [8]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(41) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#24] +Join condition: None + (43) Project [codegen id : 11] Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] -Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] +Input [7]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66, d_date_sk#24] (44) HashAggregate [codegen id : 11] Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] @@ -281,7 +281,7 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] (45) Exchange Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS, [id=#76] (46) HashAggregate [codegen id : 12] Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] @@ -324,7 +324,7 @@ Condition : isnotnull(wr_returned_date_sk#96) (54) Exchange Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] -Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), true, [id=#101] +Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), ENSURE_REQUIREMENTS, [id=#101] (55) Sort [codegen id : 15] Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] @@ -346,7 +346,7 @@ Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#103)) AND (59) Exchange Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] -Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), true, [id=#104] +Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), ENSURE_REQUIREMENTS, [id=#104] (60) Sort [codegen id : 17] Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] @@ -363,44 +363,44 @@ Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return (63) Union -(64) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(65) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [date_sk#91] -Right keys [1]: [cast(d_date_sk#21 as bigint)] -Join condition: None - -(66) Project [codegen id : 21] -Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] -Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] - -(67) Scan parquet default.web_site +(64) Scan parquet default.web_site Output [2]: [web_site_sk#111, web_site_id#112] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 20] +(65) ColumnarToRow [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] -(69) Filter [codegen id : 20] +(66) Filter [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] Condition : isnotnull(web_site_sk#111) -(70) BroadcastExchange +(67) BroadcastExchange Input [2]: [web_site_sk#111, web_site_id#112] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] -(71) BroadcastHashJoin [codegen id : 21] +(68) BroadcastHashJoin [codegen id : 21] Left keys [1]: [wsr_web_site_sk#90] Right keys [1]: [web_site_sk#111] Join condition: None +(69) Project [codegen id : 21] +Output [6]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Input [8]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] + +(70) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(71) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#24 as bigint)] +Join condition: None + (72) Project [codegen id : 21] Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] -Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] +Input [7]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112, d_date_sk#24] (73) HashAggregate [codegen id : 21] Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] @@ -411,7 +411,7 @@ Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] (74) Exchange Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] -Arguments: hashpartitioning(web_site_id#112, 5), true, [id=#122] +Arguments: hashpartitioning(web_site_id#112, 5), ENSURE_REQUIREMENTS, [id=#122] (75) HashAggregate [codegen id : 22] Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] @@ -435,7 +435,7 @@ Results [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, (79) Exchange Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] -Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), true, [id=#147] +Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), ENSURE_REQUIREMENTS, [id=#147] (80) HashAggregate [codegen id : 24] Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt index 80b07a3712d36..2db6cf767729d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt @@ -15,9 +15,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (5) HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] InputAdapter Union WholeStageCodegen (1) @@ -35,18 +35,18 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #3 WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] InputAdapter BroadcastExchange #4 WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] WholeStageCodegen (12) HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] InputAdapter @@ -54,9 +54,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (11) HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] InputAdapter Union WholeStageCodegen (7) @@ -72,14 +72,14 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] InputAdapter - ReusedExchange [d_date_sk] #3 + BroadcastExchange #6 + WholeStageCodegen (9) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + ReusedExchange [d_date_sk] #4 WholeStageCodegen (22) HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] InputAdapter @@ -87,9 +87,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (21) HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] InputAdapter Union WholeStageCodegen (13) @@ -122,11 +122,11 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] InputAdapter - ReusedExchange [d_date_sk] #3 + BroadcastExchange #10 + WholeStageCodegen (19) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] InputAdapter - BroadcastExchange #10 - WholeStageCodegen (20) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + ReusedExchange [d_date_sk] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt index d78565986bc0a..a504149b00b94 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt @@ -1,494 +1,474 @@ == Physical Plan == -TakeOrderedAndProject (71) -+- * HashAggregate (70) - +- Exchange (69) - +- * HashAggregate (68) - +- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * Project (64) - +- * BroadcastHashJoin Inner BuildRight (63) - :- * Project (57) - : +- * BroadcastHashJoin Inner BuildRight (56) - : :- * Project (51) - : : +- * SortMergeJoin Inner (50) - : : :- * Sort (44) - : : : +- Exchange (43) - : : : +- * Project (42) - : : : +- * SortMergeJoin Inner (41) - : : : :- * Sort (35) - : : : : +- * HashAggregate (34) - : : : : +- * HashAggregate (33) - : : : : +- * Project (32) - : : : : +- * SortMergeJoin Inner (31) - : : : : :- * Sort (25) - : : : : : +- Exchange (24) - : : : : : +- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (16) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : : : :- Union (9) - : : : : : : : :- * Project (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : : : +- * Project (8) - : : : : : : : +- * Filter (7) - : : : : : : : +- * ColumnarToRow (6) - : : : : : : : +- Scan parquet default.web_sales (5) - : : : : : : +- BroadcastExchange (14) - : : : : : : +- * Project (13) - : : : : : : +- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.item (10) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Project (20) - : : : : : +- * Filter (19) - : : : : : +- * ColumnarToRow (18) - : : : : : +- Scan parquet default.date_dim (17) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.customer (26) - : : : +- * Sort (40) - : : : +- Exchange (39) - : : : +- * Filter (38) - : : : +- * ColumnarToRow (37) - : : : +- Scan parquet default.store_sales (36) - : : +- * Sort (49) - : : +- Exchange (48) - : : +- * Filter (47) - : : +- * ColumnarToRow (46) - : : +- Scan parquet default.customer_address (45) - : +- BroadcastExchange (55) - : +- * Filter (54) - : +- * ColumnarToRow (53) - : +- Scan parquet default.store (52) - +- BroadcastExchange (62) - +- * Project (61) - +- * Filter (60) - +- * ColumnarToRow (59) - +- Scan parquet default.date_dim (58) - - -(1) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * Project (61) + +- * SortMergeJoin Inner (60) + :- * Sort (47) + : +- * Project (46) + : +- * BroadcastHashJoin Inner BuildLeft (45) + : :- BroadcastExchange (10) + : : +- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.customer_address (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.store (4) + : +- * HashAggregate (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * SortMergeJoin Inner (41) + : :- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- Union (19) + : : : : :- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.catalog_sales (11) + : : : : +- * Project (18) + : : : : +- * Filter (17) + : : : : +- * ColumnarToRow (16) + : : : : +- Scan parquet default.web_sales (15) + : : : +- BroadcastExchange (24) + : : : +- * Project (23) + : : : +- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.date_dim (20) + : : +- BroadcastExchange (31) + : : +- * Project (30) + : : +- * Filter (29) + : : +- * ColumnarToRow (28) + : : +- Scan parquet default.item (27) + : +- * Sort (40) + : +- Exchange (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.customer (36) + +- * Sort (59) + +- Exchange (58) + +- * Project (57) + +- * BroadcastHashJoin Inner BuildRight (56) + :- * Filter (50) + : +- * ColumnarToRow (49) + : +- Scan parquet default.store_sales (48) + +- BroadcastExchange (55) + +- * Project (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet default.date_dim (51) + + +(1) Scan parquet default.customer_address +Output [3]: [ca_address_sk#1, ca_county#2, ca_state#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#1, ca_county#2, ca_state#3] + +(3) Filter [codegen id : 2] +Input [3]: [ca_address_sk#1, ca_county#2, ca_state#3] +Condition : ((isnotnull(ca_address_sk#1) AND isnotnull(ca_county#2)) AND isnotnull(ca_state#3)) + +(4) Scan parquet default.store +Output [2]: [s_county#4, s_state#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [s_county#4, s_state#5] + +(6) Filter [codegen id : 1] +Input [2]: [s_county#4, s_state#5] +Condition : (isnotnull(s_county#4) AND isnotnull(s_state#5)) + +(7) BroadcastExchange +Input [2]: [s_county#4, s_state#5] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [ca_county#2, ca_state#3] +Right keys [2]: [s_county#4, s_state#5] +Join condition: None + +(9) Project [codegen id : 2] +Output [1]: [ca_address_sk#1] +Input [5]: [ca_address_sk#1, ca_county#2, ca_state#3, s_county#4, s_state#5] + +(10) BroadcastExchange +Input [1]: [ca_address_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(11) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +(12) ColumnarToRow [codegen id : 3] +Input [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] -(3) Filter [codegen id : 1] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] -Condition : ((isnotnull(cs_item_sk#3) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_bill_customer_sk#2)) +(13) Filter [codegen id : 3] +Input [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] +Condition : ((isnotnull(cs_item_sk#10) AND isnotnull(cs_sold_date_sk#8)) AND isnotnull(cs_bill_customer_sk#9)) -(4) Project [codegen id : 1] -Output [3]: [cs_sold_date_sk#1 AS sold_date_sk#4, cs_bill_customer_sk#2 AS customer_sk#5, cs_item_sk#3 AS item_sk#6] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +(14) Project [codegen id : 3] +Output [3]: [cs_sold_date_sk#8 AS sold_date_sk#11, cs_bill_customer_sk#9 AS customer_sk#12, cs_item_sk#10 AS item_sk#13] +Input [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] -(5) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +(15) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] ReadSchema: struct -(6) ColumnarToRow [codegen id : 2] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +(16) ColumnarToRow [codegen id : 4] +Input [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] -(7) Filter [codegen id : 2] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] -Condition : ((isnotnull(ws_item_sk#8) AND isnotnull(ws_sold_date_sk#7)) AND isnotnull(ws_bill_customer_sk#9)) +(17) Filter [codegen id : 4] +Input [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] +Condition : ((isnotnull(ws_item_sk#15) AND isnotnull(ws_sold_date_sk#14)) AND isnotnull(ws_bill_customer_sk#16)) -(8) Project [codegen id : 2] -Output [3]: [ws_sold_date_sk#7 AS sold_date_sk#10, ws_bill_customer_sk#9 AS customer_sk#11, ws_item_sk#8 AS item_sk#12] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +(18) Project [codegen id : 4] +Output [3]: [ws_sold_date_sk#14 AS sold_date_sk#17, ws_bill_customer_sk#16 AS customer_sk#18, ws_item_sk#15 AS item_sk#19] +Input [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] -(9) Union +(19) Union -(10) Scan parquet default.item -Output [3]: [i_item_sk#13, i_class#14, i_category#15] +(20) Scan parquet default.date_dim +Output [3]: [d_date_sk#20, d_year#21, d_moy#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct -(11) ColumnarToRow [codegen id : 3] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] +(21) ColumnarToRow [codegen id : 5] +Input [3]: [d_date_sk#20, d_year#21, d_moy#22] -(12) Filter [codegen id : 3] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] -Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women)) AND (i_class#14 = maternity)) AND isnotnull(i_item_sk#13)) +(22) Filter [codegen id : 5] +Input [3]: [d_date_sk#20, d_year#21, d_moy#22] +Condition : ((((isnotnull(d_moy#22) AND isnotnull(d_year#21)) AND (d_moy#22 = 12)) AND (d_year#21 = 1998)) AND isnotnull(d_date_sk#20)) -(13) Project [codegen id : 3] -Output [1]: [i_item_sk#13] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] +(23) Project [codegen id : 5] +Output [1]: [d_date_sk#20] +Input [3]: [d_date_sk#20, d_year#21, d_moy#22] -(14) BroadcastExchange -Input [1]: [i_item_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +(24) BroadcastExchange +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [item_sk#6] -Right keys [1]: [i_item_sk#13] +(25) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [sold_date_sk#11] +Right keys [1]: [d_date_sk#20] Join condition: None -(16) Project [codegen id : 5] -Output [2]: [sold_date_sk#4, customer_sk#5] -Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] +(26) Project [codegen id : 7] +Output [2]: [customer_sk#12, item_sk#13] +Input [4]: [sold_date_sk#11, customer_sk#12, item_sk#13, d_date_sk#20] -(17) Scan parquet default.date_dim -Output [3]: [d_date_sk#17, d_year#18, d_moy#19] +(27) Scan parquet default.item +Output [3]: [i_item_sk#24, i_class#25, i_category#26] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] +ReadSchema: struct -(18) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +(28) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#24, i_class#25, i_category#26] -(19) Filter [codegen id : 4] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] -Condition : ((((isnotnull(d_moy#19) AND isnotnull(d_year#18)) AND (d_moy#19 = 12)) AND (d_year#18 = 1998)) AND isnotnull(d_date_sk#17)) +(29) Filter [codegen id : 6] +Input [3]: [i_item_sk#24, i_class#25, i_category#26] +Condition : ((((isnotnull(i_category#26) AND isnotnull(i_class#25)) AND (i_category#26 = Women)) AND (i_class#25 = maternity)) AND isnotnull(i_item_sk#24)) -(20) Project [codegen id : 4] -Output [1]: [d_date_sk#17] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +(30) Project [codegen id : 6] +Output [1]: [i_item_sk#24] +Input [3]: [i_item_sk#24, i_class#25, i_category#26] -(21) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +(31) BroadcastExchange +Input [1]: [i_item_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] -(22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [sold_date_sk#4] -Right keys [1]: [d_date_sk#17] +(32) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [item_sk#13] +Right keys [1]: [i_item_sk#24] Join condition: None -(23) Project [codegen id : 5] -Output [1]: [customer_sk#5] -Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#17] +(33) Project [codegen id : 7] +Output [1]: [customer_sk#12] +Input [3]: [customer_sk#12, item_sk#13, i_item_sk#24] -(24) Exchange -Input [1]: [customer_sk#5] -Arguments: hashpartitioning(customer_sk#5, 5), true, [id=#21] +(34) Exchange +Input [1]: [customer_sk#12] +Arguments: hashpartitioning(customer_sk#12, 5), ENSURE_REQUIREMENTS, [id=#28] -(25) Sort [codegen id : 6] -Input [1]: [customer_sk#5] -Arguments: [customer_sk#5 ASC NULLS FIRST], false, 0 +(35) Sort [codegen id : 8] +Input [1]: [customer_sk#12] +Arguments: [customer_sk#12 ASC NULLS FIRST], false, 0 -(26) Scan parquet default.customer -Output [2]: [c_customer_sk#22, c_current_addr_sk#23] +(36) Scan parquet default.customer +Output [2]: [c_customer_sk#29, c_current_addr_sk#30] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct -(27) ColumnarToRow [codegen id : 7] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +(37) ColumnarToRow [codegen id : 9] +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] -(28) Filter [codegen id : 7] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_current_addr_sk#23)) +(38) Filter [codegen id : 9] +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Condition : (isnotnull(c_customer_sk#29) AND isnotnull(c_current_addr_sk#30)) -(29) Exchange -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#24] +(39) Exchange +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Arguments: hashpartitioning(c_customer_sk#29, 5), ENSURE_REQUIREMENTS, [id=#31] -(30) Sort [codegen id : 8] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 +(40) Sort [codegen id : 10] +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 9] -Left keys [1]: [customer_sk#5] -Right keys [1]: [c_customer_sk#22] +(41) SortMergeJoin +Left keys [1]: [customer_sk#12] +Right keys [1]: [c_customer_sk#29] Join condition: None -(32) Project [codegen id : 9] -Output [2]: [c_customer_sk#22, c_current_addr_sk#23] -Input [3]: [customer_sk#5, c_customer_sk#22, c_current_addr_sk#23] +(42) Project +Output [2]: [c_customer_sk#29, c_current_addr_sk#30] +Input [3]: [customer_sk#12, c_customer_sk#29, c_current_addr_sk#30] -(33) HashAggregate [codegen id : 9] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Keys [2]: [c_customer_sk#22, c_current_addr_sk#23] +(43) HashAggregate +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Keys [2]: [c_customer_sk#29, c_current_addr_sk#30] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#22, c_current_addr_sk#23] +Results [2]: [c_customer_sk#29, c_current_addr_sk#30] -(34) HashAggregate [codegen id : 9] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Keys [2]: [c_customer_sk#22, c_current_addr_sk#23] +(44) HashAggregate +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Keys [2]: [c_customer_sk#29, c_current_addr_sk#30] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#22, c_current_addr_sk#23] +Results [2]: [c_customer_sk#29, c_current_addr_sk#30] -(35) Sort [codegen id : 9] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 +(45) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ca_address_sk#1] +Right keys [1]: [c_current_addr_sk#30] +Join condition: None -(36) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +(46) Project [codegen id : 11] +Output [1]: [c_customer_sk#29] +Input [3]: [ca_address_sk#1, c_customer_sk#29, c_current_addr_sk#30] + +(47) Sort [codegen id : 11] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(37) ColumnarToRow [codegen id : 10] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] - -(38) Filter [codegen id : 10] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Condition : (isnotnull(ss_customer_sk#26) AND isnotnull(ss_sold_date_sk#25)) - -(39) Exchange -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Arguments: hashpartitioning(ss_customer_sk#26, 5), true, [id=#28] - -(40) Sort [codegen id : 11] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Arguments: [ss_customer_sk#26 ASC NULLS FIRST], false, 0 - -(41) SortMergeJoin [codegen id : 12] -Left keys [1]: [c_customer_sk#22] -Right keys [1]: [ss_customer_sk#26] -Join condition: None - -(42) Project [codegen id : 12] -Output [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] -Input [5]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +(49) ColumnarToRow [codegen id : 13] +Input [3]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34] -(43) Exchange -Input [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] -Arguments: hashpartitioning(c_current_addr_sk#23, 5), true, [id=#29] +(50) Filter [codegen id : 13] +Input [3]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34] +Condition : (isnotnull(ss_customer_sk#33) AND isnotnull(ss_sold_date_sk#32)) -(44) Sort [codegen id : 13] -Input [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] -Arguments: [c_current_addr_sk#23 ASC NULLS FIRST], false, 0 - -(45) Scan parquet default.customer_address -Output [3]: [ca_address_sk#30, ca_county#31, ca_state#32] +(51) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_month_seq#35] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] -ReadSchema: struct - -(46) ColumnarToRow [codegen id : 14] -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] - -(47) Filter [codegen id : 14] -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] -Condition : ((isnotnull(ca_address_sk#30) AND isnotnull(ca_county#31)) AND isnotnull(ca_state#32)) - -(48) Exchange -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] -Arguments: hashpartitioning(ca_address_sk#30, 5), true, [id=#33] - -(49) Sort [codegen id : 15] -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] -Arguments: [ca_address_sk#30 ASC NULLS FIRST], false, 0 - -(50) SortMergeJoin [codegen id : 18] -Left keys [1]: [c_current_addr_sk#23] -Right keys [1]: [ca_address_sk#30] -Join condition: None +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct -(51) Project [codegen id : 18] -Output [5]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#31, ca_state#32] -Input [7]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_address_sk#30, ca_county#31, ca_state#32] +(52) ColumnarToRow [codegen id : 12] +Input [2]: [d_date_sk#20, d_month_seq#35] -(52) Scan parquet default.store -Output [2]: [s_county#34, s_state#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] -ReadSchema: struct +(53) Filter [codegen id : 12] +Input [2]: [d_date_sk#20, d_month_seq#35] +Condition : (((isnotnull(d_month_seq#35) AND (d_month_seq#35 >= Subquery scalar-subquery#36, [id=#37])) AND (d_month_seq#35 <= Subquery scalar-subquery#38, [id=#39])) AND isnotnull(d_date_sk#20)) -(53) ColumnarToRow [codegen id : 16] -Input [2]: [s_county#34, s_state#35] - -(54) Filter [codegen id : 16] -Input [2]: [s_county#34, s_state#35] -Condition : (isnotnull(s_county#34) AND isnotnull(s_state#35)) +(54) Project [codegen id : 12] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_month_seq#35] (55) BroadcastExchange -Input [2]: [s_county#34, s_state#35] -Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#36] +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] -(56) BroadcastHashJoin [codegen id : 18] -Left keys [2]: [ca_county#31, ca_state#32] -Right keys [2]: [s_county#34, s_state#35] +(56) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_sold_date_sk#32] +Right keys [1]: [d_date_sk#20] Join condition: None -(57) Project [codegen id : 18] -Output [3]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27] -Input [7]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#31, ca_state#32, s_county#34, s_state#35] - -(58) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_month_seq#37] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(59) ColumnarToRow [codegen id : 17] -Input [2]: [d_date_sk#17, d_month_seq#37] - -(60) Filter [codegen id : 17] -Input [2]: [d_date_sk#17, d_month_seq#37] -Condition : (((isnotnull(d_month_seq#37) AND (d_month_seq#37 >= Subquery scalar-subquery#38, [id=#39])) AND (d_month_seq#37 <= Subquery scalar-subquery#40, [id=#41])) AND isnotnull(d_date_sk#17)) +(57) Project [codegen id : 13] +Output [2]: [ss_customer_sk#33, ss_ext_sales_price#34] +Input [4]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34, d_date_sk#20] -(61) Project [codegen id : 17] -Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_month_seq#37] +(58) Exchange +Input [2]: [ss_customer_sk#33, ss_ext_sales_price#34] +Arguments: hashpartitioning(ss_customer_sk#33, 5), ENSURE_REQUIREMENTS, [id=#41] -(62) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +(59) Sort [codegen id : 14] +Input [2]: [ss_customer_sk#33, ss_ext_sales_price#34] +Arguments: [ss_customer_sk#33 ASC NULLS FIRST], false, 0 -(63) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ss_sold_date_sk#25] -Right keys [1]: [d_date_sk#17] +(60) SortMergeJoin [codegen id : 15] +Left keys [1]: [c_customer_sk#29] +Right keys [1]: [ss_customer_sk#33] Join condition: None -(64) Project [codegen id : 18] -Output [2]: [c_customer_sk#22, ss_ext_sales_price#27] -Input [4]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, d_date_sk#17] - -(65) HashAggregate [codegen id : 18] -Input [2]: [c_customer_sk#22, ss_ext_sales_price#27] -Keys [1]: [c_customer_sk#22] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] -Aggregate Attributes [1]: [sum#43] -Results [2]: [c_customer_sk#22, sum#44] - -(66) Exchange -Input [2]: [c_customer_sk#22, sum#44] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#45] - -(67) HashAggregate [codegen id : 19] -Input [2]: [c_customer_sk#22, sum#44] -Keys [1]: [c_customer_sk#22] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#27))#46] -Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#27))#46,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#47] - -(68) HashAggregate [codegen id : 19] -Input [1]: [segment#47] -Keys [1]: [segment#47] +(61) Project [codegen id : 15] +Output [2]: [c_customer_sk#29, ss_ext_sales_price#34] +Input [3]: [c_customer_sk#29, ss_customer_sk#33, ss_ext_sales_price#34] + +(62) HashAggregate [codegen id : 15] +Input [2]: [c_customer_sk#29, ss_ext_sales_price#34] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#34))] +Aggregate Attributes [1]: [sum#42] +Results [2]: [c_customer_sk#29, sum#43] + +(63) HashAggregate [codegen id : 15] +Input [2]: [c_customer_sk#29, sum#43] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#34))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#34))#44] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#34))#44,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#45] + +(64) HashAggregate [codegen id : 15] +Input [1]: [segment#45] +Keys [1]: [segment#45] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#48] -Results [2]: [segment#47, count#49] +Aggregate Attributes [1]: [count#46] +Results [2]: [segment#45, count#47] -(69) Exchange -Input [2]: [segment#47, count#49] -Arguments: hashpartitioning(segment#47, 5), true, [id=#50] +(65) Exchange +Input [2]: [segment#45, count#47] +Arguments: hashpartitioning(segment#45, 5), ENSURE_REQUIREMENTS, [id=#48] -(70) HashAggregate [codegen id : 20] -Input [2]: [segment#47, count#49] -Keys [1]: [segment#47] +(66) HashAggregate [codegen id : 16] +Input [2]: [segment#45, count#47] +Keys [1]: [segment#45] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#51] -Results [3]: [segment#47, count(1)#51 AS num_customers#52, (segment#47 * 50) AS segment_base#53] +Aggregate Attributes [1]: [count(1)#49] +Results [3]: [segment#45, count(1)#49 AS num_customers#50, (segment#45 * 50) AS segment_base#51] -(71) TakeOrderedAndProject -Input [3]: [segment#47, num_customers#52, segment_base#53] -Arguments: 100, [segment#47 ASC NULLS FIRST, num_customers#52 ASC NULLS FIRST], [segment#47, num_customers#52, segment_base#53] +(67) TakeOrderedAndProject +Input [3]: [segment#45, num_customers#50, segment_base#51] +Arguments: 100, [segment#45 ASC NULLS FIRST, num_customers#50 ASC NULLS FIRST], [segment#45, num_customers#50, segment_base#51] ===== Subqueries ===== -Subquery:1 Hosting operator id = 60 Hosting Expression = Subquery scalar-subquery#38, [id=#39] -* HashAggregate (78) -+- Exchange (77) - +- * HashAggregate (76) - +- * Project (75) - +- * Filter (74) - +- * ColumnarToRow (73) - +- Scan parquet default.date_dim (72) +Subquery:1 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#36, [id=#37] +* HashAggregate (74) ++- Exchange (73) + +- * HashAggregate (72) + +- * Project (71) + +- * Filter (70) + +- * ColumnarToRow (69) + +- Scan parquet default.date_dim (68) -(72) Scan parquet default.date_dim -Output [3]: [d_month_seq#37, d_year#18, d_moy#19] +(68) Scan parquet default.date_dim +Output [3]: [d_month_seq#35, d_year#21, d_moy#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct -(73) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(69) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(74) Filter [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] -Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) +(70) Filter [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] +Condition : (((isnotnull(d_year#21) AND isnotnull(d_moy#22)) AND (d_year#21 = 1998)) AND (d_moy#22 = 12)) -(75) Project [codegen id : 1] -Output [1]: [(d_month_seq#37 + 1) AS (d_month_seq + 1)#54] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(71) Project [codegen id : 1] +Output [1]: [(d_month_seq#35 + 1) AS (d_month_seq + 1)#52] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(76) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 1)#54] -Keys [1]: [(d_month_seq + 1)#54] +(72) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 1)#52] +Keys [1]: [(d_month_seq + 1)#52] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#54] +Results [1]: [(d_month_seq + 1)#52] -(77) Exchange -Input [1]: [(d_month_seq + 1)#54] -Arguments: hashpartitioning((d_month_seq + 1)#54, 5), true, [id=#55] +(73) Exchange +Input [1]: [(d_month_seq + 1)#52] +Arguments: hashpartitioning((d_month_seq + 1)#52, 5), ENSURE_REQUIREMENTS, [id=#53] -(78) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 1)#54] -Keys [1]: [(d_month_seq + 1)#54] +(74) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 1)#52] +Keys [1]: [(d_month_seq + 1)#52] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#54] +Results [1]: [(d_month_seq + 1)#52] -Subquery:2 Hosting operator id = 60 Hosting Expression = Subquery scalar-subquery#40, [id=#41] -* HashAggregate (85) -+- Exchange (84) - +- * HashAggregate (83) - +- * Project (82) - +- * Filter (81) - +- * ColumnarToRow (80) - +- Scan parquet default.date_dim (79) +Subquery:2 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (81) ++- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * Filter (77) + +- * ColumnarToRow (76) + +- Scan parquet default.date_dim (75) -(79) Scan parquet default.date_dim -Output [3]: [d_month_seq#37, d_year#18, d_moy#19] +(75) Scan parquet default.date_dim +Output [3]: [d_month_seq#35, d_year#21, d_moy#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct -(80) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(76) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(81) Filter [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] -Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) +(77) Filter [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] +Condition : (((isnotnull(d_year#21) AND isnotnull(d_moy#22)) AND (d_year#21 = 1998)) AND (d_moy#22 = 12)) -(82) Project [codegen id : 1] -Output [1]: [(d_month_seq#37 + 3) AS (d_month_seq + 3)#56] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(78) Project [codegen id : 1] +Output [1]: [(d_month_seq#35 + 3) AS (d_month_seq + 3)#54] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(83) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 3)#56] -Keys [1]: [(d_month_seq + 3)#56] +(79) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 3)#54] +Keys [1]: [(d_month_seq + 3)#54] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#56] +Results [1]: [(d_month_seq + 3)#54] -(84) Exchange -Input [1]: [(d_month_seq + 3)#56] -Arguments: hashpartitioning((d_month_seq + 3)#56, 5), true, [id=#57] +(80) Exchange +Input [1]: [(d_month_seq + 3)#54] +Arguments: hashpartitioning((d_month_seq + 3)#54, 5), ENSURE_REQUIREMENTS, [id=#55] -(85) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 3)#56] -Keys [1]: [(d_month_seq + 3)#56] +(81) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 3)#54] +Keys [1]: [(d_month_seq + 3)#54] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#56] +Results [1]: [(d_month_seq + 3)#54] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt index cb7130f53c9a9..3b0622cbf9264 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt @@ -1,142 +1,130 @@ TakeOrderedAndProject [segment,num_customers,segment_base] - WholeStageCodegen (20) + WholeStageCodegen (16) HashAggregate [segment,count] [count(1),num_customers,segment_base,count] InputAdapter Exchange [segment] #1 - WholeStageCodegen (19) + WholeStageCodegen (15) HashAggregate [segment] [count,count] HashAggregate [c_customer_sk,sum] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] - InputAdapter - Exchange [c_customer_sk] #2 - WholeStageCodegen (18) - HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] - Project [c_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] - BroadcastHashJoin [ca_county,ca_state,s_county,s_state] - Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ca_county,ca_state] - SortMergeJoin [c_current_addr_sk,ca_address_sk] - InputAdapter - WholeStageCodegen (13) - Sort [c_current_addr_sk] + HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] + Project [c_customer_sk,ss_ext_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [c_customer_sk] + Project [c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ca_address_sk] + BroadcastHashJoin [ca_county,ca_state,s_county,s_state] + Filter [ca_address_sk,ca_county,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] InputAdapter - Exchange [c_current_addr_sk] #3 - WholeStageCodegen (12) - Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] - SortMergeJoin [c_customer_sk,ss_customer_sk] + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [s_county,s_state] + ColumnarToRow InputAdapter - WholeStageCodegen (9) - Sort [c_customer_sk] - HashAggregate [c_customer_sk,c_current_addr_sk] - HashAggregate [c_customer_sk,c_current_addr_sk] - Project [c_customer_sk,c_current_addr_sk] - SortMergeJoin [customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (6) - Sort [customer_sk] + Scan parquet default.store [s_county,s_state] + HashAggregate [c_customer_sk,c_current_addr_sk] + HashAggregate [c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk] + SortMergeJoin [customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (8) + Sort [customer_sk] + InputAdapter + Exchange [customer_sk] #4 + WholeStageCodegen (7) + Project [customer_sk] + BroadcastHashJoin [item_sk,i_item_sk] + Project [customer_sk,item_sk] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (3) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] + ColumnarToRow InputAdapter - Exchange [customer_sk] #4 - WholeStageCodegen (5) - Project [customer_sk] - BroadcastHashJoin [sold_date_sk,d_date_sk] - Project [sold_date_sk,customer_sk] - BroadcastHashJoin [item_sk,i_item_sk] - InputAdapter - Union - WholeStageCodegen (1) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - WholeStageCodegen (2) - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] - Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [i_item_sk] - Filter [i_category,i_class,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (4) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - WholeStageCodegen (8) - Sort [c_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + WholeStageCodegen (4) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow InputAdapter - Exchange [c_customer_sk] #7 - WholeStageCodegen (7) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] - InputAdapter - WholeStageCodegen (11) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #8 - WholeStageCodegen (10) - Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [i_category,i_class,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] - InputAdapter - WholeStageCodegen (15) - Sort [ca_address_sk] - InputAdapter - Exchange [ca_address_sk] #9 - WholeStageCodegen (14) - Filter [ca_address_sk,ca_county,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (16) - Filter [s_county,s_state] + Scan parquet default.item [i_item_sk,i_class,i_category] + InputAdapter + WholeStageCodegen (10) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (9) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + WholeStageCodegen (14) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (13) + Project [ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.store [s_county,s_state] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (17) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - Subquery #1 - WholeStageCodegen (2) - HashAggregate [(d_month_seq + 1)] - InputAdapter - Exchange [(d_month_seq + 1)] #12 - WholeStageCodegen (1) - HashAggregate [(d_month_seq + 1)] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - Subquery #2 - WholeStageCodegen (2) - HashAggregate [(d_month_seq + 3)] - InputAdapter - Exchange [(d_month_seq + 3)] #13 - WholeStageCodegen (1) - HashAggregate [(d_month_seq + 3)] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (12) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #10 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #11 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt index 432ef4db6b1eb..411cbf4809cd1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt @@ -13,8 +13,8 @@ TakeOrderedAndProject (94) : : +- * HashAggregate (23) : : +- * Project (22) : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) : : : :- Union (9) : : : : :- * Project (4) : : : : : +- * Filter (3) @@ -24,22 +24,22 @@ TakeOrderedAndProject (94) : : : : +- * Filter (7) : : : : +- * ColumnarToRow (6) : : : : +- Scan parquet default.store_returns (5) - : : : +- BroadcastExchange (14) - : : : +- * Project (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.date_dim (10) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) : : +- BroadcastExchange (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.store (17) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.date_dim (16) : :- * HashAggregate (46) : : +- Exchange (45) : : +- * HashAggregate (44) : : +- * Project (43) : : +- * BroadcastHashJoin Inner BuildRight (42) - : : :- * Project (37) - : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (40) + : : : +- * BroadcastHashJoin Inner BuildRight (39) : : : :- Union (34) : : : : :- * Project (29) : : : : : +- * Filter (28) @@ -49,18 +49,18 @@ TakeOrderedAndProject (94) : : : : +- * Filter (32) : : : : +- * ColumnarToRow (31) : : : : +- Scan parquet default.catalog_returns (30) - : : : +- ReusedExchange (35) - : : +- BroadcastExchange (41) - : : +- * Filter (40) - : : +- * ColumnarToRow (39) - : : +- Scan parquet default.catalog_page (38) + : : : +- BroadcastExchange (38) + : : : +- * Filter (37) + : : : +- * ColumnarToRow (36) + : : : +- Scan parquet default.catalog_page (35) + : : +- ReusedExchange (41) : +- * HashAggregate (75) : +- Exchange (74) : +- * HashAggregate (73) : +- * Project (72) : +- * BroadcastHashJoin Inner BuildRight (71) - : :- * Project (66) - : : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (69) + : : +- * BroadcastHashJoin Inner BuildRight (68) : : :- Union (63) : : : :- * Project (50) : : : : +- * Filter (49) @@ -78,11 +78,11 @@ TakeOrderedAndProject (94) : : : +- * Filter (58) : : : +- * ColumnarToRow (57) : : : +- Scan parquet default.web_sales (56) - : : +- ReusedExchange (64) - : +- BroadcastExchange (70) - : +- * Filter (69) - : +- * ColumnarToRow (68) - : +- Scan parquet default.web_site (67) + : : +- BroadcastExchange (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet default.web_site (64) + : +- ReusedExchange (70) :- * HashAggregate (84) : +- Exchange (83) : +- * HashAggregate (82) @@ -132,81 +132,81 @@ Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_los (9) Union -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#21, d_date#22] +(10) Scan parquet default.store +Output [2]: [s_store_sk#21, s_store_id#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct (11) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] +Input [2]: [s_store_sk#21, s_store_id#22] (12) Filter [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] -Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 10442)) AND (d_date#22 <= 10456)) AND isnotnull(d_date_sk#21)) +Input [2]: [s_store_sk#21, s_store_id#22] +Condition : isnotnull(s_store_sk#21) -(13) Project [codegen id : 3] -Output [1]: [d_date_sk#21] -Input [2]: [d_date_sk#21, d_date#22] +(13) BroadcastExchange +Input [2]: [s_store_sk#21, s_store_id#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] -(14) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [date_sk#6] -Right keys [1]: [cast(d_date_sk#21 as bigint)] +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#21 as bigint)] Join condition: None -(16) Project [codegen id : 5] -Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] -Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +(15) Project [codegen id : 5] +Output [6]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [8]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#21, s_store_id#22] -(17) Scan parquet default.store -Output [2]: [s_store_sk#24, s_store_id#25] +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#24, d_date#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] -(18) ColumnarToRow [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] +(18) Filter [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] +Condition : (((isnotnull(d_date#25) AND (d_date#25 >= 10442)) AND (d_date#25 <= 10456)) AND isnotnull(d_date_sk#24)) -(19) Filter [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] -Condition : isnotnull(s_store_sk#24) +(19) Project [codegen id : 4] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_date#25] (20) BroadcastExchange -Input [2]: [s_store_sk#24, s_store_id#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [store_sk#5] -Right keys [1]: [cast(s_store_sk#24 as bigint)] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#24 as bigint)] Join condition: None (22) Project [codegen id : 5] -Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [7]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22, d_date_sk#24] (23) HashAggregate [codegen id : 5] -Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Keys [1]: [s_store_id#25] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Keys [1]: [s_store_id#22] Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] -Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Results [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] (24) Exchange -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#22, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Keys [1]: [s_store_id#25] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#22] Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] -Results [5]: [store channel AS channel#40, concat(store, s_store_id#25) AS id#41, MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#42, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#43, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#44] +Results [5]: [store channel AS channel#40, concat(store, s_store_id#22) AS id#41, MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#42, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#43, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#44] (26) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] @@ -246,44 +246,44 @@ Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, (34) Union -(35) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#50] -Right keys [1]: [d_date_sk#21] -Join condition: None - -(37) Project [codegen id : 11] -Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] -Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] - -(38) Scan parquet default.catalog_page +(35) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 10] +(36) ColumnarToRow [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -(40) Filter [codegen id : 10] +(37) Filter [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Condition : isnotnull(cp_catalog_page_sk#65) -(41) BroadcastExchange +(38) BroadcastExchange Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] -(42) BroadcastHashJoin [codegen id : 11] +(39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [page_sk#49] Right keys [1]: [cp_catalog_page_sk#65] Join condition: None +(40) Project [codegen id : 11] +Output [6]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [8]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(41) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#24] +Join condition: None + (43) Project [codegen id : 11] Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] -Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] +Input [7]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66, d_date_sk#24] (44) HashAggregate [codegen id : 11] Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] @@ -376,44 +376,44 @@ Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return (63) Union -(64) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(65) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [date_sk#91] -Right keys [1]: [cast(d_date_sk#21 as bigint)] -Join condition: None - -(66) Project [codegen id : 21] -Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] -Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] - -(67) Scan parquet default.web_site +(64) Scan parquet default.web_site Output [2]: [web_site_sk#111, web_site_id#112] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 20] +(65) ColumnarToRow [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] -(69) Filter [codegen id : 20] +(66) Filter [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] Condition : isnotnull(web_site_sk#111) -(70) BroadcastExchange +(67) BroadcastExchange Input [2]: [web_site_sk#111, web_site_id#112] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] -(71) BroadcastHashJoin [codegen id : 21] +(68) BroadcastHashJoin [codegen id : 21] Left keys [1]: [wsr_web_site_sk#90] Right keys [1]: [web_site_sk#111] Join condition: None +(69) Project [codegen id : 21] +Output [6]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Input [8]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] + +(70) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(71) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#24 as bigint)] +Join condition: None + (72) Project [codegen id : 21] Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] -Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] +Input [7]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112, d_date_sk#24] (73) HashAggregate [codegen id : 21] Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt index 233af6d8cc813..8d1794b903178 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt @@ -22,9 +22,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (5) HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] InputAdapter Union WholeStageCodegen (1) @@ -42,18 +42,18 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #4 WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] InputAdapter BroadcastExchange #5 WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] WholeStageCodegen (12) HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter @@ -61,9 +61,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (11) HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] InputAdapter Union WholeStageCodegen (7) @@ -79,14 +79,14 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] InputAdapter - ReusedExchange [d_date_sk] #4 + BroadcastExchange #7 + WholeStageCodegen (9) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + ReusedExchange [d_date_sk] #5 WholeStageCodegen (22) HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter @@ -94,9 +94,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (21) HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] InputAdapter Union WholeStageCodegen (13) @@ -129,14 +129,14 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] InputAdapter - ReusedExchange [d_date_sk] #4 + BroadcastExchange #11 + WholeStageCodegen (19) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (20) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + ReusedExchange [d_date_sk] #5 WholeStageCodegen (49) HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter