Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-36086][SQL][3.1] CollapseProject project replace alias should use origin column name #33685

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -72,7 +72,7 @@ trait AliasHelper {
// Use transformUp to prevent infinite recursion when the replacement expression
// redefines the same ExprId,
trimNonTopLevelAliases(expr.transformUp {
case a: Attribute => aliasMap.getOrElse(a, a)
case a: Attribute => aliasMap.get(a).map(_.withName(a.name)).getOrElse(a)
}).asInstanceOf[NamedExpression]
}

Expand Down
Expand Up @@ -180,6 +180,14 @@ case class Alias(child: Expression, name: String)(
}
}

def withName(newName: String): NamedExpression = {
Alias(child, newName)(
exprId = exprId,
qualifier = qualifier,
explicitMetadata = explicitMetadata,
nonInheritableMetadataKeys = nonInheritableMetadataKeys)
}

def newInstance(): NamedExpression =
Alias(child, name)(
qualifier = qualifier,
Expand Down
Expand Up @@ -170,4 +170,13 @@ class CollapseProjectSuite extends PlanTest {
val expected = Sample(0.0, 0.6, false, 11L, relation.select('a as 'c)).analyze
comparePlans(optimized, expected)
}

test("SPARK-36086: CollapseProject should keep output schema name") {
val relation = LocalRelation('a.int, 'b.int)
val select = relation.select(('a + 'b).as('c)).analyze
val query = Project(Seq(select.output.head.withName("C")), select)
val optimized = Optimize.execute(query)
val expected = relation.select(('a + 'b).as('C)).analyze
comparePlans(optimized, expected)
}
}
Expand Up @@ -186,14 +186,14 @@ Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]

(24) Exchange
Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]
Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35]
Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35]

(25) HashAggregate [codegen id : 6]
Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]
Keys [1]: [s_store_id#25]
Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44]

(26) Scan parquet default.catalog_sales
Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48]
Expand Down Expand Up @@ -281,14 +281,14 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]

(45) Exchange
Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]
Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76]
Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS, [id=#76]

(46) HashAggregate [codegen id : 12]
Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]
Keys [1]: [cp_catalog_page_id#66]
Functions [4]: [sum(UnscaledValue(sales_price#51)), sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)), sum(UnscaledValue(net_loss#54))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77, sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79, sum(UnscaledValue(net_loss#54))#80]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS RETURNS#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS returns#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85]

(47) Scan parquet default.web_sales
Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89]
Expand Down Expand Up @@ -324,7 +324,7 @@ Condition : isnotnull(wr_returned_date_sk#96)

(54) Exchange
Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100]
Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), true, [id=#101]
Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), ENSURE_REQUIREMENTS, [id=#101]

(55) Sort [codegen id : 15]
Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100]
Expand All @@ -346,7 +346,7 @@ Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#103)) AND

(59) Exchange
Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103]
Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), true, [id=#104]
Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), ENSURE_REQUIREMENTS, [id=#104]

(60) Sort [codegen id : 17]
Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103]
Expand Down Expand Up @@ -411,19 +411,19 @@ Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121]

(74) Exchange
Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121]
Arguments: hashpartitioning(web_site_id#112, 5), true, [id=#122]
Arguments: hashpartitioning(web_site_id#112, 5), ENSURE_REQUIREMENTS, [id=#122]

(75) HashAggregate [codegen id : 22]
Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121]
Keys [1]: [web_site_id#112]
Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#123, sum(UnscaledValue(return_amt#94))#124, sum(UnscaledValue(profit#93))#125, sum(UnscaledValue(net_loss#95))#126]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#123,17,2) AS sales#127, MakeDecimal(sum(UnscaledValue(return_amt#94))#124,17,2) AS RETURNS#128, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#125,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#126,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#129, web channel AS channel#130, concat(web_site, web_site_id#112) AS id#131]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#123,17,2) AS sales#127, MakeDecimal(sum(UnscaledValue(return_amt#94))#124,17,2) AS returns#128, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#125,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#126,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#129, web channel AS channel#130, concat(web_site, web_site_id#112) AS id#131]

(76) Union

(77) Expand [codegen id : 23]
Input [5]: [sales#40, RETURNS#41, profit#42, channel#43, id#44]
Input [5]: [sales#40, returns#41, profit#42, channel#43, id#44]
Arguments: [List(sales#40, returns#41, profit#42, channel#43, id#44, 0), List(sales#40, returns#41, profit#42, channel#43, null, 1), List(sales#40, returns#41, profit#42, null, null, 3)], [sales#40, returns#41, profit#42, channel#132, id#133, spark_grouping_id#134]

(78) HashAggregate [codegen id : 23]
Expand All @@ -435,7 +435,7 @@ Results [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142,

(79) Exchange
Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146]
Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), true, [id=#147]
Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), ENSURE_REQUIREMENTS, [id=#147]

(80) HashAggregate [codegen id : 24]
Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146]
Expand Down
Expand Up @@ -9,7 +9,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
InputAdapter
Union
WholeStageCodegen (6)
HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum]
HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum]
InputAdapter
Exchange [s_store_id] #2
WholeStageCodegen (5)
Expand Down Expand Up @@ -48,7 +48,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
InputAdapter
Scan parquet default.store [s_store_sk,s_store_id]
WholeStageCodegen (12)
HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum]
HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum]
InputAdapter
Exchange [cp_catalog_page_id] #5
WholeStageCodegen (11)
Expand Down Expand Up @@ -81,7 +81,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
InputAdapter
Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id]
WholeStageCodegen (22)
HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum]
HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum]
InputAdapter
Exchange [web_site_id] #7
WholeStageCodegen (21)
Expand Down
Expand Up @@ -183,14 +183,14 @@ Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]

(24) Exchange
Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]
Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35]
Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35]

(25) HashAggregate [codegen id : 6]
Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]
Keys [1]: [s_store_id#25]
Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44]

(26) Scan parquet default.catalog_sales
Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48]
Expand Down Expand Up @@ -278,14 +278,14 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]

(45) Exchange
Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]
Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76]
Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS, [id=#76]

(46) HashAggregate [codegen id : 12]
Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]
Keys [1]: [cp_catalog_page_id#66]
Functions [4]: [sum(UnscaledValue(sales_price#51)), sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)), sum(UnscaledValue(net_loss#54))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77, sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79, sum(UnscaledValue(net_loss#54))#80]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS RETURNS#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS returns#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85]

(47) Scan parquet default.web_sales
Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89]
Expand Down Expand Up @@ -396,19 +396,19 @@ Results [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120]

(71) Exchange
Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120]
Arguments: hashpartitioning(web_site_id#111, 5), true, [id=#121]
Arguments: hashpartitioning(web_site_id#111, 5), ENSURE_REQUIREMENTS, [id=#121]

(72) HashAggregate [codegen id : 19]
Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120]
Keys [1]: [web_site_id#111]
Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#122, sum(UnscaledValue(return_amt#94))#123, sum(UnscaledValue(profit#93))#124, sum(UnscaledValue(net_loss#95))#125]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#122,17,2) AS sales#126, MakeDecimal(sum(UnscaledValue(return_amt#94))#123,17,2) AS RETURNS#127, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#124,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#125,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#128, web channel AS channel#129, concat(web_site, web_site_id#111) AS id#130]
Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#122,17,2) AS sales#126, MakeDecimal(sum(UnscaledValue(return_amt#94))#123,17,2) AS returns#127, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#124,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#125,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#128, web channel AS channel#129, concat(web_site, web_site_id#111) AS id#130]

(73) Union

(74) Expand [codegen id : 20]
Input [5]: [sales#40, RETURNS#41, profit#42, channel#43, id#44]
Input [5]: [sales#40, returns#41, profit#42, channel#43, id#44]
Arguments: [List(sales#40, returns#41, profit#42, channel#43, id#44, 0), List(sales#40, returns#41, profit#42, channel#43, null, 1), List(sales#40, returns#41, profit#42, null, null, 3)], [sales#40, returns#41, profit#42, channel#131, id#132, spark_grouping_id#133]

(75) HashAggregate [codegen id : 20]
Expand All @@ -420,7 +420,7 @@ Results [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141,

(76) Exchange
Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145]
Arguments: hashpartitioning(channel#131, id#132, spark_grouping_id#133, 5), true, [id=#146]
Arguments: hashpartitioning(channel#131, id#132, spark_grouping_id#133, 5), ENSURE_REQUIREMENTS, [id=#146]

(77) HashAggregate [codegen id : 21]
Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145]
Expand Down