-
Notifications
You must be signed in to change notification settings - Fork 28.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-14939][SQL] Add FoldablePropagation optimizer
## What changes were proposed in this pull request? This PR aims to add a new **FoldablePropagation** optimizer that propagates foldable expressions by replacing all attributes with the aliases of the original foldable expressions. Other optimizations will take advantage of the propagated foldable expressions: e.g. the `EliminateSorts` optimizer can now handle the following Cases 2 and 3. (Case 1 is the previous implementation.) 1. Literals and foldable expressions, e.g. "ORDER BY 1.0, 'abc', Now()" 2. Foldable ordinals, e.g. "SELECT 1.0, 'abc', Now() ORDER BY 1, 2, 3" 3. Foldable aliases, e.g. "SELECT 1.0 x, 'abc' y, Now() z ORDER BY x, y, z" This PR has been generalized based on cloud-fan's key ideas many times; he should be credited for the work he did. **Before** ``` scala> sql("SELECT 1.0, Now() x ORDER BY 1, x").explain == Physical Plan == WholeStageCodegen : +- Sort [1.0#5 ASC,x#0 ASC], true, 0 : +- INPUT +- Exchange rangepartitioning(1.0#5 ASC, x#0 ASC, 200), None +- WholeStageCodegen : +- Project [1.0 AS 1.0#5,1461873043577000 AS x#0] : +- INPUT +- Scan OneRowRelation[] ``` **After** ``` scala> sql("SELECT 1.0, Now() x ORDER BY 1, x").explain == Physical Plan == WholeStageCodegen : +- Project [1.0 AS 1.0#5,1461873079484000 AS x#0] : +- INPUT +- Scan OneRowRelation[] ``` ## How was this patch tested? Pass the Jenkins tests including a new test case. Author: Dongjoon Hyun <dongjoon@apache.org> Closes #12719 from dongjoon-hyun/SPARK-14939.
- Loading branch information
1 parent
e2ec32d
commit 5907ebf
Showing
6 changed files
with
208 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
131 changes: 131 additions & 0 deletions
131
...yst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.catalyst.optimizer | ||
|
||
import org.apache.spark.sql.catalyst.dsl.expressions._ | ||
import org.apache.spark.sql.catalyst.dsl.plans._ | ||
import org.apache.spark.sql.catalyst.expressions._ | ||
import org.apache.spark.sql.catalyst.plans._ | ||
import org.apache.spark.sql.catalyst.plans.logical._ | ||
import org.apache.spark.sql.catalyst.rules._ | ||
|
||
/**
 * Plan-level tests for the `FoldablePropagation` optimizer rule.
 *
 * Every test follows the same recipe: build a logical plan with the Catalyst DSL, analyze it,
 * run it through [[Optimize]], and compare the outcome against a hand-written expected plan
 * using `comparePlans`.
 */
class FoldablePropagationSuite extends PlanTest {

  /** Rule executor whose only batch applies `FoldablePropagation` under `FixedPoint(20)`. */
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Foldable Propagation", FixedPoint(20), FoldablePropagation) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int)

  /** Analyzes `plan` and then runs it through the [[Optimize]] executor. */
  private def optimize(plan: LogicalPlan): LogicalPlan = Optimize.execute(plan.analyze)

  /** Builds a fresh `Year(CurrentDate())` expression, the foldable value used by several tests. */
  private def currentYear: Expression = Year(CurrentDate())

  test("Propagate from subquery") {
    val plan =
      OneRowRelation
        .select(Literal(1).as('a), Literal(2).as('b))
        .subquery('T)
        .select('a, 'b)
    val actual = optimize(plan)

    // The outer references to 'a and 'b are expected to become the aliased literals.
    val expected =
      OneRowRelation
        .select(Literal(1).as('a), Literal(2).as('b))
        .subquery('T)
        .select(Literal(1).as('a), Literal(2).as('b))
        .analyze

    comparePlans(actual, expected)
  }

  test("Propagate to select clause") {
    val plan =
      testRelation
        .select('a.as('x), "str".as('y), 'b.as('z))
        .select('x, 'y, 'z)
    val actual = optimize(plan)

    // Only 'y aliases a literal; 'x and 'z alias plain attributes and stay as references.
    val expected =
      testRelation
        .select('a.as('x), "str".as('y), 'b.as('z))
        .select('x, "str".as('y), 'z)
        .analyze

    comparePlans(actual, expected)
  }

  test("Propagate to where clause") {
    val plan =
      testRelation
        .select("str".as('y))
        .where('y === "str" && "str" === 'y)
    val actual = optimize(plan)

    // 'y is expected to be replaced on both the left and the right side of a comparison.
    val expected =
      testRelation
        .select("str".as('y))
        .where("str".as('y) === "str" && "str" === "str".as('y))
        .analyze

    comparePlans(actual, expected)
  }

  test("Propagate to orderBy clause") {
    val plan =
      testRelation
        .select('a.as('x), currentYear.as('y), 'b)
        .orderBy('x.asc, 'y.asc, 'b.desc)
    val actual = optimize(plan)

    // The sort key on 'y is expected to become the bare (un-aliased) foldable expression.
    val expected =
      testRelation
        .select('a.as('x), currentYear.as('y), 'b)
        .orderBy('x.asc, SortOrder(currentYear, Ascending), 'b.desc)
        .analyze

    comparePlans(actual, expected)
  }

  test("Propagate to groupBy clause") {
    val plan =
      testRelation
        .select('a.as('x), currentYear.as('y), 'b)
        .groupBy('x, 'y, 'b)(sum('x), avg('y).as('AVG), count('b))
    val actual = optimize(plan)

    // 'y is expected to be replaced both in the grouping keys and inside the aggregate.
    val expected =
      testRelation
        .select('a.as('x), currentYear.as('y), 'b)
        .groupBy('x, currentYear.as('y), 'b)(
          sum('x),
          avg(currentYear).as('AVG),
          count('b))
        .analyze

    comparePlans(actual, expected)
  }

  test("Propagate in a complex query") {
    val plan =
      testRelation
        .select('a.as('x), currentYear.as('y), 'b)
        .where('x > 1 && 'y === 2016 && 'b > 1)
        .groupBy('x, 'y, 'b)(sum('x), avg('y).as('AVG), count('b))
        .orderBy('x.asc, 'AVG.asc)
    val actual = optimize(plan)

    // Filter, grouping keys and aggregate all reference 'y; the final sort keys are unchanged.
    val expected =
      testRelation
        .select('a.as('x), currentYear.as('y), 'b)
        .where('x > 1 && currentYear.as('y) === 2016 && 'b > 1)
        .groupBy('x, currentYear.as("y"), 'b)(
          sum('x),
          avg(currentYear).as('AVG),
          count('b))
        .orderBy('x.asc, 'AVG.asc)
        .analyze

    comparePlans(actual, expected)
  }

  test("Propagate in subqueries of Union queries") {
    // NOTE(review): the outer projection selects 'x although each Union child only projects
    // the sum expression; presumably 'x stays unresolved on both sides of the comparison --
    // confirm this is intended.
    val plan =
      Union(
        Seq(
          testRelation.select(Literal(1).as('x), 'a).select('x + 'a),
          testRelation.select(Literal(2).as('x), 'a).select('x + 'a)))
        .select('x)
    val actual = optimize(plan)

    // Each Union child is expected to have its own literal substituted for 'x.
    val expected =
      Union(
        Seq(
          testRelation
            .select(Literal(1).as('x), 'a)
            .select((Literal(1).as('x) + 'a).as("(x + a)")),
          testRelation
            .select(Literal(2).as('x), 'a)
            .select((Literal(2).as('x) + 'a).as("(x + a)"))))
        .select('x)
        .analyze

    comparePlans(actual, expected)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters