Skip to content

Commit

Permalink
[SPARK-24575][SQL] Prohibit window expressions inside WHERE and HAVING clauses
Browse files Browse the repository at this point in the history
  • Loading branch information
aokolnychyi committed Jun 17, 2018
1 parent c7c0b08 commit 9a07ea3
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1923,6 +1923,9 @@ class Analyzer(
// "Aggregate with Having clause" will be triggered.
def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {

case Filter(condition, _) if hasWindowFunction(condition) =>
failAnalysis("It is not allowed to use window functions inside WHERE and HAVING clauses")

// Aggregate with Having clause. This rule works with an unresolved Aggregate because
// a resolved Aggregate will not have Window Functions.
case f @ Filter(condition, a @ Aggregate(groupingExprs, aggregateExprs, child))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,14 @@

package org.apache.spark.sql

import java.sql.{Date, Timestamp}

import scala.collection.mutable
import org.scalatest.Matchers.the

import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled}
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction, Window}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

/**
* Window function testing for DataFrame API.
Expand Down Expand Up @@ -624,4 +621,41 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext {
}
}
}

test("SPARK-24575: Window functions inside WHERE and HAVING clauses") {
  // Fragment of the analyzer's error message that every case below must surface.
  val expectedFragment = "window functions inside WHERE and HAVING clauses"

  // `df` is taken by name so that it is first evaluated inside the `thrownBy` block;
  // an AnalysisException raised while building the DataFrame is therefore captured too.
  def checkAnalysisError(df: => DataFrame): Unit = {
    val caught = the [AnalysisException] thrownBy {
      df.queryExecution.analyzed
    }
    assert(caught.message.contains(expectedFragment))
  }

  // Same check for a query given as SQL text.
  def checkSqlAnalysisError(query: String): Unit = checkAnalysisError(sql(query))

  // Predicate `rank() over (order by col) === 1`; built afresh on every call.
  def rankedFirstBy(col: Symbol): Column = rank().over(Window.orderBy(col)) === 1

  // Aggregate shared by the last two DataFrame cases; a `def` so it is only built
  // inside the by-name argument of checkAnalysisError.
  def maxAndSumByA: DataFrame =
    testData2.groupBy('a).agg(max('b).as("maxb"), sum('b).as("sumb"))

  // DataFrame API: window function in a filter, with and without an aggregate underneath.
  checkAnalysisError(testData2.select('a).where(rankedFirstBy('b)))
  checkAnalysisError(testData2.where('b === 2 && rankedFirstBy('b)))
  checkAnalysisError(
    testData2.groupBy('a)
      .agg(avg('b).as("avgb"))
      .where('a > 'avgb && rankedFirstBy('a)))
  checkAnalysisError(maxAndSumByA.where(rankedFirstBy('a)))
  checkAnalysisError(maxAndSumByA.where('sumb === 5 && rankedFirstBy('a)))

  // SQL: the same shapes expressed through WHERE and HAVING.
  checkSqlAnalysisError("SELECT a FROM testData2 WHERE RANK() OVER(ORDER BY b) = 1")
  checkSqlAnalysisError(
    "SELECT * FROM testData2 WHERE b = 2 AND RANK() OVER(ORDER BY b) = 1")
  checkSqlAnalysisError(
    "SELECT * FROM testData2 GROUP BY a HAVING a > AVG(b) AND RANK() OVER(ORDER BY a) = 1")
  checkSqlAnalysisError(
    "SELECT a, MAX(b), SUM(b) FROM testData2 GROUP BY a HAVING RANK() OVER(ORDER BY a) = 1")
  checkSqlAnalysisError(
    s"""SELECT a, MAX(b)
       |FROM testData2
       |GROUP BY a
       |HAVING SUM(b) = 5 AND RANK() OVER(ORDER BY a) = 1""".stripMargin)
}
}

0 comments on commit 9a07ea3

Please sign in to comment.