From 2e23da2bda47dcfa8d143d317aff56860b77fe80 Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 7 Jul 2020 13:39:04 +0000 Subject: [PATCH] [SPARK-31975][SQL] Show AnalysisException when WindowFunction is used without WindowExpression ### What changes were proposed in this pull request? Add WindowFunction check at `CheckAnalysis`. ### Why are the changes needed? Provide friendly error msg. **BEFORE** ```scala scala> sql("select rank() from values(1)").show java.lang.UnsupportedOperationException: Cannot generate code for expression: rank() ``` **AFTER** ```scala scala> sql("select rank() from values(1)").show org.apache.spark.sql.AnalysisException: Window function rank() requires an OVER clause.;; Project [rank() AS RANK()#3] +- LocalRelation [col1#2] ``` ### Does this PR introduce _any_ user-facing change? Yes, users will be given a better error message. ### How was this patch tested? Pass the newly added UT. Closes #28808 from ulysses-you/SPARK-31975. Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 5 +++++ .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9c99acaa994b8..43dd0979eed7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -158,6 +158,11 @@ trait CheckAnalysis extends PredicateHelper { case g: GroupingID => failAnalysis("grouping_id() can only be used with GroupingSets/Cube/Rollup") + case e: Expression if e.children.exists(_.isInstanceOf[WindowFunction]) && + !e.isInstanceOf[WindowExpression] => + val w = e.children.find(_.isInstanceOf[WindowFunction]).get + failAnalysis(s"Window function $w requires an OVER 
clause.") + case w @ WindowExpression(AggregateExpression(_, _, true, _, _), _) => failAnalysis(s"Distinct window functions are not supported: $w") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index c15ec49e14282..c0be49af2107d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -884,4 +884,15 @@ class AnalysisSuite extends AnalysisTest with Matchers { Seq("Intersect can only be performed on tables with the compatible column types. " + "timestamp <> double at the second column of the second table")) } + + test("SPARK-31975: Throw user facing error when use WindowFunction directly") { + assertAnalysisError(testRelation2.select(RowNumber()), + Seq("Window function row_number() requires an OVER clause.")) + + assertAnalysisError(testRelation2.select(Sum(RowNumber())), + Seq("Window function row_number() requires an OVER clause.")) + + assertAnalysisError(testRelation2.select(RowNumber() + 1), + Seq("Window function row_number() requires an OVER clause.")) + } }