Closed
@@ -1691,7 +1691,19 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
   override def visitWindowDef(ctx: WindowDefContext): WindowSpecDefinition = withOrigin(ctx) {
     // CLUSTER BY ... | PARTITION BY ... ORDER BY ...
     val partition = ctx.partition.asScala.map(expression)
-    val order = ctx.sortItem.asScala.map(visitSortItem)
+    val order = if (ctx.sortItem.asScala.nonEmpty) {
+      ctx.sortItem.asScala.map(visitSortItem)
+    } else if (ctx.windowFrame != null &&
+        ctx.windowFrame().frameType.getType == SqlBaseParser.RANGE) {
+      // For a RANGE window frame, we won't add a default order spec.
+      ctx.sortItem.asScala.map(visitSortItem)
+    } else {
+      // Same default behavior as Hive when the order spec is empty:
Contributor:
From the ANSI SQL spec: If WD has no window ordering clause, then the window ordering is implementation-dependent, and all rows are peers.

I don't think this is a bug fix but rather a new feature. We need to justify it: what's the behavior of other popular SQL systems like presto, snowflake, redshift, etc.? And what's the benefit for end users?

Contributor Author:

> From the ANSI SQL spec: If WD has no window ordering clause, then the window ordering is implementation-dependent, and all rows are peers.
>
> I don't think this is a bug fix but rather a new feature. We need to justify it: what's the behavior of other popular SQL systems like presto, snowflake, redshift, etc.? And what's the benefit for end users?

I will check other SQL systems later.
In our production environment, one benefit is that we can migrate Hive SQL to Spark SQL more smoothly, without having to rewrite queries one by one.

+      // set the partition spec expressions as the order spec
+      ctx.partition.asScala.map { expr =>
+        SortOrder(expression(expr), Ascending, Ascending.defaultNullOrdering, Set.empty)
Member:
Wait, why do we set the ordering column as the partition column? We should just leave it unspecified, so that only (non-window) aggregation functions work together with unbounded windows and the result isn't affected by the order. This is what the Scala API does.

Contributor Author:

> Wait, why do we set the ordering column as the partition column? We should just leave it unspecified, so that only (non-window) aggregation functions work together with unbounded windows and the result isn't affected by the order. This is what the Scala API does.

Hmm, Hive does it this way. To me, when the user doesn't write an ORDER BY clause, it means they don't care about the result order. For the Range/DataFrame case we can't support this.

Member:

But the results will be useless. When can it be useful, if the order is indeterministic for functions that depend on the order?

Contributor Author:

> But the results will be useless. When can it be useful, if the order is indeterministic for functions that depend on the order?

In PostgreSQL, if we don't specify an order column, the result follows the partition column's default sort order:

angerszhu=# explain analyze verbose select id, num, lead(id) over (partition by num) from s4;
                                                    QUERY PLAN                                                     
-------------------------------------------------------------------------------------------------------------------
 WindowAgg  (cost=158.51..198.06 rows=2260 width=12) (actual time=0.107..0.122 rows=6 loops=1)
   Output: id, num, lead(id) OVER (?)
   ->  Sort  (cost=158.51..164.16 rows=2260 width=8) (actual time=0.079..0.081 rows=6 loops=1)
         Output: num, id
         Sort Key: s4.num
         Sort Method: quicksort  Memory: 25kB
         ->  Seq Scan on public.s4  (cost=0.00..32.60 rows=2260 width=8) (actual time=0.057..0.061 rows=6 loops=1)
               Output: num, id
 Planning Time: 0.114 ms
 Execution Time: 0.214 ms

angerszhu=# explain analyze verbose select id, num, lead(id) over (partition by num order by id) from s4;
                                                    QUERY PLAN                                                     
-------------------------------------------------------------------------------------------------------------------
 WindowAgg  (cost=158.51..203.71 rows=2260 width=12) (actual time=0.976..1.017 rows=6 loops=1)
   Output: id, num, lead(id) OVER (?)
   ->  Sort  (cost=158.51..164.16 rows=2260 width=8) (actual time=0.067..0.070 rows=6 loops=1)
         Output: id, num
         Sort Key: s4.num, s4.id
         Sort Method: quicksort  Memory: 25kB
         ->  Seq Scan on public.s4  (cost=0.00..32.60 rows=2260 width=8) (actual time=0.042..0.045 rows=6 loops=1)
               Output: id, num
 Planning Time: 0.155 ms
 Execution Time: 1.208 ms
(10 rows)

Member:

I guess that's because PostgreSQL can keep the natural order; Spark can't. Is the PostgreSQL result deterministic?

Contributor Author:

> Is the PostgreSQL result deterministic?

For the same SQL, the result is deterministic.

Adding the partition columns as the default order-by columns also keeps the result deterministic.

I hit this problem when migrating Hive SQL to Spark SQL.
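As context for the determinism question: a stable sort on the partition column alone leaves every row in a partition as a tie, so an order-dependent function like lead() still reflects the rows' arrival order. A minimal self-contained sketch (hypothetical helper for illustration, not Spark code):

```scala
object LeadSketch extends App {
  // lead(value) within one partition: after sorting, each row's lead is the next
  // row's value. Scala's sortBy is stable, so tied rows keep their input order.
  def leadAfterSort(rows: Seq[(Int, String)]): Seq[Option[Int]] = {
    val sorted = rows.sortBy(_._2) // order only by the partition column => all rows tie
    sorted.indices.map(i => if (i + 1 < sorted.length) Some(sorted(i + 1)._1) else None)
  }

  val arrivedOneWay   = Seq((1, "a"), (2, "a"))
  val arrivedOtherWay = Seq((2, "a"), (1, "a")) // same rows, different arrival order

  val r1 = leadAfterSort(arrivedOneWay)   // Seq(Some(2), None)
  val r2 = leadAfterSort(arrivedOtherWay) // Seq(Some(1), None)
  println(r1 == r2)                       // false: lead() depended on arrival order
}
```

In a single-node engine like PostgreSQL the arrival order of rows is typically repeatable, which is why the same query keeps returning the same answer; in a distributed engine the arrival order itself can vary between runs.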

Member:

I think we should not fix it, because on the Spark side at least the results will be non-deterministic. I doubt it's good to add this support purely for compatibility with other DBMSes when the output is expected to be useless.

Maybe disallowing it is a better idea than finding another problem later caused by different, indeterministic data.

Do you know of other cases from distributed DBMSes such as Presto?

Contributor Author:

> I think we should not fix it, because on the Spark side at least the results will be non-deterministic. I doubt it's good to add this support purely for compatibility with other DBMSes when the output is expected to be useless.
>
> Maybe disallowing it is a better idea than finding another problem later caused by different, indeterministic data.
>
> Do you know of other cases from distributed DBMSes such as Presto?

But in my fix we add a default order spec, so the result will be deterministic.
Previously this kind of SQL couldn't run at all: it would produce a non-deterministic result and is rejected by:

case WindowExpression(wf: WindowFunction, spec) if spec.orderSpec.isEmpty =>
  failAnalysis(s"Window function $wf requires window to be ordered, please add ORDER BY " +
    s"clause. For example SELECT $wf(value_expr) OVER (PARTITION BY window_partition " +
    s"ORDER BY window_ordering) from table")

+      }
+    }
 
     // RANGE/ROWS BETWEEN ...
     val frameSpecOption = Option(ctx.windowFrame).map { frame =>
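The net effect of the new branch can be sketched as a small standalone function (stand-in types for illustration only; these are not Spark's actual SortOrder/AST classes):

```scala
// Stand-in for Spark's SortOrder; illustration only.
case class SortKey(col: String, ascending: Boolean = true)

// Mirrors the branching in visitWindowDef: an explicit ORDER BY wins; a RANGE
// frame gets no default; otherwise the partition columns become the order spec.
def defaultedOrderSpec(
    sortItems: Seq[SortKey],
    partitionCols: Seq[String],
    frameIsRange: Boolean): Seq[SortKey] = {
  if (sortItems.nonEmpty) sortItems
  else if (frameIsRange) Seq.empty
  else partitionCols.map(SortKey(_))
}
```

So under this change, `OVER (PARTITION BY area)` behaves like `OVER (PARTITION BY area ORDER BY area)` unless the window declares a RANGE frame.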
@@ -283,12 +283,13 @@ class ExpressionParserSuite extends AnalysisTest {
     WindowExpression(func, WindowSpecDefinition(partitioning, ordering, frame))
   }
 
+  val defaultOrder = Seq(SortOrder('a, Ascending, Set.empty), SortOrder('b, Ascending, Set.empty))
   // Basic window testing.
   assertEqual("foo(*) over w1", UnresolvedWindowExpression(func, WindowSpecReference("w1")))
   assertEqual("foo(*) over ()", windowed())
-  assertEqual("foo(*) over (partition by a, b)", windowed(Seq('a, 'b)))
-  assertEqual("foo(*) over (distribute by a, b)", windowed(Seq('a, 'b)))
-  assertEqual("foo(*) over (cluster by a, b)", windowed(Seq('a, 'b)))
+  assertEqual("foo(*) over (partition by a, b)", windowed(Seq('a, 'b), defaultOrder))
+  assertEqual("foo(*) over (distribute by a, b)", windowed(Seq('a, 'b), defaultOrder))
+  assertEqual("foo(*) over (cluster by a, b)", windowed(Seq('a, 'b), defaultOrder))
   assertEqual("foo(*) over (order by a desc, b asc)", windowed(Seq.empty, Seq('a.desc, 'b.asc)))
   assertEqual("foo(*) over (sort by a desc, b asc)", windowed(Seq.empty, Seq('a.desc, 'b.asc)))
   assertEqual("foo(*) over (partition by a, b order by c)", windowed(Seq('a, 'b), Seq('c.asc)))
@@ -14,7 +14,7 @@ struct<>
 SELECT four, ten, SUM(SUM(four)) OVER (PARTITION BY four), AVG(ten) FROM tenk1
 GROUP BY four, ten ORDER BY four, ten
 -- !query schema
-struct<four:int,ten:int,sum(sum(CAST(four AS BIGINT))) OVER (PARTITION BY four ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,avg(ten):double>
+struct<four:int,ten:int,sum(sum(CAST(four AS BIGINT))) OVER (PARTITION BY four ORDER BY four ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint,avg(ten):double>
 -- !query output
 0 0 0 0.0
 0 2 0 2.0
@@ -306,7 +306,7 @@ SELECT last(ten) OVER (PARTITION BY four), ten, four FROM
 (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s
 ORDER BY four, ten
 -- !query schema
-struct<last(ten, false) OVER (PARTITION BY four ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):int,ten:int,four:int>
+struct<last(ten, false) OVER (PARTITION BY four ORDER BY four ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,ten:int,four:int>
 -- !query output
 4 0 0
 4 0 0
@@ -341,8 +341,7 @@ struct<ten:int,two:int,gsum:bigint,wsum:bigint>
 -- !query
 SELECT count(*) OVER (PARTITION BY four), four FROM (SELECT * FROM tenk1 WHERE two = 1)s WHERE unique2 < 10
 -- !query schema
-struct<count(1) OVER (PARTITION BY four ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,four:int>
--- !query output
+struct<count(1) OVER (PARTITION BY four ORDER BY four ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint,four:int>-- !query output
 2 3
 2 3
 4 1
@@ -422,7 +421,7 @@ struct<ten:int,two:int,gsum:bigint,wsum:bigint>
 -- !query
 SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s
Member:
Okay, now I completely get what you're trying to do. You want window functions to work without specifying the ordering, while non-window functions already work without an ordering (because their results are deterministic anyway). Yes, -1, for the same reason as @hvanhovell's comment.

 -- !query schema
-struct<count(1) OVER (PARTITION BY four ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
+struct<count(1) OVER (PARTITION BY four ORDER BY four ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint>
 -- !query output
 
 
@@ -215,7 +215,7 @@ struct<x:bigint,sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN CAST((- 1)
 -- !query
 SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk2)s LIMIT 0
 -- !query schema
-struct<count(1) OVER (PARTITION BY four ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
+struct<count(1) OVER (PARTITION BY four ORDER BY four ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint>
 -- !query output
 
 
@@ -318,10 +318,18 @@ NULL a NULL
 -- !query
 SELECT udf(val), cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY cate, udf(val)
 -- !query schema
-struct<>
+struct<CAST(udf(cast(val as string)) AS INT):int,cate:string,row_number() OVER (PARTITION BY cate ORDER BY cate ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int>
 -- !query output
-org.apache.spark.sql.AnalysisException
-Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table;
+NULL NULL 1
+3 NULL 2
+NULL a 1
+1 a 2
+1 a 3
+2 a 4
+1 b 1
+2 b 2
+3 b 3
 
 
 -- !query
13 changes: 10 additions & 3 deletions sql/core/src/test/resources/sql-tests/results/window.sql.out
@@ -318,10 +318,17 @@ NULL a NULL
 -- !query
 SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY cate, val
 -- !query schema
-struct<>
+struct<val:int,cate:string,row_number() OVER (PARTITION BY cate ORDER BY cate ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int>
 -- !query output
-org.apache.spark.sql.AnalysisException
-Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table;
+NULL NULL 1
+3 NULL 2
+NULL a 1
+1 a 2
+1 a 3
+2 a 4
+1 b 1
+2 b 2
+3 b 3
 
 
 -- !query
@@ -487,4 +487,48 @@ class SQLWindowFunctionSuite extends QueryTest with SharedSparkSession {

     spark.catalog.dropTempView("nums")
   }
+
+  test("Window function set partitionSpec as order spec when orderSpec is empty") {
+    val data = Seq(
+      WindowData(1, "a", 5),
+      WindowData(2, "a", 6),
+      WindowData(3, "b", 7),
+      WindowData(4, "b", 8),
+      WindowData(5, "c", 9),
+      WindowData(6, "c", 10)
+    )
+    sparkContext.parallelize(data).toDF().createOrReplaceTempView("windowData")
+
+    checkAnswer(
+      sql(
+        """
+          |select month, area, product, lead(month) over (partition by area) as lead_month
+          |from windowData
+        """.stripMargin),
+      Seq(
+        (1, "a", 5, 2),
+        (2, "a", 6, null),
+        (3, "b", 7, 4),
+        (4, "b", 8, null),
+        (5, "c", 9, 6),
+        (6, "c", 10, null)
+      ).map(i => Row(i._1, i._2, i._3, i._4)))
+
+    checkAnswer(
+      sql(
+        """
+          |select month, area, product,
+          |lead(month) over (partition by area order by product desc) as lead_month
+          |from windowData
+        """.stripMargin),
+      Seq(
+        (1, "a", 5, null),
+        (2, "a", 6, 1),
+        (3, "b", 7, null),
+        (4, "b", 8, 3),
+        (5, "c", 9, null),
+        (6, "c", 10, 5)
+      ).map(i => Row(i._1, i._2, i._3, i._4)))
+  }
 }