From 7b5eb25bd52281a89b470541913633932c67b4c0 Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Thu, 18 Aug 2016 09:48:59 -0700 Subject: [PATCH 01/19] initial change --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 7 +++++-- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../analysis/SubstituteUnresolvedOrdinals.scala | 2 +- .../spark/sql/catalyst/expressions/SortOrder.scala | 2 +- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 10 ++++++++-- .../org/apache/spark/sql/execution/WindowExec.scala | 2 +- 6 files changed, 17 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 9a643465a9994..d71e1598150d4 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -324,7 +324,7 @@ queryPrimary ; sortItem - : expression ordering=(ASC | DESC)? + : expression ordering=(ASC | DESC)? (NULLS nullOrder=LAST | FIRST)? ; querySpecification @@ -641,7 +641,8 @@ number nonReserved : SHOW | TABLES | COLUMNS | COLUMN | PARTITIONS | FUNCTIONS | DATABASES | ADD - | OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | MAP | ARRAY | STRUCT + | OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | LAST | FIRST + | MAP | ARRAY | STRUCT | LATERAL | WINDOW | REDUCE | TRANSFORM | USING | SERDE | SERDEPROPERTIES | RECORDREADER | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS @@ -729,6 +730,8 @@ UNBOUNDED: 'UNBOUNDED'; PRECEDING: 'PRECEDING'; FOLLOWING: 'FOLLOWING'; CURRENT: 'CURRENT'; +FIRST: 'FIRST'; +LAST: 'LAST'; ROW: 'ROW'; WITH: 'WITH'; VALUES: 'VALUES'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 18f814d6cdfd4..9de27fb9cb1f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -714,7 +714,7 @@ class Analyzer( case s @ Sort(orders, global, child) if orders.exists(_.child.isInstanceOf[UnresolvedOrdinal]) => val newOrders = orders map { - case s @ SortOrder(UnresolvedOrdinal(index), direction) => + case s @ SortOrder(UnresolvedOrdinal(index), direction, _) => if (index > 0 && index <= child.output.size) { SortOrder(child.output(index - 1), direction) } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala index 6d8dc8628229a..af0a565f73ae9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala @@ -36,7 +36,7 @@ class SubstituteUnresolvedOrdinals(conf: CatalystConf) extends Rule[LogicalPlan] def apply(plan: LogicalPlan): LogicalPlan = plan transform { case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) => val newOrders = s.order.map { - case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _) => + case order @ SortOrder(ordinal 
@ Literal(index: Int, IntegerType), _, _) => val newOrdinal = withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) withOrigin(order.origin)(order.copy(child = newOrdinal)) case other => other diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index de779ed3702d3..d1049c3c5317a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -40,7 +40,7 @@ case object Descending extends SortDirection { * An expression that can be used to sort a tuple. This class extends expression primarily so that * transformations over expression will descend into its child. */ -case class SortOrder(child: Expression, direction: SortDirection) +case class SortOrder(child: Expression, direction: SortDirection, nullFirst: Boolean = true) extends UnaryExpression with Unevaluable { /** Sort order is not foldable because we don't have an eval for it. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index bbbb14df88f8c..171460afcae7d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1206,10 +1206,16 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * Create a [[SortOrder]] expression. */ override def visitSortItem(ctx: SortItemContext): SortOrder = withOrigin(ctx) { + val nullFirst = if (ctx.nullOrder != null) { + ctx.nullOrder.getType == SqlBaseParser.FIRST + } else { + false + } + if (ctx.DESC != null) { - SortOrder(expression(ctx.expression), Descending) + SortOrder(expression(ctx.expression), Descending, nullFirst ) } else { - SortOrder(expression(ctx.expression), Ascending) + SortOrder(expression(ctx.expression), Ascending, nullFirst) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala index 9d006d21d9440..f6e09bff84fe7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala @@ -145,7 +145,7 @@ case class WindowExec( // to the result of bound value projection. This is done manually because we want to use // Code Generation (if it is enabled). 
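      // Each of these exprs is itself a SortOrder, so it also carries the new
      // nullFirst flag; forwarding e.nullFirst below keeps the frame's bound
      // ordering consistent with the ORDER BY null placement (for example,
      // `col3 DESC NULLS LAST` parses to SortOrder(col3, Descending, nullFirst = false)).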
val sortExprs = exprs.zipWithIndex.map { case (e, i) => - SortOrder(BoundReference(i, e.dataType, e.nullable), e.direction) + SortOrder(BoundReference(i, e.dataType, e.nullable), e.direction, e.nullFirst) } val ordering = newOrdering(sortExprs, Nil) RangeBoundOrdering(ordering, current, bound) From e361913c1ad18e4eb52a8486d673f5ba9aa684af Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Thu, 18 Aug 2016 15:48:49 -0700 Subject: [PATCH 02/19] SPARK-10747: support NULLS FIRST|LAST in ORDER BY clause --- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../sql/catalyst/expressions/SortOrder.scala | 16 ++++++++++++++-- .../expressions/codegen/GenerateOrdering.scala | 9 +++++++-- .../spark/sql/catalyst/parser/AstBuilder.scala | 14 +++++++++----- .../apache/spark/sql/execution/WindowExec.scala | 2 +- 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index d71e1598150d4..b475abdce2da9 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -324,7 +324,7 @@ queryPrimary ; sortItem - : expression ordering=(ASC | DESC)? (NULLS nullOrder=LAST | FIRST)? + : expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))? ; querySpecification diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index d1049c3c5317a..de8342db0ebdc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -28,6 +28,10 @@ abstract sealed class SortDirection { def sql: String } +abstract sealed class NullOrdering { +def sql: String +} + case object Ascending extends SortDirection { override def sql: String = "ASC" } @@ -36,11 +40,19 @@ case object Descending extends SortDirection { override def sql: String = "DESC" } +case object NullFirst extends NullOrdering{ + override def sql: String = "NULLS FIRST" +} + +case object NullLast extends NullOrdering{ + override def sql: String = "NULLS LAST" +} + /** * An expression that can be used to sort a tuple. This class extends expression primarily so that * transformations over expression will descend into its child. */ -case class SortOrder(child: Expression, direction: SortDirection, nullFirst: Boolean = true) +case class SortOrder(child: Expression, direction: SortDirection, nullOrder: NullOrdering = null) extends UnaryExpression with Unevaluable { /** Sort order is not foldable because we don't have an eval for it. 
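   * A SortOrder is only a sort specification, never evaluated as a value,
   * which is also why the class mixes in [[Unevaluable]].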
*/ @@ -58,7 +70,7 @@ case class SortOrder(child: Expression, direction: SortDirection, nullFirst: Boo override def nullable: Boolean = child.nullable override def toString: String = s"$child ${direction.sql}" - override def sql: String = child.sql + " " + direction.sql + override def sql: String = child.sql + " " + direction.sql + " " + nullOrder.sql def isAscending: Boolean = direction == Ascending } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index f4d35d232e691..26a7908d54ab0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -75,6 +75,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR val comparisons = ordering.map { order => val eval = order.child.genCode(ctx) val asc = order.direction == Ascending + val isNullA = ctx.freshName("isNullA") val primitiveA = ctx.freshName("primitiveA") val isNullB = ctx.freshName("isNullB") @@ -99,9 +100,13 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR if ($isNullA && $isNullB) { // Nothing } else if ($isNullA) { - return ${if (order.direction == Ascending) "-1" else "1"}; + return ${if (order.nullOrder == null) { + if (order.direction == Ascending) "-1" else "1" + } else { if (order.nullOrder == NullFirst) "-1" else "1" }}; } else if ($isNullB) { - return ${if (order.direction == Ascending) "1" else "-1"}; + return ${if (order.nullOrder == null) { + if (order.direction == Ascending) "1" else "-1" + } else { if (order.nullOrder == NullFirst) "1" else "-1"}}; } else { int comp = ${ctx.genComp(order.child.dataType, primitiveA, primitiveB)}; if (comp != 0) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 171460afcae7d..17f7a123cd85e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1206,16 +1206,20 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * Create a [[SortOrder]] expression. 
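   * For example, `col3 DESC NULLS LAST` becomes
   * `SortOrder(col3, Descending, NullLast)`, while a sort item without a
   * NULLS clause leaves nullOrder unset.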
*/ override def visitSortItem(ctx: SortItemContext): SortOrder = withOrigin(ctx) { - val nullFirst = if (ctx.nullOrder != null) { - ctx.nullOrder.getType == SqlBaseParser.FIRST + val nullOrder = if (ctx.nullOrder != null) { + ctx.nullOrder.getType match { + case SqlBaseParser.FIRST => NullFirst + case SqlBaseParser.LAST => NullLast + case _ => null + } } else { - false + null } if (ctx.DESC != null) { - SortOrder(expression(ctx.expression), Descending, nullFirst ) + SortOrder(expression(ctx.expression), Descending, nullOrder) } else { - SortOrder(expression(ctx.expression), Ascending, nullFirst) + SortOrder(expression(ctx.expression), Ascending, nullOrder) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala index f6e09bff84fe7..7012a68056972 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala @@ -145,7 +145,7 @@ case class WindowExec( // to the result of bound value projection. This is done manually because we want to use // Code Generation (if it is enabled). val sortExprs = exprs.zipWithIndex.map { case (e, i) => - SortOrder(BoundReference(i, e.dataType, e.nullable), e.direction, e.nullFirst) + SortOrder(BoundReference(i, e.dataType, e.nullable), e.direction, e.nullOrder) } val ordering = newOrdering(sortExprs, Nil) RangeBoundOrdering(ordering, current, bound) From d010d8824d9dd45021857869d6997f14b1c6749f Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Fri, 19 Aug 2016 12:38:17 -0700 Subject: [PATCH 03/19] SPARK-10747: add testcases --- .../sql/catalyst/analysis/Analyzer.scala | 4 +- .../sql/catalyst/expressions/SortOrder.scala | 3 +- .../codegen/GenerateOrdering.scala | 6 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 184 ++++++++++++++++++ .../sql/hive/execution/SQLQuerySuite.scala | 183 +++++++++++++++++ 5 files changed, 376 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 9de27fb9cb1f9..e774c71319d7b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -714,9 +714,9 @@ class Analyzer( case s @ Sort(orders, global, child) if orders.exists(_.child.isInstanceOf[UnresolvedOrdinal]) => val newOrders = orders map { - case s @ SortOrder(UnresolvedOrdinal(index), direction, _) => + case s @ SortOrder(UnresolvedOrdinal(index), direction, nullOrder) => if (index > 0 && index <= child.output.size) { - SortOrder(child.output(index - 1), direction) + SortOrder(child.output(index - 1), direction, nullOrder) } else { s.failAnalysis( s"ORDER BY position $index is not in select list " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index de8342db0ebdc..68a9efe9bd093 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -70,7 +70,8 @@ case class SortOrder(child: Expression, direction: SortDirection, nullOrder: Nul override def nullable: Boolean = child.nullable override def toString: String = s"$child 
${direction.sql}" - override def sql: String = child.sql + " " + direction.sql + " " + nullOrder.sql + override def sql: String = + child.sql + " " + direction.sql + s"${if (nullOrder!=null) " " + nullOrder.sql else ""}" def isAscending: Boolean = direction == Ascending } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 26a7908d54ab0..774a3305a6a8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -102,7 +102,11 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } else if ($isNullA) { return ${if (order.nullOrder == null) { if (order.direction == Ascending) "-1" else "1" - } else { if (order.nullOrder == NullFirst) "-1" else "1" }}; + } else { + if (order.nullOrder == NullFirst) "-1" else "1" + } + }; + } else if ($isNullB) { return ${if (order.nullOrder == null) { if (order.direction == Ascending) "1" else "-1" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index eac266cba55b8..4417c7e570fde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2661,4 +2661,188 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { data.selectExpr("`part.col1`", "`col.1`")) } } + + test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST in WINDOW specification") { + + withTable("spark_10747") { + sql("create table spark_10747(col1 int, col2 int, col3 int) using parquet") + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), + |(6, 15, 8), (6, 7, 4), (6, 7, 8) + """.stripMargin) + + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) + """.stripMargin + ) + + checkAnswer( + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 desc nulls last, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ), + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 desc, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ) + ) + + checkAnswer( + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 desc nulls first, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ), + Row(6, 10, null, 32):: + Row(6, 13, null, 44):: + Row(6, 9, 10, 51):: + Row(6, 12, 10, 56):: + Row(6, 7, 8, 58):: + Row(6, 15, 8, 56):: + Row(6, 15, 8, 55):: + Row(6, 7, 4, 48):: + Row(6, 11, 4, 33)::Nil + ) + + checkAnswer( + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 asc nulls last, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ), + Row(6, 7, 4, 25):: + Row(6, 11, 4, 40):: + Row(6, 7, 8, 55):: + Row(6, 15, 8, 57):: + Row(6, 15, 8, 58):: + Row(6, 9, 10, 61):: + Row(6, 12, 10, 59):: + Row(6, 10, null, 44):: + Row(6, 13, null, 35)::Nil + ) + + checkAnswer( + sql( + """ + |select col1, col2, col3, sum(col2) + | over 
(partition by col1 + | order by col3 asc nulls first, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ), + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 asc, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ) + ) + } + } + + /* + + test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST") { + withTable("spark_10747") { + sql("create table spark_10747(col1 int, col2 int, col3 int) using parquet") + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), + |(6, 15, 8), (6, 7, 4), (6, 7, 8) + """.stripMargin) + + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) + """.stripMargin + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 + """.stripMargin + ), + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC, COL2 + """.stripMargin + ) + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS LAST, COL2 + """.stripMargin + ), + + Row(6, 7, 4):: + Row(6, 11, 4):: + Row(6, 7, 8):: + Row(6, 15, 8):: + Row(6, 15, 8):: + Row(6, 9, 10):: + Row(6, 12, 10):: + Row(6, 10, null):: + Row(6, 13, null)::Nil + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 + """.stripMargin + ), + + Row(6, 10, null):: + Row(6, 13, null):: + Row(6, 9, 10):: + Row(6, 12, 10):: + Row(6, 7, 8):: + Row(6, 15, 8):: + Row(6, 15, 8):: + Row(6, 7, 4):: + Row(6, 11, 4)::Nil + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 + """.stripMargin + ), + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC, COL2 + """.stripMargin + ) + ) + } + } + */ } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index dc4d099f0f666..35a7a65e9a979 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1808,6 +1808,189 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } + test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST in WINDOW specification") { + + withTable("spark_10747") { + sql("create table spark_10747(col1 int, col2 int, col3 int) stored as parquet") + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), + |(6, 15, 8), (6, 7, 4), (6, 7, 8) + """.stripMargin) + + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) + """.stripMargin + ) + + checkAnswer( + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 desc nulls last, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ), + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 desc, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ) + ) + + checkAnswer( + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 desc nulls first, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 
6 + """.stripMargin + ), + Row(6, 10, null, 32):: + Row(6, 13, null, 44):: + Row(6, 9, 10, 51):: + Row(6, 12, 10, 56):: + Row(6, 7, 8, 58):: + Row(6, 15, 8, 56):: + Row(6, 15, 8, 55):: + Row(6, 7, 4, 48):: + Row(6, 11, 4, 33)::Nil + ) + + checkAnswer( + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 asc nulls last, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ), + Row(6, 7, 4, 25):: + Row(6, 11, 4, 40):: + Row(6, 7, 8, 55):: + Row(6, 15, 8, 57):: + Row(6, 15, 8, 58):: + Row(6, 9, 10, 61):: + Row(6, 12, 10, 59):: + Row(6, 10, null, 44):: + Row(6, 13, null, 35)::Nil + ) + + checkAnswer( + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 asc nulls first, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ), + sql( + """ + |select col1, col2, col3, sum(col2) + | over (partition by col1 + | order by col3 asc, col2 + | rows between 2 preceding and 2 following ) + |from spark_10747 where col1 = 6 + """.stripMargin + ) + ) + } + } + + + test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST") { + withTable("spark_10747") { + sql("create table spark_10747(col1 int, col2 int, col3 int) stored as parquet") + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), + |(6, 15, 8), (6, 7, 4), (6, 7, 8) + """.stripMargin) + + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) + """.stripMargin + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 + """.stripMargin + ), + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC, COL2 + """.stripMargin + ) + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS LAST, COL2 + """.stripMargin + ), + + Row(6, 7, 4):: + Row(6, 11, 4):: + Row(6, 7, 8):: + Row(6, 15, 8):: + Row(6, 15, 8):: + Row(6, 9, 10):: + Row(6, 12, 10):: + Row(6, 10, null):: + Row(6, 13, null)::Nil + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 + """.stripMargin + ), + + Row(6, 10, null):: + Row(6, 13, null):: + Row(6, 9, 10):: + Row(6, 12, 10):: + Row(6, 7, 8):: + Row(6, 15, 8):: + Row(6, 15, 8):: + Row(6, 7, 4):: + Row(6, 11, 4)::Nil + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 + """.stripMargin + ), + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC, COL2 + """.stripMargin + ) + ) + } + } + + def testCommandAvailable(command: String): Boolean = { val attempt = Try(Process(command).run(ProcessLogger(_ => ())).exitValue()) attempt.isSuccess && attempt.get == 0 From d7f8ded2bd74e1ec2ba3232c24d66c76ff5b268c Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Fri, 19 Aug 2016 21:10:37 -0700 Subject: [PATCH 04/19] SPARK-10747: update testcases --- .../codegen/GenerateOrdering.scala | 11 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 82 ----------- .../sql/hive/execution/SQLQuerySuite.scala | 136 +++++++++--------- 3 files changed, 75 insertions(+), 154 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 774a3305a6a8c..976fb931ed3d8 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -100,17 +100,18 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR if ($isNullA && $isNullB) { // Nothing } else if ($isNullA) { - return ${if (order.nullOrder == null) { + return ${ if (order.nullOrder == null) { if (order.direction == Ascending) "-1" else "1" } else { if (order.nullOrder == NullFirst) "-1" else "1" - } - }; + }}; } else if ($isNullB) { - return ${if (order.nullOrder == null) { + return ${ if (order.nullOrder == null) { if (order.direction == Ascending) "1" else "-1" - } else { if (order.nullOrder == NullFirst) "1" else "-1"}}; + } else { + if (order.nullOrder == NullFirst) "1" else "-1" + }}; } else { int comp = ${ctx.genComp(order.child.dataType, primitiveA, primitiveB)}; if (comp != 0) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 4417c7e570fde..142180fe9c38e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2763,86 +2763,4 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { ) } } - - /* - - test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST") { - withTable("spark_10747") { - sql("create table spark_10747(col1 int, col2 int, col3 int) using parquet") - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), - |(6, 15, 8), (6, 7, 4), (6, 7, 8) - """.stripMargin) - - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) - """.stripMargin - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 - """.stripMargin - ), - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC, COL2 - """.stripMargin - ) - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS LAST, COL2 - """.stripMargin - ), - - Row(6, 7, 4):: - Row(6, 11, 4):: - Row(6, 7, 8):: - Row(6, 15, 8):: - Row(6, 15, 8):: - Row(6, 9, 10):: - Row(6, 12, 10):: - Row(6, 10, null):: - Row(6, 13, null)::Nil - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 - """.stripMargin - ), - - Row(6, 10, null):: - Row(6, 13, null):: - Row(6, 9, 10):: - Row(6, 12, 10):: - Row(6, 7, 8):: - Row(6, 15, 8):: - Row(6, 15, 8):: - Row(6, 7, 4):: - Row(6, 11, 4)::Nil - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 - """.stripMargin - ), - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC, COL2 - """.stripMargin - ) - ) - } - } - */ } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 35a7a65e9a979..eb5fe2df6b1d1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1910,84 +1910,86 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } - test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST") { - withTable("spark_10747") { - sql("create table spark_10747(col1 
int, col2 int, col3 int) stored as parquet") - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), - |(6, 15, 8), (6, 7, 4), (6, 7, 8) - """.stripMargin) - - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) - """.stripMargin - ) - - checkAnswer( + withTable("spark_10747") { + sql("create table spark_10747(col1 int, col2 int, col3 int) stored as parquet") sql( """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 - """.stripMargin - ), - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC, COL2 - """.stripMargin - ) - ) + |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), + |(6, 15, 8), (6, 7, 4), (6, 7, 8) + """.stripMargin) - checkAnswer( sql( """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS LAST, COL2 - """.stripMargin - ), + |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) + """. + stripMargin + ) - Row(6, 7, 4):: - Row(6, 11, 4):: - Row(6, 7, 8):: - Row(6, 15, 8):: - Row(6, 15, 8):: - Row(6, 9, 10):: - Row(6, 12, 10):: - Row(6, 10, null):: - Row(6, 13, null)::Nil - ) + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST + """.stripMargin + ), + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC + """.stripMargin + ) + ) - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 - """.stripMargin - ), + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS LAST + """.stripMargin + ), + + Row(6, 11, 4) :: + Row(6, 7, 4) :: + Row(6, 15, 8) :: + Row(6, 15, 8) :: + Row(6, 7, 8) :: + Row(6, 12, 10) :: + Row(6, 9, 10) :: + Row(6, 13, null) :: + Row(6, 10, null) :: Nil + ) - Row(6, 10, null):: - Row(6, 13, null):: - Row(6, 9, 10):: - Row(6, 12, 10):: - Row(6, 7, 8):: - Row(6, 15, 8):: - Row(6, 15, 8):: - Row(6, 7, 4):: - Row(6, 11, 4)::Nil - ) + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST + """.stripMargin + ), + + Row(6, 13, null) :: + Row(6, 10, null) :: + Row(6, 12, 10) :: + Row(6, 9, 10) :: + Row(6, 15, 8) :: + Row(6, 15, 8) :: + Row(6, 7, 8) :: + Row(6, 11, 4) :: + Row(6, 7, 4) :: Nil + ) - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 - """.stripMargin - ), - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC, COL2 - """.stripMargin + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST + """.stripMargin + ), + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC + """. 
+ stripMargin + ) ) - ) - } + } + } From 60de0bbb808fd35b80caba880ce68742ab24b2e2 Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Fri, 26 Aug 2016 10:59:20 -0700 Subject: [PATCH 05/19] SPARK-10747: prefix comparator enhancement to support nulls last/first --- .../unsafe/sort/PrefixComparator.java | 1 + .../unsafe/sort/PrefixComparators.java | 99 ++++++++++++++++--- .../unsafe/sort/UnsafeInMemorySorter.java | 16 ++- .../codegen/GenerateOrdering.scala | 2 - .../spark/sql/execution/SortPrefixUtils.scala | 22 ++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 80 +++++++++++++++ 6 files changed, 201 insertions(+), 19 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java index 45b78829e4cf7..be738d29d98af 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java @@ -25,5 +25,6 @@ */ @Private public abstract class PrefixComparator { + public enum NullOrder {FIRST, LAST} public abstract int compare(long prefix1, long prefix2); } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java index c44630fbbc2f0..854547ee11e8a 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java @@ -27,14 +27,22 @@ public class PrefixComparators { private PrefixComparators() {} - public static final PrefixComparator STRING = new UnsignedPrefixComparator(); - public static final PrefixComparator STRING_DESC = new UnsignedPrefixComparatorDesc(); - public static final PrefixComparator BINARY = new UnsignedPrefixComparator(); - public static final PrefixComparator BINARY_DESC = new UnsignedPrefixComparatorDesc(); - public static final PrefixComparator LONG = new SignedPrefixComparator(); - public static final PrefixComparator LONG_DESC = new SignedPrefixComparatorDesc(); - public static final PrefixComparator DOUBLE = new UnsignedPrefixComparator(); - public static final PrefixComparator DOUBLE_DESC = new UnsignedPrefixComparatorDesc(); + public static final PrefixComparator STRING = new UnsignedPrefixComparator(null); + public static final PrefixComparator STRING_DESC = new UnsignedPrefixComparatorDesc(null); + public static final PrefixComparator BINARY = new UnsignedPrefixComparator(null); + public static final PrefixComparator BINARY_DESC = new UnsignedPrefixComparatorDesc(null); + public static final PrefixComparator LONG = new SignedPrefixComparator(null); + public static final PrefixComparator LONG_NULLFIRST = + new SignedPrefixComparator(PrefixComparator.NullOrder.FIRST); + public static final PrefixComparator LONG_NULLLAST = + new SignedPrefixComparator(PrefixComparator.NullOrder.LAST); + public static final PrefixComparator LONG_DESC = new SignedPrefixComparatorDesc(null); + public static final PrefixComparator LONG_DESC_NULLFIRST = + new SignedPrefixComparatorDesc(PrefixComparator.NullOrder.FIRST); + public static final PrefixComparator LONG_DESC_NULLLAST = + new SignedPrefixComparatorDesc(PrefixComparator.NullOrder.LAST); + public static final PrefixComparator DOUBLE = new UnsignedPrefixComparator(null); + public static final PrefixComparator DOUBLE_DESC = new 
UnsignedPrefixComparatorDesc(null); public static final class StringPrefixComparator { public static long computePrefix(UTF8String value) { @@ -69,11 +77,21 @@ public static long computePrefix(double value) { * ordering they define is compatible with radix sort. */ public abstract static class RadixSortSupport extends PrefixComparator { + private NullOrder nullOrder = null; + public RadixSortSupport() {} + public RadixSortSupport(NullOrder nullOrder) { + super(); + this.nullOrder = nullOrder; + } + /** @return Whether the sort should be descending in binary sort order. */ public abstract boolean sortDescending(); /** @return Whether the sort should take into account the sign bit. */ public abstract boolean sortSigned(); + + /** @return Whether the sort should put null values first */ + public NullOrder nullOrder() {return nullOrder;} } // @@ -81,37 +99,94 @@ public abstract static class RadixSortSupport extends PrefixComparator { // public static final class UnsignedPrefixComparator extends RadixSortSupport { + public UnsignedPrefixComparator(NullOrder nullOrder) { + super(nullOrder); + } @Override public boolean sortDescending() { return false; } @Override public boolean sortSigned() { return false; } - @Override public int compare(long aPrefix, long bPrefix) { + return UnsignedLongs.compare(aPrefix, bPrefix); } } public static final class UnsignedPrefixComparatorDesc extends RadixSortSupport { + public UnsignedPrefixComparatorDesc(NullOrder nullOrder) { + super(nullOrder); + } @Override public boolean sortDescending() { return true; } @Override public boolean sortSigned() { return false; } - @Override public int compare(long bPrefix, long aPrefix) { return UnsignedLongs.compare(aPrefix, bPrefix); } } public static final class SignedPrefixComparator extends RadixSortSupport { + public SignedPrefixComparator(NullOrder nullOrder) { + super(nullOrder); + } @Override public boolean sortDescending() { return false; } @Override public boolean sortSigned() { return true; } - @Override public int compare(long a, long b) { + if (nullOrder() != null) { + if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) { + return 0; + } + if (a == Long.MIN_VALUE) { + switch (nullOrder()) { + case FIRST: + return -1; + case LAST: + return 1; + default: + break; + } + } else if (b == Long.MIN_VALUE) { + switch (nullOrder()) { + case FIRST: + return 1; + case LAST: + return -1; + default: + break; + } + } + } return (a < b) ? -1 : (a > b) ? 1 : 0; } } public static final class SignedPrefixComparatorDesc extends RadixSortSupport { + public SignedPrefixComparatorDesc(NullOrder nullOrder) { + super(nullOrder); + } @Override public boolean sortDescending() { return true; } @Override public boolean sortSigned() { return true; } - @Override public int compare(long b, long a) { + if (nullOrder() != null) { + if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) { + return 0; + } + if (b == Long.MIN_VALUE) { + switch (nullOrder()) { + case FIRST: + return -1; + case LAST: + return 1; + default: + break; + } + } else if (a == Long.MIN_VALUE) { + switch (nullOrder()) { + case FIRST: + return 1; + case LAST: + return -1; + default: + break; + } + } + } return (a < b) ? -1 : (a > b) ? 
1 : 0; } } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java index 30d0f3006a04e..64f22ef2e3d9d 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java @@ -333,13 +333,23 @@ public UnsafeSorterIterator getSortedIterator() { if (nullBoundaryPos > 0) { assert radixSortSupport != null : "Nulls are only stored separately with radix sort"; LinkedList queue = new LinkedList<>(); - if (radixSortSupport.sortDescending()) { - // Nulls are smaller than non-nulls + + if (radixSortSupport.nullOrder() == PrefixComparator.NullOrder.LAST) { queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset)); queue.add(new SortedIterator(nullBoundaryPos / 2, 0)); - } else { + } else if (radixSortSupport.nullOrder() == PrefixComparator.NullOrder.FIRST) { queue.add(new SortedIterator(nullBoundaryPos / 2, 0)); queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset)); + } else { + if (radixSortSupport.sortDescending()) { + // Nulls are smaller than non-nulls if no Null ordering is specified. + queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset)); + queue.add(new SortedIterator(nullBoundaryPos / 2, 0)); + + } else { + queue.add(new SortedIterator(nullBoundaryPos / 2, 0)); + queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset)); + } } return new UnsafeExternalSorter.ChainedIterator(queue); } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 976fb931ed3d8..a0bb1edbe73c2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -75,7 +75,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR val comparisons = ordering.map { order => val eval = order.child.genCode(ctx) val asc = order.direction == Ascending - val isNullA = ctx.freshName("isNullA") val primitiveA = ctx.freshName("primitiveA") val isNullB = ctx.freshName("isNullB") @@ -105,7 +104,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } else { if (order.nullOrder == NullFirst) "-1" else "1" }}; - } else if ($isNullB) { return ${ if (order.nullOrder == null) { if (order.direction == Ascending) "1" else "-1" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala index 940467e74d597..0b825d025fa32 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala @@ -45,9 +45,9 @@ object SortPrefixUtils { case BinaryType => if (sortOrder.isAscending) PrefixComparators.BINARY else PrefixComparators.BINARY_DESC case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | TimestampType => - if (sortOrder.isAscending) PrefixComparators.LONG else PrefixComparators.LONG_DESC + getPrefixComparatorWithNullOrder(sortOrder) case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS => - if 
(sortOrder.isAscending) PrefixComparators.LONG else PrefixComparators.LONG_DESC + getPrefixComparatorWithNullOrder(sortOrder) case FloatType | DoubleType => if (sortOrder.isAscending) PrefixComparators.DOUBLE else PrefixComparators.DOUBLE_DESC case dt: DecimalType => @@ -56,6 +56,24 @@ object SortPrefixUtils { } } + private def getPrefixComparatorWithNullOrder(sortOrder: SortOrder): PrefixComparator = { + if (sortOrder.isAscending) { + if (sortOrder.nullOrder != null) { + if (sortOrder.nullOrder == NullFirst) PrefixComparators.LONG_NULLFIRST + else PrefixComparators.LONG_NULLLAST + } else { + PrefixComparators.LONG + } + } else { + if (sortOrder.nullOrder != null) { + if (sortOrder.nullOrder == NullFirst) PrefixComparators.LONG_DESC_NULLFIRST + else PrefixComparators.LONG_DESC_NULLLAST + } else { + PrefixComparators.LONG_DESC + } + } + } + /** * Creates the prefix comparator for the first field in the given schema, in ascending order. */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 142180fe9c38e..f3ccfb56488c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2763,4 +2763,84 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { ) } } + + test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST") { + withTable("spark_10747") { + sql("create table spark_10747(col1 int, col2 int, col3 int) using parquet") + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), + |(6, 15, 8), (6, 7, 4), (6, 7, 8) + """.stripMargin) + + sql( + """ + |INSERT INTO spark_10747 VALUES (6, 10, null), (6, 13, null) + """. + stripMargin + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 + """.stripMargin + ), + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC, COL2 + """.stripMargin + ) + ) + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2 + """.stripMargin + ), + + Row(6, 7, 4) :: + Row(6, 11, 4) :: + Row(6, 7, 8) :: + Row(6, 15, 8) :: + Row(6, 15, 8) :: + Row(6, 9, 10) :: + Row(6, 12, 10) :: + Row(6, 10, null) :: + Row(6, 13, null) :: Nil + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 + """.stripMargin + ), + + Row(6, 10, null) :: + Row(6, 13, null) :: + Row(6, 9, 10) :: + Row(6, 12, 10) :: + Row(6, 7, 8) :: + Row(6, 15, 8) :: + Row(6, 15, 8) :: + Row(6, 7, 4) :: + Row(6, 11, 4) :: Nil + ) + + checkAnswer( + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 + """.stripMargin + ), + sql( + """ + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC, COL2 + """. 
+          stripMargin
+      )
+    )
+    }
+  }
 }

From 7d4f8f9df73b6197a0ac33dbd9fecb03ab3695de Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Fri, 26 Aug 2016 19:20:38 -0700
Subject: [PATCH 06/19] SPARK-10747: refine prefix comparator

---
 .../unsafe/sort/PrefixComparators.java        | 237 +++++++++++++-----
 .../spark/sql/execution/SortPrefixUtils.scala |  67 +++--
 .../sql/hive/execution/SQLQuerySuite.scala    |  38 +--
 3 files changed, 240 insertions(+), 102 deletions(-)

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
index 854547ee11e8a..ba6ddc82aaa9d 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
@@ -27,22 +27,54 @@ public class PrefixComparators {
   private PrefixComparators() {}
 
-  public static final PrefixComparator STRING = new UnsignedPrefixComparator(null);
-  public static final PrefixComparator STRING_DESC = new UnsignedPrefixComparatorDesc(null);
-  public static final PrefixComparator BINARY = new UnsignedPrefixComparator(null);
-  public static final PrefixComparator BINARY_DESC = new UnsignedPrefixComparatorDesc(null);
-  public static final PrefixComparator LONG = new SignedPrefixComparator(null);
+  public static final PrefixComparator STRING = new UnsignedPrefixComparator();
+  public static final PrefixComparator STRING_NULLFIRST =
+    new UnsignedPrefixComparatorNullFirst();
+  public static final PrefixComparator STRING_NULLLAST =
+    new UnsignedPrefixComparatorNullLast();
+
+  public static final PrefixComparator STRING_DESC = new UnsignedPrefixComparatorDesc();
+  public static final PrefixComparator STRING_DESC_NULLFIRST =
+    new UnsignedPrefixComparatorDescNullFirst();
+  public static final PrefixComparator STRING_DESC_NULLLAST =
+    new UnsignedPrefixComparatorDescNullLast();
+
+  public static final PrefixComparator BINARY = new UnsignedPrefixComparator();
+  public static final PrefixComparator BINARY_NULLFIRST =
+    new UnsignedPrefixComparatorNullFirst();
+  public static final PrefixComparator BINARY_NULLLAST =
+    new UnsignedPrefixComparatorNullLast();
+
+  public static final PrefixComparator BINARY_DESC = new UnsignedPrefixComparatorDesc();
+  public static final PrefixComparator BINARY_DESC_NULLFIRST =
+    new UnsignedPrefixComparatorDescNullFirst();
+  public static final PrefixComparator BINARY_DESC_NULLLAST =
+    new UnsignedPrefixComparatorDescNullLast();
+
+  public static final PrefixComparator LONG = new SignedPrefixComparator();
   public static final PrefixComparator LONG_NULLFIRST =
-    new SignedPrefixComparator(PrefixComparator.NullOrder.FIRST);
+    new SignedPrefixComparatorNullFirst();
   public static final PrefixComparator LONG_NULLLAST =
-    new SignedPrefixComparator(PrefixComparator.NullOrder.LAST);
-  public static final PrefixComparator LONG_DESC = new SignedPrefixComparatorDesc(null);
+    new SignedPrefixComparatorNullLast();
+
+  public static final PrefixComparator LONG_DESC = new SignedPrefixComparatorDesc();
   public static final PrefixComparator LONG_DESC_NULLFIRST =
-    new SignedPrefixComparatorDesc(PrefixComparator.NullOrder.FIRST);
+    new SignedPrefixComparatorDescNullFirst();
   public static final PrefixComparator LONG_DESC_NULLLAST =
-    new SignedPrefixComparatorDesc(PrefixComparator.NullOrder.LAST);
-  public static final PrefixComparator DOUBLE = new UnsignedPrefixComparator(null);
-  public static final
PrefixComparator DOUBLE_DESC = new UnsignedPrefixComparatorDesc(null); + new SignedPrefixComparatorDescNullLast(); + + public static final PrefixComparator DOUBLE = new UnsignedPrefixComparator(); + public static final PrefixComparator DOUBLE_NULLFIRST = + new UnsignedPrefixComparatorNullFirst(); + public static final PrefixComparator DOUBLE_NULLLAST = + new UnsignedPrefixComparatorNullLast(); + + public static final PrefixComparator DOUBLE_DESC = new UnsignedPrefixComparatorDesc(); + public static final PrefixComparator DOUBLE_DESC_NULLFIRST = + new UnsignedPrefixComparatorDescNullFirst(); + public static final PrefixComparator DOUBLE_DESC_NULLLAST = + new UnsignedPrefixComparatorDescNullLast(); public static final class StringPrefixComparator { public static long computePrefix(UTF8String value) { @@ -77,12 +109,6 @@ public static long computePrefix(double value) { * ordering they define is compatible with radix sort. */ public abstract static class RadixSortSupport extends PrefixComparator { - private NullOrder nullOrder = null; - public RadixSortSupport() {} - public RadixSortSupport(NullOrder nullOrder) { - super(); - this.nullOrder = nullOrder; - } /** @return Whether the sort should be descending in binary sort order. */ public abstract boolean sortDescending(); @@ -90,103 +116,182 @@ public RadixSortSupport(NullOrder nullOrder) { /** @return Whether the sort should take into account the sign bit. */ public abstract boolean sortSigned(); - /** @return Whether the sort should put null values first */ - public NullOrder nullOrder() {return nullOrder;} + /** @return Whether the sort should put null first or last. */ + public abstract NullOrder nullOrder(); } // // Standard prefix comparator implementations // - public static final class UnsignedPrefixComparator extends RadixSortSupport { - public UnsignedPrefixComparator(NullOrder nullOrder) { - super(nullOrder); + public static final class UnsignedPrefixComparatorNullFirst extends RadixSortSupport { + @Override public boolean sortDescending() { return false; } + @Override public boolean sortSigned() { return false; } + @Override public NullOrder nullOrder() { return NullOrder.FIRST; } + public int compare(long aPrefix, long bPrefix) { + if (aPrefix == 0L && bPrefix == 0L) { + return 0; + } + if (aPrefix == 0L) { + return -1; + } else if (bPrefix == 0L) { + return 1; + } + return UnsignedLongs.compare(aPrefix, bPrefix); } + } + + public static final class UnsignedPrefixComparatorNullLast extends RadixSortSupport { @Override public boolean sortDescending() { return false; } @Override public boolean sortSigned() { return false; } + @Override public NullOrder nullOrder() { return NullOrder.LAST; } public int compare(long aPrefix, long bPrefix) { + if (aPrefix == 0L && bPrefix == 0L) { + return 0; + } + if (aPrefix == 0L) { + return 1; + } else if (bPrefix == 0L) { + return -1; + } + return UnsignedLongs.compare(aPrefix, bPrefix); + } + } + public static final class UnsignedPrefixComparator extends RadixSortSupport { + @Override public boolean sortDescending() { return false; } + @Override public boolean sortSigned() { return false; } + @Override public NullOrder nullOrder() { return null; } + public int compare(long aPrefix, long bPrefix) { return UnsignedLongs.compare(aPrefix, bPrefix); } } - public static final class UnsignedPrefixComparatorDesc extends RadixSortSupport { - public UnsignedPrefixComparatorDesc(NullOrder nullOrder) { - super(nullOrder); + public static final class UnsignedPrefixComparatorDescNullFirst extends 
RadixSortSupport {
+    @Override public boolean sortDescending() { return true; }
+    @Override public boolean sortSigned() { return false; }
+    @Override public NullOrder nullOrder() { return NullOrder.FIRST; }
+    public int compare(long bPrefix, long aPrefix) {
+      if (aPrefix == 0L && bPrefix == 0L) {
+        return 0;
+      }
+      if (bPrefix == 0L) {
+        return -1;
+      } else if (aPrefix == 0L) {
+        return 1;
+      }
+      return UnsignedLongs.compare(aPrefix, bPrefix);
     }
+  }
+
+  public static final class UnsignedPrefixComparatorDescNullLast extends RadixSortSupport {
     @Override public boolean sortDescending() { return true; }
     @Override public boolean sortSigned() { return false; }
+    @Override public NullOrder nullOrder() { return NullOrder.LAST; }
     public int compare(long bPrefix, long aPrefix) {
+      if (aPrefix == 0L && bPrefix == 0L) {
+        return 0;
+      }
+      if (bPrefix == 0L) {
+        return 1;
+      } else if (aPrefix == 0L) {
+        return -1;
+      }
       return UnsignedLongs.compare(aPrefix, bPrefix);
     }
   }
 
-  public static final class SignedPrefixComparator extends RadixSortSupport {
-    public SignedPrefixComparator(NullOrder nullOrder) {
-      super(nullOrder);
+  public static final class UnsignedPrefixComparatorDesc extends RadixSortSupport {
+    @Override public boolean sortDescending() { return true; }
+    @Override public boolean sortSigned() { return false; }
+    @Override public NullOrder nullOrder() { return null; }
+    public int compare(long bPrefix, long aPrefix) {
+      return UnsignedLongs.compare(aPrefix, bPrefix);
     }
+  }
+
+  public static final class SignedPrefixComparatorNullFirst extends RadixSortSupport {
     @Override public boolean sortDescending() { return false; }
     @Override public boolean sortSigned() { return true; }
+    @Override public NullOrder nullOrder() { return NullOrder.FIRST; }
     public int compare(long a, long b) {
-      if (nullOrder() != null) {
       if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) {
         return 0;
       }
       if (a == Long.MIN_VALUE) {
-        switch (nullOrder()) {
-          case FIRST:
-            return -1;
-          case LAST:
-            return 1;
-          default:
-            break;
-        }
+        return -1;
       } else if (b == Long.MIN_VALUE) {
-        switch (nullOrder()) {
-          case FIRST:
-            return 1;
-          case LAST:
-            return -1;
-          default:
-            break;
-        }
+        return 1;
       }
+      return (a < b) ? -1 : (a > b) ? 1 : 0;
+    }
+  }
+
+  public static final class SignedPrefixComparatorNullLast extends RadixSortSupport {
+    @Override public boolean sortDescending() { return false; }
+    @Override public boolean sortSigned() { return true; }
+    @Override public NullOrder nullOrder() { return NullOrder.LAST; }
+    public int compare(long a, long b) {
+      if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) {
+        return 0;
+      }
+      if (a == Long.MIN_VALUE) {
+        return 1;
+      } else if (b == Long.MIN_VALUE) {
+        return -1;
       }
       return (a < b) ? -1 : (a > b) ? 1 : 0;
     }
   }
 
-  public static final class SignedPrefixComparatorDesc extends RadixSortSupport {
-    public SignedPrefixComparatorDesc(NullOrder nullOrder) {
-      super(nullOrder);
+  public static final class SignedPrefixComparator extends RadixSortSupport {
+    @Override public boolean sortDescending() { return false; }
+    @Override public boolean sortSigned() { return true; }
+    @Override public NullOrder nullOrder() { return null; }
+    public int compare(long a, long b) {
+      return (a < b) ? -1 : (a > b) ?
1 : 0; } + } + + public static final class SignedPrefixComparatorDescNullFirst extends RadixSortSupport { @Override public boolean sortDescending() { return true; } @Override public boolean sortSigned() { return true; } + @Override public NullOrder nullOrder() { return NullOrder.FIRST; } public int compare(long b, long a) { - if (nullOrder() != null) { if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) { return 0; } if (b == Long.MIN_VALUE) { - switch (nullOrder()) { - case FIRST: - return -1; - case LAST: - return 1; - default: - break; - } + return -1; } else if (a == Long.MIN_VALUE) { - switch (nullOrder()) { - case FIRST: - return 1; - case LAST: - return -1; - default: - break; - } + return 1; } + return (a < b) ? -1 : (a > b) ? 1 : 0; + } + } + + public static final class SignedPrefixComparatorDescNullLast extends RadixSortSupport { + @Override public boolean sortDescending() { return true; } + @Override public boolean sortSigned() { return true; } + @Override public NullOrder nullOrder() { return NullOrder.LAST; } + public int compare(long b, long a) { + if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) { + return 0; } + if (b == Long.MIN_VALUE) { + return 1; + } else if (a == Long.MIN_VALUE) { + return -1; + } + return (a < b) ? -1 : (a > b) ? 1 : 0; + } + } + + public static final class SignedPrefixComparatorDesc extends RadixSortSupport { + @Override public boolean sortDescending() { return true; } + @Override public boolean sortSigned() { return true; } + @Override public NullOrder nullOrder() { return null; } + public int compare(long b, long a) { return (a < b) ? -1 : (a > b) ? 1 : 0; } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala index 0b825d025fa32..f21f4b0747aa2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala @@ -40,36 +40,69 @@ object SortPrefixUtils { def getPrefixComparator(sortOrder: SortOrder): PrefixComparator = { sortOrder.dataType match { - case StringType => - if (sortOrder.isAscending) PrefixComparators.STRING else PrefixComparators.STRING_DESC - case BinaryType => - if (sortOrder.isAscending) PrefixComparators.BINARY else PrefixComparators.BINARY_DESC + case StringType => getPrefixComparatorWithNullOrder(sortOrder, "STRING") + case BinaryType => getPrefixComparatorWithNullOrder(sortOrder, "BINARY") case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | TimestampType => - getPrefixComparatorWithNullOrder(sortOrder) + getPrefixComparatorWithNullOrder(sortOrder, "LONG") case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS => - getPrefixComparatorWithNullOrder(sortOrder) - case FloatType | DoubleType => - if (sortOrder.isAscending) PrefixComparators.DOUBLE else PrefixComparators.DOUBLE_DESC - case dt: DecimalType => - if (sortOrder.isAscending) PrefixComparators.DOUBLE else PrefixComparators.DOUBLE_DESC + getPrefixComparatorWithNullOrder(sortOrder, "LONG") + case FloatType | DoubleType => getPrefixComparatorWithNullOrder(sortOrder, "DOUBLE") + case dt: DecimalType => getPrefixComparatorWithNullOrder(sortOrder, "DOUBLE") case _ => NoOpPrefixComparator } } - private def getPrefixComparatorWithNullOrder(sortOrder: SortOrder): PrefixComparator = { + private def getPrefixComparatorWithNullOrder( + sortOrder: SortOrder, signedType: String): PrefixComparator = { if (sortOrder.isAscending) 
{ if (sortOrder.nullOrder != null) { - if (sortOrder.nullOrder == NullFirst) PrefixComparators.LONG_NULLFIRST - else PrefixComparators.LONG_NULLLAST + if (sortOrder.nullOrder == NullFirst) { + signedType match { + case "LONG" => PrefixComparators.LONG_NULLFIRST + case "STRING" => PrefixComparators.STRING_NULLFIRST + case "BINARY" => PrefixComparators.BINARY_NULLFIRST + case "DOUBLE" => PrefixComparators.DOUBLE_NULLFIRST + } + } else { + signedType match { + case "LONG" => PrefixComparators.LONG_NULLLAST + case "STRING" => PrefixComparators.STRING_NULLLAST + case "BINARY" => PrefixComparators.BINARY_NULLLAST + case "DOUBLE" => PrefixComparators.DOUBLE_NULLLAST + } + } } else { - PrefixComparators.LONG + signedType match { + case "LONG" => PrefixComparators.LONG + case "STRING" => PrefixComparators.STRING + case "BINARY" => PrefixComparators.BINARY + case "DOUBLE" => PrefixComparators.DOUBLE + } } } else { if (sortOrder.nullOrder != null) { - if (sortOrder.nullOrder == NullFirst) PrefixComparators.LONG_DESC_NULLFIRST - else PrefixComparators.LONG_DESC_NULLLAST + if (sortOrder.nullOrder == NullFirst) { + signedType match { + case "LONG" => PrefixComparators.LONG_DESC_NULLFIRST + case "STRING" => PrefixComparators.STRING_DESC_NULLFIRST + case "BINARY" => PrefixComparators.BINARY_DESC_NULLFIRST + case "DOUBLE" => PrefixComparators.DOUBLE_DESC_NULLFIRST + } + } else { + signedType match { + case "LONG" => PrefixComparators.LONG_DESC_NULLLAST + case "STRING" => PrefixComparators.STRING_DESC_NULLLAST + case "BINARY" => PrefixComparators.BINARY_DESC_NULLLAST + case "DOUBLE" => PrefixComparators.DOUBLE_DESC_NULLLAST + } + } } else { - PrefixComparators.LONG_DESC + signedType match { + case "LONG" => PrefixComparators.LONG_DESC + case "STRING" => PrefixComparators.STRING_DESC + case "BINARY" => PrefixComparators.BINARY_DESC + case "DOUBLE" => PrefixComparators.DOUBLE_DESC + } } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index eb5fe2df6b1d1..8b881f568ad40 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1921,7 +1921,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { sql( """ - |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) + |INSERT INTO spark_10747 VALUES (6, 10, null), (6, 13, null) """. 
stripMargin ) @@ -1929,12 +1929,12 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer( sql( """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 """.stripMargin ), sql( """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC, COL2 """.stripMargin ) ) @@ -1942,48 +1942,48 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer( sql( """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS LAST + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2 """.stripMargin ), - Row(6, 11, 4) :: - Row(6, 7, 4) :: + Row(6, 7, 4) :: + Row(6, 11, 4) :: + Row(6, 7, 8) :: Row(6, 15, 8) :: Row(6, 15, 8) :: - Row(6, 7, 8) :: - Row(6, 12, 10) :: Row(6, 9, 10) :: - Row(6, 13, null) :: - Row(6, 10, null) :: Nil + Row(6, 12, 10) :: + Row(6, 10, null) :: + Row(6, 13, null) :: Nil ) checkAnswer( sql( """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 """.stripMargin ), - Row(6, 13, null) :: - Row(6, 10, null) :: - Row(6, 12, 10) :: + Row(6, 10, null) :: + Row(6, 13, null) :: Row(6, 9, 10) :: + Row(6, 12, 10) :: + Row(6, 7, 8) :: Row(6, 15, 8) :: Row(6, 15, 8) :: - Row(6, 7, 8) :: - Row(6, 11, 4) :: - Row(6, 7, 4) :: Nil + Row(6, 7, 4) :: + Row(6, 11, 4) :: Nil ) checkAnswer( sql( """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 """.stripMargin ), sql( """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC + |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC, COL2 """. 
stripMargin ) From b6722be85a0a796a4b67fc3d55025e2bcdede97d Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Fri, 26 Aug 2016 21:06:46 -0700 Subject: [PATCH 07/19] SPARK-10747: add testcases --- .../unsafe/sort/RadixSortSuite.scala | 1 + .../sql/hive/execution/SQLQuerySuite.scala | 75 ++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala index 2c13806410192..dc9b6e11ec758 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala @@ -52,6 +52,7 @@ class RadixSortSuite extends SparkFunSuite with Logging { new PrefixComparators.RadixSortSupport { override def sortDescending = false override def sortSigned = false + override def nullOrder = null override def compare(a: Long, b: Long): Int = { return PrefixComparators.BINARY.compare(a & 0xffffff0000L, b & 0xffffff0000L) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 8b881f568ad40..e698e04556237 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1989,9 +1989,82 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { ) ) } - } + test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST with mixed datatypes") { + withTable("spark_10747") { + sql( + """ + |create table spark_10747( + |col1 string, + |col2 int, + |col3 double, + |col4 decimal(10,2), + |col5 decimal(20,1)) + |stored as parquet + """.stripMargin) + sql( + """ + |INSERT INTO spark_10747 VALUES + |('b', 2, 1.0, 1.00, 10.0), + |('d', 3, 2.0, 3.00, 0.0), + |('c', 3, 2.0, 2.00, 15.1), + |('d', 3, 0.0, 3.00, 1.0), + |(null, 3, 0.0, 3.00, 1.0), + |('d', 3, null, 4.00, 1.0), + |('a', 1, 1.0, 1.00, null), + |('c', 3, 2.0, 2.00, null) + """.stripMargin) + + checkAnswer( + sql( + """ + |select * from spark_10747 order by col1 nulls last, col5 nulls last + """.stripMargin + ), + Row("a", 1, 1.0, 1.00, null) :: + Row("b", 2, 1.0, 1.00, 10.0) :: + Row("c", 3, 2.0, 2.00, 15.1) :: + Row("c", 3, 2.0, 2.00, null) :: + Row("d", 3, 2.0, 3.00, 0.0) :: + Row("d", 3, 0.0, 3.00, 1.0) :: + Row("d", 3, null, 4.00, 1.0) :: + Row(null, 3, 0.0, 3.00, 1.0) :: Nil + ) + + checkAnswer( + sql( + """ + |select * from spark_10747 order by col1 desc nulls first, col5 desc nulls first + """.stripMargin + ), + Row(null, 3, 0.0, 3.00, 1.0):: + Row("d", 3, 0.0, 3.00, 1.0):: + Row("d", 3, null, 4.00, 1.0):: + Row("d", 3, 2.0, 3.00, 0.0):: + Row("c", 3, 2.0, 2.00, null):: + Row("c", 3, 2.0, 2.00, 15.1):: + Row("b", 2, 1.0, 1.00, 10.0):: + Row("a", 1, 1.0, 1.00, null)::Nil + ) + + checkAnswer( + sql( + """ + |select * from spark_10747 order by col5 desc nulls first, col3 desc nulls last + """.stripMargin + ), + Row("c", 3, 2.0, 2.00, null):: + Row("a", 1, 1.0, 1.00, null):: + Row("c", 3, 2.0, 2.00, 15.1):: + Row("b", 2, 1.0, 1.00, 10.0):: + Row("d", 3, 0.0, 3.00, 1.0):: + Row(null, 3, 0.0, 3.00, 1.0):: + Row("d", 3, null, 4.00, 1.0):: + Row("d", 3, 2.0, 3.00, 0.0)::Nil + ) + } + } def testCommandAvailable(command: String): Boolean = { val attempt = Try(Process(command).run(ProcessLogger(_ => ())).exitValue()) From 
3ff55602eacd07a343dbacdc11d79d33f5ac547c Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Tue, 30 Aug 2016 13:35:21 -0700 Subject: [PATCH 08/19] SPARK-10747: change based on review --- .../unsafe/sort/PrefixComparators.java | 97 ++----- .../unsafe/sort/UnsafeInMemorySorter.java | 11 +- .../unsafe/sort/RadixSortSuite.scala | 28 +- .../sql/catalyst/analysis/Analyzer.scala | 4 +- .../SubstituteUnresolvedOrdinals.scala | 2 +- .../sql/catalyst/expressions/SortOrder.scala | 25 +- .../codegen/GenerateOrdering.scala | 22 +- .../expressions/windowExpressions.scala | 4 +- .../sql/catalyst/parser/AstBuilder.scala | 30 +- .../spark/sql/execution/SortPrefixUtils.scala | 55 ++-- .../spark/sql/execution/WindowExec.scala | 6 +- .../inputs/orderby-nulls-ordering.sql | 83 ++++++ .../results/orderby-nulls-ordering.sql.out | 254 +++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 182 ------------ .../sql/hive/execution/SQLQuerySuite.scala | 258 ------------------ 15 files changed, 456 insertions(+), 605 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java index ba6ddc82aaa9d..65b4f987f9e42 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java @@ -27,54 +27,45 @@ public class PrefixComparators { private PrefixComparators() {} - public static final PrefixComparator STRING = new UnsignedPrefixComparator(); - public static final PrefixComparator STRING_NULLFIRST = - new UnsignedPrefixComparatorNullFirst(); + public static final PrefixComparator STRING = + new UnsignedPrefixComparator(); public static final PrefixComparator STRING_NULLLAST = new UnsignedPrefixComparatorNullLast(); - public static final PrefixComparator STRING_DESC = new UnsignedPrefixComparatorDesc(); public static final PrefixComparator STRING_DESC_NULLFIRST = new UnsignedPrefixComparatorDescNullFirst(); - public static final PrefixComparator STRING_DESC_NULLLAST = - new UnsignedPrefixComparatorDescNullLast(); + public static final PrefixComparator STRING_DESC = + new UnsignedPrefixComparatorDesc(); - - public static final PrefixComparator BINARY = new UnsignedPrefixComparator(); - public static final PrefixComparator BINARY_NULLFIRST = - new UnsignedPrefixComparatorNullFirst(); + public static final PrefixComparator BINARY = + new UnsignedPrefixComparator(); public static final PrefixComparator BINARY_NULLLAST = new UnsignedPrefixComparatorNullLast(); - public static final PrefixComparator BINARY_DESC = new UnsignedPrefixComparatorDesc(); public static final PrefixComparator BINARY_DESC_NULLFIRST = new UnsignedPrefixComparatorDescNullFirst(); - public static final PrefixComparator BINARY_DESC_NULLLAST = - new UnsignedPrefixComparatorDescNullLast(); + public static final PrefixComparator BINARY_DESC = + new UnsignedPrefixComparatorDesc(); - public static final PrefixComparator LONG = new SignedPrefixComparator(); - public static final PrefixComparator LONG_NULLFIRST = - new SignedPrefixComparatorDescNullFirst(); + public static final PrefixComparator LONG = + new SignedPrefixComparator(); public static final PrefixComparator LONG_NULLLAST = - new 
SignedPrefixComparatorDescNullLast(); + new SignedPrefixComparatorNullLast(); - public static final PrefixComparator LONG_DESC = new SignedPrefixComparatorDesc(); public static final PrefixComparator LONG_DESC_NULLFIRST = new SignedPrefixComparatorDescNullFirst(); - public static final PrefixComparator LONG_DESC_NULLLAST = - new SignedPrefixComparatorDescNullLast(); + public static final PrefixComparator LONG_DESC = + new SignedPrefixComparatorDesc(); - public static final PrefixComparator DOUBLE = new UnsignedPrefixComparator(); - public static final PrefixComparator DOUBLE_NULLFIRST = - new UnsignedPrefixComparatorNullFirst(); + public static final PrefixComparator DOUBLE = + new UnsignedPrefixComparator(); public static final PrefixComparator DOUBLE_NULLLAST = new UnsignedPrefixComparatorNullLast(); - public static final PrefixComparator DOUBLE_DESC = new UnsignedPrefixComparatorDesc(); public static final PrefixComparator DOUBLE_DESC_NULLFIRST = new UnsignedPrefixComparatorDescNullFirst(); - public static final PrefixComparator DOUBLE_DESC_NULLLAST = - new UnsignedPrefixComparatorDescNullLast(); + public static final PrefixComparator DOUBLE_DESC = + new UnsignedPrefixComparatorDesc(); public static final class StringPrefixComparator { public static long computePrefix(UTF8String value) { @@ -124,7 +115,8 @@ public abstract static class RadixSortSupport extends PrefixComparator { // Standard prefix comparator implementations // - public static final class UnsignedPrefixComparatorNullFirst extends RadixSortSupport { + // unsigned asc null first (default) + public static final class UnsignedPrefixComparator extends RadixSortSupport { @Override public boolean sortDescending() { return false; } @Override public boolean sortSigned() { return false; } @Override public NullOrder nullOrder() { return NullOrder.FIRST; } @@ -141,6 +133,7 @@ public int compare(long aPrefix, long bPrefix) { } } + // unsigned asc null last public static final class UnsignedPrefixComparatorNullLast extends RadixSortSupport { @Override public boolean sortDescending() { return false; } @Override public boolean sortSigned() { return false; } @@ -158,15 +151,7 @@ public int compare(long aPrefix, long bPrefix) { } } - public static final class UnsignedPrefixComparator extends RadixSortSupport { - @Override public boolean sortDescending() { return false; } - @Override public boolean sortSigned() { return false; } - @Override public NullOrder nullOrder() { return null; } - public int compare(long aPrefix, long bPrefix) { - return UnsignedLongs.compare(aPrefix, bPrefix); - } - } - + // unsigned desc null first public static final class UnsignedPrefixComparatorDescNullFirst extends RadixSortSupport { @Override public boolean sortDescending() { return true; } @Override public boolean sortSigned() { return false; } @@ -184,7 +169,8 @@ public int compare(long bPrefix, long aPrefix) { } } - public static final class UnsignedPrefixComparatorDescNullLast extends RadixSortSupport { + // unsigned desc null last (default) + public static final class UnsignedPrefixComparatorDesc extends RadixSortSupport { @Override public boolean sortDescending() { return true; } @Override public boolean sortSigned() { return false; } @Override public NullOrder nullOrder() { return NullOrder.LAST; } @@ -201,16 +187,8 @@ public int compare(long bPrefix, long aPrefix) { } } - public static final class UnsignedPrefixComparatorDesc extends RadixSortSupport { - @Override public boolean sortDescending() { return true; } - @Override public boolean sortSigned() 
{ return false; } - @Override public NullOrder nullOrder() { return null; } - public int compare(long bPrefix, long aPrefix) { - return UnsignedLongs.compare(aPrefix, bPrefix); - } - } - - public static final class SingedPrefixComparatorNullFirst extends RadixSortSupport { + // signed asc null first (default) + public static final class SignedPrefixComparator extends RadixSortSupport { @Override public boolean sortDescending() { return false; } @Override public boolean sortSigned() { return true; } @Override public NullOrder nullOrder() { return NullOrder.FIRST; } @@ -227,7 +205,8 @@ public int compare(long a, long b) { } } - public static final class SingedPrefixComparatorNullLast extends RadixSortSupport { + // signed asc null last + public static final class SignedPrefixComparatorNullLast extends RadixSortSupport { @Override public boolean sortDescending() { return false; } @Override public boolean sortSigned() { return true; } @Override public NullOrder nullOrder() { return NullOrder.LAST; } @@ -244,15 +223,7 @@ public int compare(long a, long b) { } } - public static final class SignedPrefixComparator extends RadixSortSupport { - @Override public boolean sortDescending() { return false; } - @Override public boolean sortSigned() { return true; } - @Override public NullOrder nullOrder() { return null; } - public int compare(long a, long b) { - return (a < b) ? -1 : (a > b) ? 1 : 0; - } - } - + // signed desc null first public static final class SignedPrefixComparatorDescNullFirst extends RadixSortSupport { @Override public boolean sortDescending() { return true; } @Override public boolean sortSigned() { return true; } @@ -270,7 +241,8 @@ public int compare(long b, long a) { } } - public static final class SignedPrefixComparatorDescNullLast extends RadixSortSupport { + // signed desc null last (default) + public static final class SignedPrefixComparatorDesc extends RadixSortSupport { @Override public boolean sortDescending() { return true; } @Override public boolean sortSigned() { return true; } @Override public NullOrder nullOrder() { return NullOrder.LAST; } @@ -286,13 +258,4 @@ public int compare(long b, long a) { return (a < b) ? -1 : (a > b) ? 1 : 0; } } - - public static final class SignedPrefixComparatorDesc extends RadixSortSupport { - @Override public boolean sortDescending() { return true; } - @Override public boolean sortSigned() { return true; } - @Override public NullOrder nullOrder() { return null; } - public int compare(long b, long a) { - return (a < b) ? -1 : (a > b) ? 
1 : 0; - } - } } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java index 64f22ef2e3d9d..a158d4e13ec80 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java @@ -334,22 +334,13 @@ public UnsafeSorterIterator getSortedIterator() { assert radixSortSupport != null : "Nulls are only stored separately with radix sort"; LinkedList queue = new LinkedList<>(); + // The null order is either LAST or FIRST, regardless of sorting direction (ASC|DESC) if (radixSortSupport.nullOrder() == PrefixComparator.NullOrder.LAST) { queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset)); queue.add(new SortedIterator(nullBoundaryPos / 2, 0)); } else if (radixSortSupport.nullOrder() == PrefixComparator.NullOrder.FIRST) { queue.add(new SortedIterator(nullBoundaryPos / 2, 0)); queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset)); - } else { - if (radixSortSupport.sortDescending()) { - // Nulls are smaller than non-nulls if no Null ordering is specified. - queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset)); - queue.add(new SortedIterator(nullBoundaryPos / 2, 0)); - - } else { - queue.add(new SortedIterator(nullBoundaryPos / 2, 0)); - queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset)); - } } return new UnsafeExternalSorter.ChainedIterator(queue); } else { diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala index dc9b6e11ec758..eed79f853c169 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.unsafe.array.LongArray import org.apache.spark.unsafe.memory.MemoryBlock import org.apache.spark.util.collection.Sorter +import org.apache.spark.util.collection.unsafe.sort.PrefixComparator.NullOrder import org.apache.spark.util.random.XORShiftRandom class RadixSortSuite extends SparkFunSuite with Logging { @@ -40,24 +41,37 @@ class RadixSortSuite extends SparkFunSuite with Logging { case class RadixSortType( name: String, referenceComparator: PrefixComparator, - startByteIdx: Int, endByteIdx: Int, descending: Boolean, signed: Boolean) + startByteIdx: Int, endByteIdx: Int, descending: Boolean, signed: Boolean, nullFirst: Boolean) val SORT_TYPES_TO_TEST = Seq( - RadixSortType("unsigned binary data asc", PrefixComparators.BINARY, 0, 7, false, false), - RadixSortType("unsigned binary data desc", PrefixComparators.BINARY_DESC, 0, 7, true, false), - RadixSortType("twos complement asc", PrefixComparators.LONG, 0, 7, false, true), - RadixSortType("twos complement desc", PrefixComparators.LONG_DESC, 0, 7, true, true), + RadixSortType("unsigned binary data asc nulls first", + PrefixComparators.BINARY, 0, 7, false, false, true), + RadixSortType("unsigned binary data asc nulls last", + PrefixComparators.BINARY, 0, 7, false, false, false), + RadixSortType("unsigned binary data desc nulls last", + PrefixComparators.BINARY_DESC, 0, 7, true, false, false), + RadixSortType("unsigned binary data desc nulls first", + PrefixComparators.BINARY_DESC, 0, 7, true, 
false, true), + + RadixSortType("twos complement asc nulls first", + PrefixComparators.LONG, 0, 7, false, true, true), + RadixSortType("twos complement asc nulls last", + PrefixComparators.LONG, 0, 7, false, true, false), + RadixSortType("twos complement desc nulls last", + PrefixComparators.LONG_DESC, 0, 7, true, true, false), + RadixSortType("twos complement desc nulls first", + PrefixComparators.LONG_DESC, 0, 7, true, true, true), RadixSortType( "binary data partial", new PrefixComparators.RadixSortSupport { override def sortDescending = false override def sortSigned = false - override def nullOrder = null + override def nullOrder = NullOrder.FIRST override def compare(a: Long, b: Long): Int = { return PrefixComparators.BINARY.compare(a & 0xffffff0000L, b & 0xffffff0000L) } }, - 2, 4, false, false)) + 2, 4, false, false, true)) private def generateTestData(size: Int, rand: => Long): (Array[JLong], LongArray) = { val ref = Array.tabulate[Long](size) { i => rand } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index e774c71319d7b..18f814d6cdfd4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -714,9 +714,9 @@ class Analyzer( case s @ Sort(orders, global, child) if orders.exists(_.child.isInstanceOf[UnresolvedOrdinal]) => val newOrders = orders map { - case s @ SortOrder(UnresolvedOrdinal(index), direction, nullOrder) => + case s @ SortOrder(UnresolvedOrdinal(index), direction) => if (index > 0 && index <= child.output.size) { - SortOrder(child.output(index - 1), direction, nullOrder) + SortOrder(child.output(index - 1), direction) } else { s.failAnalysis( s"ORDER BY position $index is not in select list " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala index af0a565f73ae9..6d8dc8628229a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala @@ -36,7 +36,7 @@ class SubstituteUnresolvedOrdinals(conf: CatalystConf) extends Rule[LogicalPlan] def apply(plan: LogicalPlan): LogicalPlan = plan transform { case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) => val newOrders = s.order.map { - case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _) => + case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _) => val newOrdinal = withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) withOrigin(order.origin)(order.copy(child = newOrdinal)) case other => other diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 68a9efe9bd093..75ac64e153de1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -28,31 +28,29 @@ abstract sealed class SortDirection { def sql: String } -abstract sealed class NullOrdering { -def sql: String -} - +// default null order is first for asc case
object Ascending extends SortDirection { override def sql: String = "ASC" } -case object Descending extends SortDirection { - override def sql: String = "DESC" +case object AscendingNullLast extends SortDirection { + override def sql: String = "ASC NULLS LAST" } -case object NullFirst extends NullOrdering{ - override def sql: String = "NULLS FIRST" +// default null order is last for desc +case object Descending extends SortDirection { + override def sql: String = "DESC" } -case object NullLast extends NullOrdering{ - override def sql: String = "NULLS LAST" +case object DescendingNullFirst extends SortDirection { + override def sql: String = "DESC NULLS FIRST" } /** * An expression that can be used to sort a tuple. This class extends expression primarily so that * transformations over expression will descend into its child. */ -case class SortOrder(child: Expression, direction: SortDirection, nullOrder: NullOrdering = null) +case class SortOrder(child: Expression, direction: SortDirection) extends UnaryExpression with Unevaluable { /** Sort order is not foldable because we don't have an eval for it. */ @@ -70,10 +68,9 @@ case class SortOrder(child: Expression, direction: SortDirection, nullOrder: Nul override def nullable: Boolean = child.nullable override def toString: String = s"$child ${direction.sql}" - override def sql: String = - child.sql + " " + direction.sql + s"${if (nullOrder!=null) " " + nullOrder.sql else ""}" + override def sql: String = child.sql + " " + direction.sql - def isAscending: Boolean = direction == Ascending + def isAscending: Boolean = direction == Ascending || direction == AscendingNullLast } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index a0bb1edbe73c2..998efcc0b101c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -74,7 +74,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = { val comparisons = ordering.map { order => val eval = order.child.genCode(ctx) - val asc = order.direction == Ascending + val asc = order.isAscending val isNullA = ctx.freshName("isNullA") val primitiveA = ctx.freshName("primitiveA") val isNullB = ctx.freshName("isNullB") @@ -99,17 +99,17 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR if ($isNullA && $isNullB) { // Nothing } else if ($isNullA) { - return ${ if (order.nullOrder == null) { - if (order.direction == Ascending) "-1" else "1" - } else { - if (order.nullOrder == NullFirst) "-1" else "1" - }}; + return ${ + order.direction match { + case Ascending | DescendingNullFirst => "-1" + case Descending | AscendingNullLast => "1" + }}; } else if ($isNullB) { - return ${ if (order.nullOrder == null) { - if (order.direction == Ascending) "1" else "-1" - } else { - if (order.nullOrder == NullFirst) "1" else "-1" - }}; + return ${ + order.direction match { + case Ascending | DescendingNullFirst => "1" + case Descending | AscendingNullLast => "-1" + }}; } else { int comp = ${ctx.genComp(order.child.dataType, primitiveA, primitiveB)}; if (comp != 0) { diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index b47486f7af7f9..c1a2b513d11c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -357,8 +357,8 @@ abstract class OffsetWindowFunction s"Offset expression must be a foldable integer expression: $x") } val boundary = direction match { - case Ascending => ValueFollowing(offsetValue) - case Descending => ValuePreceding(offsetValue) + case Ascending | AscendingNullLast => ValueFollowing(offsetValue) + case Descending | DescendingNullFirst => ValuePreceding(offsetValue) } SpecifiedWindowFrame(RowFrame, boundary, boundary) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 17f7a123cd85e..08e4ae0b61eb3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1206,20 +1206,26 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * Create a [[SortOrder]] expression. */ override def visitSortItem(ctx: SortItemContext): SortOrder = withOrigin(ctx) { - val nullOrder = if (ctx.nullOrder != null) { - ctx.nullOrder.getType match { - case SqlBaseParser.FIRST => NullFirst - case SqlBaseParser.LAST => NullLast - case _ => null - } - } else { - null - } - if (ctx.DESC != null) { - SortOrder(expression(ctx.expression), Descending, nullOrder) + if (ctx.nullOrder != null) { + ctx.nullOrder.getType match { + case SqlBaseParser.FIRST => SortOrder(expression(ctx.expression), DescendingNullFirst) + case SqlBaseParser.LAST => SortOrder(expression(ctx.expression), Descending) + case _ => throw new ParseException(s"NULL ordering can be only FIRST or LAST", ctx) + } + } else { + SortOrder(expression(ctx.expression), Descending) + } } else { - SortOrder(expression(ctx.expression), Ascending, nullOrder) + if (ctx.nullOrder != null) { + ctx.nullOrder.getType match { + case SqlBaseParser.FIRST => SortOrder(expression(ctx.expression), Ascending) + case SqlBaseParser.LAST => SortOrder(expression(ctx.expression), AscendingNullLast) + case _ => throw new ParseException(s"NULL ordering can be only FIRST or LAST", ctx) + } + } else { + SortOrder(expression(ctx.expression), Ascending) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala index f21f4b0747aa2..df5f4257212f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala @@ -54,56 +54,39 @@ object SortPrefixUtils { private def getPrefixComparatorWithNullOrder( sortOrder: SortOrder, signedType: String): PrefixComparator = { - if (sortOrder.isAscending) { - if (sortOrder.nullOrder != null) { - if (sortOrder.nullOrder == NullFirst) { - signedType match { - case "LONG" => PrefixComparators.LONG_NULLFIRST - case "STRING" => PrefixComparators.STRING_NULLFIRST - case "BINARY" => PrefixComparators.BINARY_NULLFIRST - case "DOUBLE" => PrefixComparators.DOUBLE_NULLFIRST - } - } else { - signedType match { - case "LONG" => 
PrefixComparators.LONG_NULLLAST - case "STRING" => PrefixComparators.STRING_NULLLAST - case "BINARY" => PrefixComparators.BINARY_NULLLAST - case "DOUBLE" => PrefixComparators.DOUBLE_NULLLAST - } + sortOrder.direction match { + case AscendingNullLast => + signedType match { + case "LONG" => PrefixComparators.LONG_NULLLAST + case "STRING" => PrefixComparators.STRING_NULLLAST + case "BINARY" => PrefixComparators.BINARY_NULLLAST + case "DOUBLE" => PrefixComparators.DOUBLE_NULLLAST } - } else { + case Ascending => + // or the default NULLS FIRST signedType match { case "LONG" => PrefixComparators.LONG case "STRING" => PrefixComparators.STRING case "BINARY" => PrefixComparators.BINARY case "DOUBLE" => PrefixComparators.DOUBLE } - } - } else { - if (sortOrder.nullOrder != null) { - if (sortOrder.nullOrder == NullFirst) { - signedType match { - case "LONG" => PrefixComparators.LONG_DESC_NULLFIRST - case "STRING" => PrefixComparators.STRING_DESC_NULLFIRST - case "BINARY" => PrefixComparators.BINARY_DESC_NULLFIRST - case "DOUBLE" => PrefixComparators.DOUBLE_DESC_NULLFIRST - } - } else { - signedType match { - case "LONG" => PrefixComparators.LONG_DESC_NULLLAST - case "STRING" => PrefixComparators.STRING_DESC_NULLLAST - case "BINARY" => PrefixComparators.BINARY_DESC_NULLLAST - case "DOUBLE" => PrefixComparators.DOUBLE_DESC_NULLLAST - } + case DescendingNullFirst => + signedType match { + case "LONG" => PrefixComparators.LONG_DESC_NULLFIRST + case "STRING" => PrefixComparators.STRING_DESC_NULLFIRST + case "BINARY" => PrefixComparators.BINARY_DESC_NULLFIRST + case "DOUBLE" => PrefixComparators.DOUBLE_DESC_NULLFIRST } - } else { + case Descending => + // or the default NULLS LAST signedType match { case "LONG" => PrefixComparators.LONG_DESC case "STRING" => PrefixComparators.STRING_DESC case "BINARY" => PrefixComparators.BINARY_DESC case "DOUBLE" => PrefixComparators.DOUBLE_DESC } - } + case _ => throw new IllegalArgumentException( + "This should not happen. Contact Spark contributors for this error.") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala index 7012a68056972..919bfd55bc39f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala @@ -130,8 +130,8 @@ case class WindowExec( val current = newMutableProjection(expr :: Nil, child.output) // Flip the sign of the offset when processing the order is descending val boundOffset = sortExpr.direction match { - case Descending => -offset - case Ascending => offset + case Descending | DescendingNullFirst => -offset + case Ascending | AscendingNullLast => offset } // Create the projection which returns the current 'value' modified by adding the offset. val boundExpr = Add(expr, Cast(Literal.create(boundOffset, IntegerType), expr.dataType)) @@ -145,7 +145,7 @@ case class WindowExec( // to the result of bound value projection. This is done manually because we want to use // Code Generation (if it is enabled). 
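// [Editorial note, illustrative only; not part of the original patch] The ordering
// built here compares each input row against its offset-adjusted frame bound, so the
// sort expressions must be re-bound to ordinal positions first. A minimal sketch for
// one ascending integer sort column, assuming the surrounding helpers:
//   val bound = SortOrder(BoundReference(0, IntegerType, nullable = true), Ascending)
//   val ordering = newOrdering(bound :: Nil, Nil)  // row value vs. bound value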
val sortExprs = exprs.zipWithIndex.map { case (e, i) => - SortOrder(BoundReference(i, e.dataType, e.nullable), e.direction, e.nullOrder) + SortOrder(BoundReference(i, e.dataType, e.nullable), e.direction) } val ordering = newOrdering(sortExprs, Nil) RangeBoundOrdering(ordering, current, bound) diff --git a/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql b/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql new file mode 100644 index 0000000000000..f7637b444b9fe --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql @@ -0,0 +1,83 @@ +-- Q1. testing window functions with order by +create table spark_10747(col1 int, col2 int, col3 int) using parquet; + +-- Q2. insert to tables +INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), +(6, 15, 8), (6, 7, 4), (6, 7, 8), (6, 13, null), (6, 10, null); + +-- Q3. windowing with order by DESC NULLS LAST +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 desc nulls last, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2; + +-- Q4. windowing with order by DESC NULLS FIRST +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 desc nulls first, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2; + +-- Q5. windowing with order by ASC NULLS LAST +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 asc nulls last, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2; + +-- Q6. windowing with order by ASC NULLS FIRST +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 asc nulls first, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2; + +-- Q7. Regular query with ORDER BY ASC NULLS FIRST +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2; + +-- Q8. Regular query with ORDER BY ASC NULLS LAST +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2; + +-- Q9. Regular query with ORDER BY DESC NULLS FIRST +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2; + +-- Q10. Regular query with ORDER BY DESC NULLS LAST +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2; + +-- drop the test table +drop table spark_10747; + +-- Q11. mix datatype for ORDER BY NULLS FIRST|LAST +create table spark_10747_mix( +col1 string, +col2 int, +col3 double, +col4 decimal(10,2), +col5 decimal(20,1)) +using parquet; + +-- Q12. Insert to the table +INSERT INTO spark_10747_mix VALUES +('b', 2, 1.0, 1.00, 10.0), +('d', 3, 2.0, 3.00, 0.0), +('c', 3, 2.0, 2.00, 15.1), +('d', 3, 0.0, 3.00, 1.0), +(null, 3, 0.0, 3.00, 1.0), +('d', 3, null, 4.00, 1.0), +('a', 1, 1.0, 1.00, null), +('c', 3, 2.0, 2.00, null); + +-- Q13. Regular query with 2 NULLS LAST columns +select * from spark_10747_mix order by col1 nulls last, col5 nulls last; + +-- Q14. Regular query with 2 NULLS FIRST columns +select * from spark_10747_mix order by col1 desc nulls first, col5 desc nulls first; + +-- Q15. 
Regular query with mixed NULLS FIRST|LAST +select * from spark_10747_mix order by col5 desc nulls first, col3 desc nulls last; + +-- drop the test table +drop table spark_10747_mix; + + diff --git a/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out b/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out new file mode 100644 index 0000000000000..c1b63dfb8caef --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out @@ -0,0 +1,254 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 17 + + +-- !query 0 +create table spark_10747(col1 int, col2 int, col3 int) using parquet +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), +(6, 15, 8), (6, 7, 4), (6, 7, 8), (6, 13, null), (6, 10, null) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 desc nulls last, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2 +-- !query 2 schema +struct +-- !query 2 output +6 9 10 28 +6 13 NULL 34 +6 10 NULL 41 +6 12 10 43 +6 15 8 55 +6 15 8 56 +6 11 4 56 +6 7 8 58 +6 7 4 58 + + +-- !query 3 +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 desc nulls first, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2 +-- !query 3 schema +struct +-- !query 3 output +6 10 NULL 32 +6 11 4 33 +6 13 NULL 44 +6 7 4 48 +6 9 10 51 +6 15 8 55 +6 12 10 56 +6 15 8 56 +6 7 8 58 + + +-- !query 4 +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 asc nulls last, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2 +-- !query 4 schema +struct +-- !query 4 output +6 7 4 25 +6 13 NULL 35 +6 11 4 40 +6 10 NULL 44 +6 7 8 55 +6 15 8 57 +6 15 8 58 +6 12 10 59 +6 9 10 61 + + +-- !query 5 +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 asc nulls first, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2 +-- !query 5 schema +struct +-- !query 5 output +6 10 NULL 30 +6 12 10 36 +6 13 NULL 41 +6 7 4 48 +6 9 10 51 +6 11 4 53 +6 7 8 55 +6 15 8 57 +6 15 8 58 + + +-- !query 6 +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 +-- !query 6 schema +struct +-- !query 6 output +6 10 NULL +6 13 NULL +6 7 4 +6 11 4 +6 7 8 +6 15 8 +6 15 8 +6 9 10 +6 12 10 + + +-- !query 7 +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2 +-- !query 7 schema +struct +-- !query 7 output +6 7 4 +6 11 4 +6 7 8 +6 15 8 +6 15 8 +6 9 10 +6 12 10 +6 10 NULL +6 13 NULL + + +-- !query 8 +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 +-- !query 8 schema +struct +-- !query 8 output +6 10 NULL +6 13 NULL +6 9 10 +6 12 10 +6 7 8 +6 15 8 +6 15 8 +6 7 4 +6 11 4 + + +-- !query 9 +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 +-- !query 9 schema +struct +-- !query 9 output +6 9 10 +6 12 10 +6 7 8 +6 15 8 +6 15 8 +6 7 4 +6 11 4 +6 10 NULL +6 13 NULL + + +-- !query 10 +drop table spark_10747 +-- !query 10 schema +struct<> +-- !query 10 output + + + +-- !query 11 +create table spark_10747_mix( +col1 string, +col2 int, +col3 double, +col4 decimal(10,2), +col5 
decimal(20,1)) +using parquet +-- !query 11 schema +struct<> +-- !query 11 output + + + +-- !query 12 +INSERT INTO spark_10747_mix VALUES +('b', 2, 1.0, 1.00, 10.0), +('d', 3, 2.0, 3.00, 0.0), +('c', 3, 2.0, 2.00, 15.1), +('d', 3, 0.0, 3.00, 1.0), +(null, 3, 0.0, 3.00, 1.0), +('d', 3, null, 4.00, 1.0), +('a', 1, 1.0, 1.00, null), +('c', 3, 2.0, 2.00, null) +-- !query 12 schema +struct<> +-- !query 12 output + + + +-- !query 13 +select * from spark_10747_mix order by col1 nulls last, col5 nulls last +-- !query 13 schema +struct +-- !query 13 output +a 1 1.0 1 NULL +b 2 1.0 1 10 +c 3 2.0 2 15.1 +c 3 2.0 2 NULL +d 3 2.0 3 0 +d 3 0.0 3 1 +d 3 NULL 4 1 +NULL 3 0.0 3 1 + + +-- !query 14 +select * from spark_10747_mix order by col1 desc nulls first, col5 desc nulls first +-- !query 14 schema +struct +-- !query 14 output +NULL 3 0.0 3 1 +d 3 0.0 3 1 +d 3 NULL 4 1 +d 3 2.0 3 0 +c 3 2.0 2 NULL +c 3 2.0 2 15.1 +b 2 1.0 1 10 +a 1 1.0 1 NULL + + +-- !query 15 +select * from spark_10747_mix order by col5 desc nulls first, col3 desc nulls last +-- !query 15 schema +struct +-- !query 15 output +c 3 2.0 2 NULL +a 1 1.0 1 NULL +c 3 2.0 2 15.1 +b 2 1.0 1 10 +d 3 0.0 3 1 +NULL 3 0.0 3 1 +d 3 NULL 4 1 +d 3 2.0 3 0 + + +-- !query 16 +drop table spark_10747_mix +-- !query 16 schema +struct<> +-- !query 16 output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index f3ccfb56488c1..eac266cba55b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2661,186 +2661,4 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { data.selectExpr("`part.col1`", "`col.1`")) } } - - test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST in WINDOW specification") { - - withTable("spark_10747") { - sql("create table spark_10747(col1 int, col2 int, col3 int) using parquet") - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), - |(6, 15, 8), (6, 7, 4), (6, 7, 8) - """.stripMargin) - - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) - """.stripMargin - ) - - checkAnswer( - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 desc nulls last, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ), - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 desc, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ) - ) - - checkAnswer( - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 desc nulls first, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ), - Row(6, 10, null, 32):: - Row(6, 13, null, 44):: - Row(6, 9, 10, 51):: - Row(6, 12, 10, 56):: - Row(6, 7, 8, 58):: - Row(6, 15, 8, 56):: - Row(6, 15, 8, 55):: - Row(6, 7, 4, 48):: - Row(6, 11, 4, 33)::Nil - ) - - checkAnswer( - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 asc nulls last, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ), - Row(6, 7, 4, 25):: - Row(6, 11, 4, 40):: - Row(6, 7, 8, 55):: - Row(6, 15, 8, 57):: - Row(6, 15, 8, 58):: - Row(6, 9, 10, 61):: - Row(6, 12, 10, 59):: - Row(6, 10, null, 44):: - 
Row(6, 13, null, 35)::Nil - ) - - checkAnswer( - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 asc nulls first, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ), - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 asc, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ) - ) - } - } - - test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST") { - withTable("spark_10747") { - sql("create table spark_10747(col1 int, col2 int, col3 int) using parquet") - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), - |(6, 15, 8), (6, 7, 4), (6, 7, 8) - """.stripMargin) - - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 10, null), (6, 13, null) - """. - stripMargin - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 - """.stripMargin - ), - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC, COL2 - """.stripMargin - ) - ) - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2 - """.stripMargin - ), - - Row(6, 7, 4) :: - Row(6, 11, 4) :: - Row(6, 7, 8) :: - Row(6, 15, 8) :: - Row(6, 15, 8) :: - Row(6, 9, 10) :: - Row(6, 12, 10) :: - Row(6, 10, null) :: - Row(6, 13, null) :: Nil - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 - """.stripMargin - ), - - Row(6, 10, null) :: - Row(6, 13, null) :: - Row(6, 9, 10) :: - Row(6, 12, 10) :: - Row(6, 7, 8) :: - Row(6, 15, 8) :: - Row(6, 15, 8) :: - Row(6, 7, 4) :: - Row(6, 11, 4) :: Nil - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 - """.stripMargin - ), - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC, COL2 - """. 
- stripMargin - ) - ) - } - } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index e698e04556237..dc4d099f0f666 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1808,264 +1808,6 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } - test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST in WINDOW specification") { - - withTable("spark_10747") { - sql("create table spark_10747(col1 int, col2 int, col3 int) stored as parquet") - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), - |(6, 15, 8), (6, 7, 4), (6, 7, 8) - """.stripMargin) - - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 13, null), (6, 10, null) - """.stripMargin - ) - - checkAnswer( - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 desc nulls last, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ), - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 desc, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ) - ) - - checkAnswer( - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 desc nulls first, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ), - Row(6, 10, null, 32):: - Row(6, 13, null, 44):: - Row(6, 9, 10, 51):: - Row(6, 12, 10, 56):: - Row(6, 7, 8, 58):: - Row(6, 15, 8, 56):: - Row(6, 15, 8, 55):: - Row(6, 7, 4, 48):: - Row(6, 11, 4, 33)::Nil - ) - - checkAnswer( - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 asc nulls last, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ), - Row(6, 7, 4, 25):: - Row(6, 11, 4, 40):: - Row(6, 7, 8, 55):: - Row(6, 15, 8, 57):: - Row(6, 15, 8, 58):: - Row(6, 9, 10, 61):: - Row(6, 12, 10, 59):: - Row(6, 10, null, 44):: - Row(6, 13, null, 35)::Nil - ) - - checkAnswer( - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 asc nulls first, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ), - sql( - """ - |select col1, col2, col3, sum(col2) - | over (partition by col1 - | order by col3 asc, col2 - | rows between 2 preceding and 2 following ) - |from spark_10747 where col1 = 6 - """.stripMargin - ) - ) - } - } - - test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST") { - withTable("spark_10747") { - sql("create table spark_10747(col1 int, col2 int, col3 int) stored as parquet") - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), - |(6, 15, 8), (6, 7, 4), (6, 7, 8) - """.stripMargin) - - sql( - """ - |INSERT INTO spark_10747 VALUES (6, 10, null), (6, 13, null) - """. 
- stripMargin - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 - """.stripMargin - ), - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC, COL2 - """.stripMargin - ) - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2 - """.stripMargin - ), - - Row(6, 7, 4) :: - Row(6, 11, 4) :: - Row(6, 7, 8) :: - Row(6, 15, 8) :: - Row(6, 15, 8) :: - Row(6, 9, 10) :: - Row(6, 12, 10) :: - Row(6, 10, null) :: - Row(6, 13, null) :: Nil - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 - """.stripMargin - ), - - Row(6, 10, null) :: - Row(6, 13, null) :: - Row(6, 9, 10) :: - Row(6, 12, 10) :: - Row(6, 7, 8) :: - Row(6, 15, 8) :: - Row(6, 15, 8) :: - Row(6, 7, 4) :: - Row(6, 11, 4) :: Nil - ) - - checkAnswer( - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 - """.stripMargin - ), - sql( - """ - |SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC, COL2 - """. - stripMargin - ) - ) - } - } - - test("SPARK-10747: ORDER BY clause with NULLS FIRST|LAST with mixed datatypes") { - withTable("spark_10747") { - sql( - """ - |create table spark_10747( - |col1 string, - |col2 int, - |col3 double, - |col4 decimal(10,2), - |col5 decimal(20,1)) - |stored as parquet - """.stripMargin) - sql( - """ - |INSERT INTO spark_10747 VALUES - |('b', 2, 1.0, 1.00, 10.0), - |('d', 3, 2.0, 3.00, 0.0), - |('c', 3, 2.0, 2.00, 15.1), - |('d', 3, 0.0, 3.00, 1.0), - |(null, 3, 0.0, 3.00, 1.0), - |('d', 3, null, 4.00, 1.0), - |('a', 1, 1.0, 1.00, null), - |('c', 3, 2.0, 2.00, null) - """.stripMargin) - - checkAnswer( - sql( - """ - |select * from spark_10747 order by col1 nulls last, col5 nulls last - """.stripMargin - ), - Row("a", 1, 1.0, 1.00, null) :: - Row("b", 2, 1.0, 1.00, 10.0) :: - Row("c", 3, 2.0, 2.00, 15.1) :: - Row("c", 3, 2.0, 2.00, null) :: - Row("d", 3, 2.0, 3.00, 0.0) :: - Row("d", 3, 0.0, 3.00, 1.0) :: - Row("d", 3, null, 4.00, 1.0) :: - Row(null, 3, 0.0, 3.00, 1.0) :: Nil - ) - - checkAnswer( - sql( - """ - |select * from spark_10747 order by col1 desc nulls first, col5 desc nulls first - """.stripMargin - ), - Row(null, 3, 0.0, 3.00, 1.0):: - Row("d", 3, 0.0, 3.00, 1.0):: - Row("d", 3, null, 4.00, 1.0):: - Row("d", 3, 2.0, 3.00, 0.0):: - Row("c", 3, 2.0, 2.00, null):: - Row("c", 3, 2.0, 2.00, 15.1):: - Row("b", 2, 1.0, 1.00, 10.0):: - Row("a", 1, 1.0, 1.00, null)::Nil - ) - - checkAnswer( - sql( - """ - |select * from spark_10747 order by col5 desc nulls first, col3 desc nulls last - """.stripMargin - ), - Row("c", 3, 2.0, 2.00, null):: - Row("a", 1, 1.0, 1.00, null):: - Row("c", 3, 2.0, 2.00, 15.1):: - Row("b", 2, 1.0, 1.00, 10.0):: - Row("d", 3, 0.0, 3.00, 1.0):: - Row(null, 3, 0.0, 3.00, 1.0):: - Row("d", 3, null, 4.00, 1.0):: - Row("d", 3, 2.0, 3.00, 0.0)::Nil - ) - } - } - def testCommandAvailable(command: String): Boolean = { val attempt = Try(Process(command).run(ProcessLogger(_ => ())).exitValue()) attempt.isSuccess && attempt.get == 0 From 155dcbb9d5cff396c9cd7c6bc46b83aace71cc21 Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Wed, 31 Aug 2016 17:14:57 -0700 Subject: [PATCH 09/19] SPARK-10747: SortOrder 3rd parameter nullOrdering --- .../sql/catalyst/analysis/Analyzer.scala | 4 +-- .../SubstituteUnresolvedOrdinals.scala | 2 +- .../sql/catalyst/expressions/SortOrder.scala | 33 ++++++++++++++----- .../codegen/GenerateOrdering.scala | 14 ++++---- 
.../sql/catalyst/expressions/ordering.scala | 6 ++-- .../expressions/windowExpressions.scala | 4 +-- .../sql/catalyst/parser/AstBuilder.scala | 12 ++++--- .../spark/sql/execution/SortPrefixUtils.scala | 10 +++--- .../spark/sql/execution/SparkPlan.scala | 2 +- .../spark/sql/execution/WindowExec.scala | 4 +-- 10 files changed, 56 insertions(+), 35 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 18f814d6cdfd4..92bf8e0536fc4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -714,9 +714,9 @@ class Analyzer( case s @ Sort(orders, global, child) if orders.exists(_.child.isInstanceOf[UnresolvedOrdinal]) => val newOrders = orders map { - case s @ SortOrder(UnresolvedOrdinal(index), direction) => + case s @ SortOrder(UnresolvedOrdinal(index), direction, nullOrdering) => if (index > 0 && index <= child.output.size) { - SortOrder(child.output(index - 1), direction) + SortOrder(child.output(index - 1), direction, nullOrdering) } else { s.failAnalysis( s"ORDER BY position $index is not in select list " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala index 6d8dc8628229a..af0a565f73ae9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala @@ -36,7 +36,7 @@ class SubstituteUnresolvedOrdinals(conf: CatalystConf) extends Rule[LogicalPlan] def apply(plan: LogicalPlan): LogicalPlan = plan transform { case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) => val newOrders = s.order.map { - case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _) => + case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _) => val newOrdinal = withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) withOrigin(order.origin)(order.copy(child = newOrdinal)) case other => other diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 75ac64e153de1..4bbbb3ff45525 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -26,31 +26,40 @@ import org.apache.spark.util.collection.unsafe.sort.PrefixComparators.DoublePref abstract sealed class SortDirection { def sql: String + def defaultNullOrdering: NullOrdering } -// default null order is firt for asc -case object Ascending extends SortDirection { - override def sql: String = "ASC" +abstract sealed class NullOrdering { + def sql: String } -case object AscendingNullLast extends SortDirection { - override def sql: String = "ASC NULLS LAST" +case object Ascending extends SortDirection { + override def sql: String = "ASC" + override def defaultNullOrdering: NullOrdering = NullFirst } // default null order is last for desc case object Descending extends SortDirection { override def sql: String = "DESC" + override def defaultNullOrdering: NullOrdering = 
NullLast } -case object DescendingNullFirst extends SortDirection { - override def sql: String = "DESC NULLS FIRST" +case object NullFirst extends NullOrdering{ + override def sql: String = "NULLS FIRST" +} + +case object NullLast extends NullOrdering{ + override def sql: String = "NULLS LAST" } /** * An expression that can be used to sort a tuple. This class extends expression primarily so that * transformations over expression will descend into its child. */ -case class SortOrder(child: Expression, direction: SortDirection) +case class SortOrder( + child: Expression, + direction: SortDirection, + nullOrdering: NullOrdering) extends UnaryExpression with Unevaluable { /** Sort order is not foldable because we don't have an eval for it. */ @@ -70,7 +79,13 @@ case class SortOrder(child: Expression, direction: SortDirection) override def toString: String = s"$child ${direction.sql}" override def sql: String = child.sql + " " + direction.sql - def isAscending: Boolean = direction == Ascending || direction == AscendingNullLast + def isAscending: Boolean = direction == Ascending +} + +object SortOrder { + def apply(child: Expression, direction: SortDirection): SortOrder = { + new SortOrder(child, direction, direction.defaultNullOrdering) + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 998efcc0b101c..758d3544ae367 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -63,7 +63,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR */ def genComparisons(ctx: CodegenContext, schema: StructType): String = { val ordering = schema.fields.map(_.dataType).zipWithIndex.map { - case(dt, index) => new SortOrder(BoundReference(index, dt, nullable = true), Ascending) + case(dt, index) => SortOrder(BoundReference(index, dt, nullable = true), Ascending) } genComparisons(ctx, ordering) } @@ -100,15 +100,15 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR // Nothing } else if ($isNullA) { return ${ - order.direction match { - case Ascending | DescendingNullFirst => "-1" - case Descending | AscendingNullLast => "1" + order.nullOrdering match { + case NullFirst => "-1" + case NullLast => "1" }}; } else if ($isNullB) { return ${ - order.direction match { - case Ascending | DescendingNullFirst => "1" - case Descending | AscendingNullLast => "-1" + order.nullOrdering match { + case NullFirst => "1" + case NullLast => "-1" }}; } else { int comp = ${ctx.genComp(order.child.dataType, primitiveA, primitiveB)}; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala index 6112259fed619..c47427d3c0ce6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala @@ -39,9 +39,9 @@ class InterpretedOrdering(ordering: Seq[SortOrder]) extends Ordering[InternalRow if (left == null && right == null) { // Both null, continue looking. 
} else if (left == null) { - return if (order.direction == Ascending) -1 else 1 + return if (order.nullOrdering == NullFirst) -1 else 1 } else if (right == null) { - return if (order.direction == Ascending) 1 else -1 + return if (order.nullOrdering == NullFirst) 1 else -1 } else { val comparison = order.dataType match { case dt: AtomicType if order.direction == Ascending => @@ -76,7 +76,7 @@ object InterpretedOrdering { */ def forSchema(dataTypes: Seq[DataType]): InterpretedOrdering = { new InterpretedOrdering(dataTypes.zipWithIndex.map { - case (dt, index) => new SortOrder(BoundReference(index, dt, nullable = true), Ascending) + case (dt, index) => SortOrder(BoundReference(index, dt, nullable = true), Ascending) }) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index c1a2b513d11c7..b47486f7af7f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -357,8 +357,8 @@ abstract class OffsetWindowFunction s"Offset expression must be a foldable integer expression: $x") } val boundary = direction match { - case Ascending | AscendingNullLast => ValueFollowing(offsetValue) - case Descending | DescendingNullFirst => ValuePreceding(offsetValue) + case Ascending => ValueFollowing(offsetValue) + case Descending => ValuePreceding(offsetValue) } SpecifiedWindowFrame(RowFrame, boundary, boundary) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 08e4ae0b61eb3..06ee04e547fff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1209,8 +1209,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { if (ctx.DESC != null) { if (ctx.nullOrder != null) { ctx.nullOrder.getType match { - case SqlBaseParser.FIRST => SortOrder(expression(ctx.expression), DescendingNullFirst) - case SqlBaseParser.LAST => SortOrder(expression(ctx.expression), Descending) + case SqlBaseParser.FIRST => + SortOrder(expression(ctx.expression), Descending, NullFirst) + case SqlBaseParser.LAST => + SortOrder(expression(ctx.expression), Descending) case _ => throw new ParseException(s"NULL ordering can be only FIRST or LAST", ctx) } } else { @@ -1219,8 +1221,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { } else { if (ctx.nullOrder != null) { ctx.nullOrder.getType match { - case SqlBaseParser.FIRST => SortOrder(expression(ctx.expression), Ascending) - case SqlBaseParser.LAST => SortOrder(expression(ctx.expression), AscendingNullLast) + case SqlBaseParser.FIRST => + SortOrder(expression(ctx.expression), Ascending) + case SqlBaseParser.LAST => + SortOrder(expression(ctx.expression), Ascending, NullLast) case _ => throw new ParseException(s"NULL ordering can be only FIRST or LAST", ctx) } } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala index df5f4257212f7..af099ff002478 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala @@ -55,7 +55,7 @@ object SortPrefixUtils { private def getPrefixComparatorWithNullOrder( sortOrder: SortOrder, signedType: String): PrefixComparator = { sortOrder.direction match { - case AscendingNullLast => + case Ascending if (sortOrder.nullOrdering == NullLast) => signedType match { case "LONG" => PrefixComparators.LONG_NULLLAST case "STRING" => PrefixComparators.STRING_NULLLAST @@ -70,7 +70,7 @@ object SortPrefixUtils { case "BINARY" => PrefixComparators.BINARY case "DOUBLE" => PrefixComparators.DOUBLE } - case DescendingNullFirst => + case Descending if (sortOrder.nullOrdering == NullFirst) => signedType match { case "LONG" => PrefixComparators.LONG_DESC_NULLFIRST case "STRING" => PrefixComparators.STRING_DESC_NULLFIRST @@ -96,7 +96,8 @@ object SortPrefixUtils { def getPrefixComparator(schema: StructType): PrefixComparator = { if (schema.nonEmpty) { val field = schema.head - getPrefixComparator(SortOrder(BoundReference(0, field.dataType, field.nullable), Ascending)) + getPrefixComparator( + SortOrder(BoundReference(0, field.dataType, field.nullable), Ascending)) } else { new PrefixComparator { override def compare(prefix1: Long, prefix2: Long): Int = 0 @@ -123,7 +124,8 @@ object SortPrefixUtils { * Returns whether the fully sorting on the specified key field is possible with radix sort. */ def canSortFullyWithPrefix(field: StructField): Boolean = { - canSortFullyWithPrefix(SortOrder(BoundReference(0, field.dataType, field.nullable), Ascending)) + canSortFullyWithPrefix( + SortOrder(BoundReference(0, field.dataType, field.nullable), Ascending)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 6a2d97c9b1797..6aeefa6eddafe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -368,7 +368,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ */ protected def newNaturalAscendingOrdering(dataTypes: Seq[DataType]): Ordering[InternalRow] = { val order: Seq[SortOrder] = dataTypes.zipWithIndex.map { - case (dt, index) => new SortOrder(BoundReference(index, dt, nullable = true), Ascending) + case (dt, index) => SortOrder(BoundReference(index, dt, nullable = true), Ascending) } newOrdering(order, Seq.empty) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala index 919bfd55bc39f..9d006d21d9440 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WindowExec.scala @@ -130,8 +130,8 @@ case class WindowExec( val current = newMutableProjection(expr :: Nil, child.output) // Flip the sign of the offset when processing the order is descending val boundOffset = sortExpr.direction match { - case Descending | DescendingNullFirst => -offset - case Ascending | AscendingNullLast => offset + case Descending => -offset + case Ascending => offset } // Create the projection which returns the current 'value' modified by adding the offset. 
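     // Illustrative aside (hypothetical sort column `ts`, not part of this patch): with
     // ORDER BY ts ASC and a frame of 3 PRECEDING, the offset reaching this point is -3,
     // so the projection below evaluates ts + (-3); with ORDER BY ts DESC the match above
     // flips it to ts + 3, keeping PRECEDING pointed at rows earlier in the scan order.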
val boundExpr = Add(expr, Cast(Literal.create(boundOffset, IntegerType), expr.dataType)) From c2abf6a96ed13b1128936107fe15543bddf2d34b Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Thu, 1 Sep 2016 11:01:02 -0700 Subject: [PATCH 10/19] SPARK-10747: update upon review --- .../sql/catalyst/expressions/SortOrder.scala | 4 +-- .../sql/catalyst/parser/AstBuilder.scala | 34 ++++++------------- .../spark/sql/execution/SortPrefixUtils.scala | 6 ++-- sql/hive/src/test/resources/sqlgen/agg2.sql | 2 +- sql/hive/src/test/resources/sqlgen/agg3.sql | 2 +- .../sqlgen/broadcast_join_subquery.sql | 2 +- .../sqlgen/generate_with_other_1.sql | 2 +- .../sqlgen/generate_with_other_2.sql | 2 +- .../resources/sqlgen/grouping_sets_2_1.sql | 2 +- .../resources/sqlgen/grouping_sets_2_2.sql | 2 +- .../resources/sqlgen/grouping_sets_2_3.sql | 2 +- .../resources/sqlgen/grouping_sets_2_4.sql | 2 +- .../resources/sqlgen/grouping_sets_2_5.sql | 2 +- .../test/resources/sqlgen/rollup_cube_6_1.sql | 2 +- .../test/resources/sqlgen/rollup_cube_6_2.sql | 2 +- .../test/resources/sqlgen/rollup_cube_6_3.sql | 2 +- .../test/resources/sqlgen/rollup_cube_6_4.sql | 2 +- .../resources/sqlgen/sort_asc_nulls_last.sql | 4 +++ .../resources/sqlgen/sort_by_after_having.sql | 2 +- .../sqlgen/sort_desc_nulls_first.sql | 4 +++ .../resources/sqlgen/subquery_in_having_1.sql | 2 +- .../resources/sqlgen/subquery_in_having_2.sql | 2 +- .../test/resources/sqlgen/window_basic_2.sql | 2 +- .../test/resources/sqlgen/window_basic_3.sql | 2 +- .../sqlgen/window_basic_asc_nulls_last.sql | 5 +++ .../sqlgen/window_basic_desc_nulls_first.sql | 5 +++ .../resources/sqlgen/window_with_join.sql | 2 +- .../window_with_the_same_window_with_agg.sql | 2 +- ...w_with_the_same_window_with_agg_filter.sql | 2 +- ...ith_the_same_window_with_agg_functions.sql | 2 +- ...w_with_the_same_window_with_agg_having.sql | 2 +- .../sql/catalyst/LogicalPlanToSQLSuite.scala | 24 +++++++++++++ 32 files changed, 81 insertions(+), 53 deletions(-) create mode 100644 sql/hive/src/test/resources/sqlgen/sort_asc_nulls_last.sql create mode 100644 sql/hive/src/test/resources/sqlgen/sort_desc_nulls_first.sql create mode 100644 sql/hive/src/test/resources/sqlgen/window_basic_asc_nulls_last.sql create mode 100644 sql/hive/src/test/resources/sqlgen/window_basic_desc_nulls_first.sql diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 4bbbb3ff45525..7977802f4741f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -76,8 +76,8 @@ case class SortOrder( override def dataType: DataType = child.dataType override def nullable: Boolean = child.nullable - override def toString: String = s"$child ${direction.sql}" - override def sql: String = child.sql + " " + direction.sql + override def toString: String = s"$child ${direction.sql} ${nullOrdering.sql}" + override def sql: String = child.sql + " " + direction.sql + " " + nullOrdering.sql def isAscending: Boolean = direction == Ascending } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 06ee04e547fff..4b3dac89466b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1206,31 +1206,19 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * Create a [[SortOrder]] expression. */ override def visitSortItem(ctx: SortItemContext): SortOrder = withOrigin(ctx) { - if (ctx.DESC != null) { - if (ctx.nullOrder != null) { - ctx.nullOrder.getType match { - case SqlBaseParser.FIRST => - SortOrder(expression(ctx.expression), Descending, NullFirst) - case SqlBaseParser.LAST => - SortOrder(expression(ctx.expression), Descending) - case _ => throw new ParseException(s"NULL ordering can be only FIRST or LAST", ctx) - } - } else { - SortOrder(expression(ctx.expression), Descending) - } + val direction = if (ctx.DESC != null) { + Descending } else { - if (ctx.nullOrder != null) { - ctx.nullOrder.getType match { - case SqlBaseParser.FIRST => - SortOrder(expression(ctx.expression), Ascending) - case SqlBaseParser.LAST => - SortOrder(expression(ctx.expression), Ascending, NullLast) - case _ => throw new ParseException(s"NULL ordering can be only FIRST or LAST", ctx) - } - } else { - SortOrder(expression(ctx.expression), Ascending) - } + Ascending + } + val nullOrdering = if (ctx.FIRST != null) { + NullFirst + } else if (ctx.LAST != null) { + NullLast + } else { + direction.defaultNullOrdering } + SortOrder(expression(ctx.expression), direction, nullOrdering) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala index af099ff002478..f44d3558a44d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala @@ -96,8 +96,7 @@ object SortPrefixUtils { def getPrefixComparator(schema: StructType): PrefixComparator = { if (schema.nonEmpty) { val field = schema.head - getPrefixComparator( - SortOrder(BoundReference(0, field.dataType, field.nullable), Ascending)) + getPrefixComparator(SortOrder(BoundReference(0, field.dataType, field.nullable), Ascending)) } else { new PrefixComparator { override def compare(prefix1: Long, prefix2: Long): Int = 0 @@ -124,8 +123,7 @@ object SortPrefixUtils { * Returns whether the fully sorting on the specified key field is possible with radix sort. */ def canSortFullyWithPrefix(field: StructField): Boolean = { - canSortFullyWithPrefix( - SortOrder(BoundReference(0, field.dataType, field.nullable), Ascending)) + canSortFullyWithPrefix(SortOrder(BoundReference(0, field.dataType, field.nullable), Ascending)) } /** diff --git a/sql/hive/src/test/resources/sqlgen/agg2.sql b/sql/hive/src/test/resources/sqlgen/agg2.sql index 65d71714fe850..adbfdb7e79d64 100644 --- a/sql/hive/src/test/resources/sqlgen/agg2.sql +++ b/sql/hive/src/test/resources/sqlgen/agg2.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. 
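 -- Note on the regenerated golden files in this patch: the recorded queries are
 -- unchanged; only the expected SQL gains an explicit null ordering, because
 -- SortOrder.sql now appends nullOrdering.sql (NULLS FIRST for the ASC default).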
SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY MAX(key) -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `count(value)` FROM (SELECT `gen_attr_0` FROM (SELECT count(`gen_attr_3`) AS `gen_attr_0`, max(`gen_attr_2`) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_2` ORDER BY `gen_attr_1` ASC) AS gen_subquery_1) AS gen_subquery_2 +SELECT `gen_attr_0` AS `count(value)` FROM (SELECT `gen_attr_0` FROM (SELECT count(`gen_attr_3`) AS `gen_attr_0`, max(`gen_attr_2`) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_2` ORDER BY `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1) AS gen_subquery_2 diff --git a/sql/hive/src/test/resources/sqlgen/agg3.sql b/sql/hive/src/test/resources/sqlgen/agg3.sql index 14b19392cdce3..207542d226e23 100644 --- a/sql/hive/src/test/resources/sqlgen/agg3.sql +++ b/sql/hive/src/test/resources/sqlgen/agg3.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY key, MAX(key) -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `count(value)` FROM (SELECT `gen_attr_0` FROM (SELECT count(`gen_attr_4`) AS `gen_attr_0`, `gen_attr_3` AS `gen_attr_1`, max(`gen_attr_3`) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_4` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_3` ORDER BY `gen_attr_1` ASC, `gen_attr_2` ASC) AS gen_subquery_1) AS gen_subquery_2 +SELECT `gen_attr_0` AS `count(value)` FROM (SELECT `gen_attr_0` FROM (SELECT count(`gen_attr_4`) AS `gen_attr_0`, `gen_attr_3` AS `gen_attr_1`, max(`gen_attr_3`) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_4` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_3` ORDER BY `gen_attr_1` ASC NULLS FIRST, `gen_attr_2` ASC NULLS FIRST) AS gen_subquery_1) AS gen_subquery_2 diff --git a/sql/hive/src/test/resources/sqlgen/broadcast_join_subquery.sql b/sql/hive/src/test/resources/sqlgen/broadcast_join_subquery.sql index ec881a216e0b0..3de4f8a059965 100644 --- a/sql/hive/src/test/resources/sqlgen/broadcast_join_subquery.sql +++ b/sql/hive/src/test/resources/sqlgen/broadcast_join_subquery.sql @@ -5,4 +5,4 @@ FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) ORDER BY subq.key1, z.value -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key1`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_7` AS `gen_attr_6`, `gen_attr_9` AS `gen_attr_8`, `gen_attr_11` AS `gen_attr_10` FROM (SELECT `key` AS `gen_attr_5`, `value` AS `gen_attr_7` FROM `default`.`src1`) AS gen_subquery_0 INNER JOIN (SELECT `key` AS `gen_attr_9`, `value` AS `gen_attr_11` FROM `default`.`src`) AS gen_subquery_1 ON (`gen_attr_5` = `gen_attr_9`)) AS subq INNER JOIN (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1`, `ds` AS `gen_attr_3`, `hr` AS `gen_attr_4` FROM `default`.`srcpart`) AS gen_subquery_2 ON (((`gen_attr_0` = `gen_attr_2`) AND (`gen_attr_3` = '2008-04-08')) AND (CAST(`gen_attr_4` AS DOUBLE) = CAST(11 AS DOUBLE))) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_3 
+SELECT `gen_attr_0` AS `key1`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_7` AS `gen_attr_6`, `gen_attr_9` AS `gen_attr_8`, `gen_attr_11` AS `gen_attr_10` FROM (SELECT `key` AS `gen_attr_5`, `value` AS `gen_attr_7` FROM `default`.`src1`) AS gen_subquery_0 INNER JOIN (SELECT `key` AS `gen_attr_9`, `value` AS `gen_attr_11` FROM `default`.`src`) AS gen_subquery_1 ON (`gen_attr_5` = `gen_attr_9`)) AS subq INNER JOIN (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1`, `ds` AS `gen_attr_3`, `hr` AS `gen_attr_4` FROM `default`.`srcpart`) AS gen_subquery_2 ON (((`gen_attr_0` = `gen_attr_2`) AND (`gen_attr_3` = '2008-04-08')) AND (CAST(`gen_attr_4` AS DOUBLE) = CAST(11 AS DOUBLE))) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_3 diff --git a/sql/hive/src/test/resources/sqlgen/generate_with_other_1.sql b/sql/hive/src/test/resources/sqlgen/generate_with_other_1.sql index 805197a4ea11b..ab444d0c70936 100644 --- a/sql/hive/src/test/resources/sqlgen/generate_with_other_1.sql +++ b/sql/hive/src/test/resources/sqlgen/generate_with_other_1.sql @@ -5,4 +5,4 @@ WHERE id > 2 ORDER BY val, id LIMIT 5 -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `val`, `gen_attr_1` AS `id` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT gen_subquery_0.`gen_attr_2`, gen_subquery_0.`gen_attr_3`, gen_subquery_0.`gen_attr_4`, gen_subquery_0.`gen_attr_1` FROM (SELECT `arr` AS `gen_attr_2`, `arr2` AS `gen_attr_3`, `json` AS `gen_attr_4`, `id` AS `gen_attr_1` FROM `default`.`parquet_t3`) AS gen_subquery_0 WHERE (`gen_attr_1` > CAST(2 AS BIGINT))) AS gen_subquery_1 LATERAL VIEW explode(`gen_attr_2`) gen_subquery_2 AS `gen_attr_0` ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC LIMIT 5) AS parquet_t3 +SELECT `gen_attr_0` AS `val`, `gen_attr_1` AS `id` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT gen_subquery_0.`gen_attr_2`, gen_subquery_0.`gen_attr_3`, gen_subquery_0.`gen_attr_4`, gen_subquery_0.`gen_attr_1` FROM (SELECT `arr` AS `gen_attr_2`, `arr2` AS `gen_attr_3`, `json` AS `gen_attr_4`, `id` AS `gen_attr_1` FROM `default`.`parquet_t3`) AS gen_subquery_0 WHERE (`gen_attr_1` > CAST(2 AS BIGINT))) AS gen_subquery_1 LATERAL VIEW explode(`gen_attr_2`) gen_subquery_2 AS `gen_attr_0` ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST LIMIT 5) AS parquet_t3 diff --git a/sql/hive/src/test/resources/sqlgen/generate_with_other_2.sql b/sql/hive/src/test/resources/sqlgen/generate_with_other_2.sql index ef9a596197b8b..42a2369f34d1c 100644 --- a/sql/hive/src/test/resources/sqlgen/generate_with_other_2.sql +++ b/sql/hive/src/test/resources/sqlgen/generate_with_other_2.sql @@ -7,4 +7,4 @@ WHERE val > 2 ORDER BY val, id LIMIT 5 -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `val`, `gen_attr_1` AS `id` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `arr` AS `gen_attr_4`, `arr2` AS `gen_attr_3`, `json` AS `gen_attr_5`, `id` AS `gen_attr_1` FROM `default`.`parquet_t3`) AS gen_subquery_0 LATERAL VIEW explode(`gen_attr_3`) gen_subquery_2 AS `gen_attr_2` LATERAL VIEW explode(`gen_attr_2`) gen_subquery_3 AS `gen_attr_0` WHERE (`gen_attr_0` > CAST(2 AS BIGINT)) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC LIMIT 5) AS gen_subquery_1 +SELECT `gen_attr_0` AS `val`, `gen_attr_1` AS `id` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `arr` AS `gen_attr_4`, `arr2` AS `gen_attr_3`, `json` AS 
`gen_attr_5`, `id` AS `gen_attr_1` FROM `default`.`parquet_t3`) AS gen_subquery_0 LATERAL VIEW explode(`gen_attr_3`) gen_subquery_2 AS `gen_attr_2` LATERAL VIEW explode(`gen_attr_2`) gen_subquery_3 AS `gen_attr_0` WHERE (`gen_attr_0` > CAST(2 AS BIGINT)) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST LIMIT 5) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_1.sql b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_1.sql index b2c426c660d80..245b52341658f 100644 --- a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_1.sql +++ b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_1.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. SELECT a, b, sum(c) FROM parquet_t2 GROUP BY a, b GROUPING SETS (a, b) ORDER BY a, b -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_5`), (`gen_attr_6`)) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_1 +SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_5`), (`gen_attr_6`)) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_2.sql b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_2.sql index 96ee8e85951e8..1505dea11ec68 100644 --- a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_2.sql +++ b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_2.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. 
SELECT a, b, sum(c) FROM parquet_t2 GROUP BY a, b GROUPING SETS (a) ORDER BY a, b -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_5`)) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_1 +SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_5`)) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_3.sql b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_3.sql index 9b8b230c879c2..281add6aabb64 100644 --- a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_3.sql +++ b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_3.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. SELECT a, b, sum(c) FROM parquet_t2 GROUP BY a, b GROUPING SETS (b) ORDER BY a, b -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_6`)) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_1 +SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_6`)) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_4.sql b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_4.sql index c35db74a5c5b5..f8d64742b11e3 100644 --- a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_4.sql +++ b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_4.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. 
SELECT a, b, sum(c) FROM parquet_t2 GROUP BY a, b GROUPING SETS (()) ORDER BY a, b -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS(()) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_1 +SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS(()) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_5.sql b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_5.sql index e47f6d5dcf465..09e6ec2a5f8c9 100644 --- a/sql/hive/src/test/resources/sqlgen/grouping_sets_2_5.sql +++ b/sql/hive/src/test/resources/sqlgen/grouping_sets_2_5.sql @@ -2,4 +2,4 @@ SELECT a, b, sum(c) FROM parquet_t2 GROUP BY a, b GROUPING SETS ((), (a), (a, b)) ORDER BY a, b -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((), (`gen_attr_5`), (`gen_attr_5`, `gen_attr_6`)) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_1 +SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((), (`gen_attr_5`), (`gen_attr_5`, `gen_attr_6`)) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_6_1.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_6_1.sql index 22df578518ef3..c364c32dd5c55 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_6_1.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_6_1.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. 
SELECT a, b, sum(c) FROM parquet_t2 GROUP BY ROLLUP(a, b) ORDER BY a, b -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_5`, `gen_attr_6`), (`gen_attr_5`), ()) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_1 +SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_5`, `gen_attr_6`), (`gen_attr_5`), ()) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_6_2.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_6_2.sql index f44b652343acb..36c0223fceced 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_6_2.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_6_2.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. SELECT a, b, sum(c) FROM parquet_t2 GROUP BY CUBE(a, b) ORDER BY a, b -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_5`, `gen_attr_6`), (`gen_attr_5`), (`gen_attr_6`), ()) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_1 +SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(c)` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_6` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_5`, `b` AS `gen_attr_6`, `c` AS `gen_attr_4`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_5`, `gen_attr_6` GROUPING SETS((`gen_attr_5`, `gen_attr_6`), (`gen_attr_5`), (`gen_attr_6`), ()) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_6_3.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_6_3.sql index 40f6924913765..ed33f2a1de3cf 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_6_3.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_6_3.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. 
SELECT a, b, sum(a) FROM parquet_t2 GROUP BY ROLLUP(a, b) ORDER BY a, b -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(a)` FROM (SELECT `gen_attr_4` AS `gen_attr_0`, `gen_attr_5` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_4`, `b` AS `gen_attr_5`, `c` AS `gen_attr_6`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_4`, `gen_attr_5` GROUPING SETS((`gen_attr_4`, `gen_attr_5`), (`gen_attr_4`), ()) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_1 +SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(a)` FROM (SELECT `gen_attr_4` AS `gen_attr_0`, `gen_attr_5` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_4`, `b` AS `gen_attr_5`, `c` AS `gen_attr_6`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_4`, `gen_attr_5` GROUPING SETS((`gen_attr_4`, `gen_attr_5`), (`gen_attr_4`), ()) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_6_4.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_6_4.sql index 608e644dee6d0..e0e40241480da 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_6_4.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_6_4.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. SELECT a, b, sum(a) FROM parquet_t2 GROUP BY CUBE(a, b) ORDER BY a, b -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(a)` FROM (SELECT `gen_attr_4` AS `gen_attr_0`, `gen_attr_5` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_4`, `b` AS `gen_attr_5`, `c` AS `gen_attr_6`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_4`, `gen_attr_5` GROUPING SETS((`gen_attr_4`, `gen_attr_5`), (`gen_attr_4`), (`gen_attr_5`), ()) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_1 +SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b`, `gen_attr_3` AS `sum(a)` FROM (SELECT `gen_attr_4` AS `gen_attr_0`, `gen_attr_5` AS `gen_attr_1`, sum(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `a` AS `gen_attr_4`, `b` AS `gen_attr_5`, `c` AS `gen_attr_6`, `d` AS `gen_attr_7` FROM `default`.`parquet_t2`) AS gen_subquery_0 GROUP BY `gen_attr_4`, `gen_attr_5` GROUPING SETS((`gen_attr_4`, `gen_attr_5`), (`gen_attr_4`), (`gen_attr_5`), ()) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/sort_asc_nulls_last.sql b/sql/hive/src/test/resources/sqlgen/sort_asc_nulls_last.sql new file mode 100644 index 0000000000000..da4e3678a33b9 --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/sort_asc_nulls_last.sql @@ -0,0 +1,4 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. 
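+-- Worked ordering under assumed data (key in {1, 2, NULL}; not generated output):
+--   ORDER BY key            -> NULL, 1, 2   (ASC defaults to NULLS FIRST)
+--   ORDER BY key NULLS LAST -> 1, 2, NULL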
+SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY key nulls last, MAX(key) +-------------------------------------------------------------------------------- +SELECT `gen_attr_0` AS `count(value)` FROM (SELECT `gen_attr_0` FROM (SELECT count(`gen_attr_4`) AS `gen_attr_0`, `gen_attr_3` AS `gen_attr_1`, max(`gen_attr_3`) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_4` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_3` ORDER BY `gen_attr_1` ASC NULLS LAST, `gen_attr_2` ASC NULLS FIRST) AS gen_subquery_1) AS gen_subquery_2 diff --git a/sql/hive/src/test/resources/sqlgen/sort_by_after_having.sql b/sql/hive/src/test/resources/sqlgen/sort_by_after_having.sql index da60204297a21..a4f3ddc761f30 100644 --- a/sql/hive/src/test/resources/sqlgen/sort_by_after_having.sql +++ b/sql/hive/src/test/resources/sqlgen/sort_by_after_having.sql @@ -1,4 +1,4 @@ -- This file is automatically generated by LogicalPlanToSQLSuite. SELECT COUNT(value) FROM parquet_t1 GROUP BY key HAVING MAX(key) > 0 SORT BY key -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `count(value)` FROM (SELECT `gen_attr_0` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT count(`gen_attr_3`) AS `gen_attr_0`, max(`gen_attr_1`) AS `gen_attr_2`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_1`, `value` AS `gen_attr_3` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_1` HAVING (`gen_attr_2` > CAST(0 AS BIGINT))) AS gen_subquery_1 SORT BY `gen_attr_1` ASC) AS gen_subquery_2) AS gen_subquery_3 +SELECT `gen_attr_0` AS `count(value)` FROM (SELECT `gen_attr_0` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT count(`gen_attr_3`) AS `gen_attr_0`, max(`gen_attr_1`) AS `gen_attr_2`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_1`, `value` AS `gen_attr_3` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_1` HAVING (`gen_attr_2` > CAST(0 AS BIGINT))) AS gen_subquery_1 SORT BY `gen_attr_1` ASC NULLS FIRST) AS gen_subquery_2) AS gen_subquery_3 diff --git a/sql/hive/src/test/resources/sqlgen/sort_desc_nulls_first.sql b/sql/hive/src/test/resources/sqlgen/sort_desc_nulls_first.sql new file mode 100644 index 0000000000000..d995e3bdfad5c --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/sort_desc_nulls_first.sql @@ -0,0 +1,4 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. 
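+-- Defaults recap, as encoded by defaultNullOrdering in SortOrder.scala: ASC implies
+-- NULLS FIRST and DESC implies NULLS LAST, so this file and sort_asc_nulls_last.sql
+-- together pin down the two non-default pairings.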
+SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY key desc nulls first,MAX(key) +-------------------------------------------------------------------------------- +SELECT `gen_attr_0` AS `count(value)` FROM (SELECT `gen_attr_0` FROM (SELECT count(`gen_attr_4`) AS `gen_attr_0`, `gen_attr_3` AS `gen_attr_1`, max(`gen_attr_3`) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_4` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_3` ORDER BY `gen_attr_1` DESC NULLS FIRST, `gen_attr_2` ASC NULLS FIRST) AS gen_subquery_1) AS gen_subquery_2 diff --git a/sql/hive/src/test/resources/sqlgen/subquery_in_having_1.sql b/sql/hive/src/test/resources/sqlgen/subquery_in_having_1.sql index 9894f5ab39c76..25882147463b9 100644 --- a/sql/hive/src/test/resources/sqlgen/subquery_in_having_1.sql +++ b/sql/hive/src/test/resources/sqlgen/subquery_in_having_1.sql @@ -5,4 +5,4 @@ group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key) order by key -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (`gen_attr_2` IN (SELECT `gen_attr_5` AS `_c0` FROM (SELECT `gen_attr_3` AS `gen_attr_5` FROM (SELECT count(1) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_7` FROM `default`.`src`) AS gen_subquery_3 WHERE (CAST(`gen_attr_6` AS DOUBLE) = CAST('90' AS DOUBLE)) GROUP BY `gen_attr_6`) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS src +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (`gen_attr_2` IN (SELECT `gen_attr_5` AS `_c0` FROM (SELECT `gen_attr_3` AS `gen_attr_5` FROM (SELECT count(1) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_7` FROM `default`.`src`) AS gen_subquery_3 WHERE (CAST(`gen_attr_6` AS DOUBLE) = CAST('90' AS DOUBLE)) GROUP BY `gen_attr_6`) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC NULLS FIRST) AS src diff --git a/sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql b/sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql index c3a122aa889b9..de0116a4dcbaf 100644 --- a/sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql +++ b/sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql @@ -7,4 +7,4 @@ having b.key in (select a.key where a.value > 'val_9' and a.value = min(b.value)) order by b.key -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_5`) AS `gen_attr_1`, min(`gen_attr_5`) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (struct(`gen_attr_0`, `gen_attr_4`) IN (SELECT `gen_attr_6` AS `_c0`, `gen_attr_7` AS `_c1` FROM (SELECT `gen_attr_2` AS `gen_attr_6`, `gen_attr_3` AS `gen_attr_7` FROM (SELECT `gen_attr_2`, 
`gen_attr_3` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_3` > 'val_9')) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS b +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_5`) AS `gen_attr_1`, min(`gen_attr_5`) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (struct(`gen_attr_0`, `gen_attr_4`) IN (SELECT `gen_attr_6` AS `_c0`, `gen_attr_7` AS `_c1` FROM (SELECT `gen_attr_2` AS `gen_attr_6`, `gen_attr_3` AS `gen_attr_7` FROM (SELECT `gen_attr_2`, `gen_attr_3` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_3` > 'val_9')) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC NULLS FIRST) AS b diff --git a/sql/hive/src/test/resources/sqlgen/window_basic_2.sql b/sql/hive/src/test/resources/sqlgen/window_basic_2.sql index ec55d4b7146f2..0e2a9a54731fc 100644 --- a/sql/hive/src/test/resources/sqlgen/window_basic_2.sql +++ b/sql/hive/src/test/resources/sqlgen/window_basic_2.sql @@ -2,4 +2,4 @@ SELECT key, value, ROUND(AVG(key) OVER (), 2) FROM parquet_t1 ORDER BY key -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `round(avg(key) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), 2)` FROM (SELECT `gen_attr_0`, `gen_attr_1`, round(`gen_attr_3`, 2) AS `gen_attr_2` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, avg(`gen_attr_0`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `gen_attr_3` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1) AS gen_subquery_2 ORDER BY `gen_attr_0` ASC) AS parquet_t1 +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `round(avg(key) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), 2)` FROM (SELECT `gen_attr_0`, `gen_attr_1`, round(`gen_attr_3`, 2) AS `gen_attr_2` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, avg(`gen_attr_0`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `gen_attr_3` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1) AS gen_subquery_2 ORDER BY `gen_attr_0` ASC NULLS FIRST) AS parquet_t1 diff --git a/sql/hive/src/test/resources/sqlgen/window_basic_3.sql b/sql/hive/src/test/resources/sqlgen/window_basic_3.sql index c0ac9541e67ee..d727caa583e61 100644 --- a/sql/hive/src/test/resources/sqlgen/window_basic_3.sql +++ b/sql/hive/src/test/resources/sqlgen/window_basic_3.sql @@ -2,4 +2,4 @@ SELECT value, MAX(key + 1) OVER (PARTITION BY key % 5 ORDER BY key % 7) AS max FROM parquet_t1 -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `value`, `gen_attr_1` AS `max` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_2`, gen_subquery_1.`gen_attr_3`, gen_subquery_1.`gen_attr_4`, max(`gen_attr_2`) OVER (PARTITION BY `gen_attr_3` ORDER BY `gen_attr_4` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW) AS `gen_attr_1` FROM (SELECT `gen_attr_0`, (`gen_attr_5` + CAST(1 AS BIGINT)) AS `gen_attr_2`, (`gen_attr_5` % CAST(5 AS BIGINT)) AS `gen_attr_3`, (`gen_attr_5` % CAST(7 AS BIGINT)) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_5`, `value` AS `gen_attr_0` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 +SELECT `gen_attr_0` AS `value`, `gen_attr_1` AS `max` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_2`, gen_subquery_1.`gen_attr_3`, gen_subquery_1.`gen_attr_4`, max(`gen_attr_2`) OVER (PARTITION BY `gen_attr_3` ORDER BY `gen_attr_4` ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_1` FROM (SELECT `gen_attr_0`, (`gen_attr_5` + CAST(1 AS BIGINT)) AS `gen_attr_2`, (`gen_attr_5` % CAST(5 AS BIGINT)) AS `gen_attr_3`, (`gen_attr_5` % CAST(7 AS BIGINT)) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_5`, `value` AS `gen_attr_0` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 diff --git a/sql/hive/src/test/resources/sqlgen/window_basic_asc_nulls_last.sql b/sql/hive/src/test/resources/sqlgen/window_basic_asc_nulls_last.sql new file mode 100644 index 0000000000000..4739f05808daf --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/window_basic_asc_nulls_last.sql @@ -0,0 +1,5 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. +SELECT key, value, ROUND(AVG(key) OVER (), 2) +FROM parquet_t1 ORDER BY key nulls last +-------------------------------------------------------------------------------- +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `round(avg(key) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), 2)` FROM (SELECT `gen_attr_0`, `gen_attr_1`, round(`gen_attr_3`, 2) AS `gen_attr_2` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, avg(`gen_attr_0`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `gen_attr_3` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1) AS gen_subquery_2 ORDER BY `gen_attr_0` ASC NULLS LAST) AS parquet_t1 diff --git a/sql/hive/src/test/resources/sqlgen/window_basic_desc_nulls_first.sql b/sql/hive/src/test/resources/sqlgen/window_basic_desc_nulls_first.sql new file mode 100644 index 0000000000000..1b9db2993b09d --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/window_basic_desc_nulls_first.sql @@ -0,0 +1,5 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. 
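+-- The window frame here is unordered (OVER ()); it is the outer ORDER BY of this
+-- windowed query that carries DESC NULLS FIRST, overriding the DESC default of
+-- NULLS LAST.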
+SELECT key, value, ROUND(AVG(key) OVER (), 2) +FROM parquet_t1 ORDER BY key desc nulls first +-------------------------------------------------------------------------------- +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `round(avg(key) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), 2)` FROM (SELECT `gen_attr_0`, `gen_attr_1`, round(`gen_attr_3`, 2) AS `gen_attr_2` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, avg(`gen_attr_0`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `gen_attr_3` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1) AS gen_subquery_2 ORDER BY `gen_attr_0` DESC NULLS FIRST) AS parquet_t1 diff --git a/sql/hive/src/test/resources/sqlgen/window_with_join.sql b/sql/hive/src/test/resources/sqlgen/window_with_join.sql index 030a4c0907a1c..43d5b47be8fba 100644 --- a/sql/hive/src/test/resources/sqlgen/window_with_join.sql +++ b/sql/hive/src/test/resources/sqlgen/window_with_join.sql @@ -2,4 +2,4 @@ SELECT x.key, MAX(y.key) OVER (PARTITION BY x.key % 5 ORDER BY x.key) FROM parquet_t1 x JOIN parquet_t1 y ON x.key = y.key -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `max(key) OVER (PARTITION BY (key % CAST(5 AS BIGINT)) ORDER BY key ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT gen_subquery_2.`gen_attr_0`, gen_subquery_2.`gen_attr_2`, gen_subquery_2.`gen_attr_3`, max(`gen_attr_2`) OVER (PARTITION BY `gen_attr_3` ORDER BY `gen_attr_0` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_1` FROM (SELECT `gen_attr_0`, `gen_attr_2`, (`gen_attr_0` % CAST(5 AS BIGINT)) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`parquet_t1`) AS gen_subquery_0 INNER JOIN (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_5` FROM `default`.`parquet_t1`) AS gen_subquery_1 ON (`gen_attr_0` = `gen_attr_2`)) AS gen_subquery_2) AS gen_subquery_3) AS x +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `max(key) OVER (PARTITION BY (key % CAST(5 AS BIGINT)) ORDER BY key ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT gen_subquery_2.`gen_attr_0`, gen_subquery_2.`gen_attr_2`, gen_subquery_2.`gen_attr_3`, max(`gen_attr_2`) OVER (PARTITION BY `gen_attr_3` ORDER BY `gen_attr_0` ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_1` FROM (SELECT `gen_attr_0`, `gen_attr_2`, (`gen_attr_0` % CAST(5 AS BIGINT)) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`parquet_t1`) AS gen_subquery_0 INNER JOIN (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_5` FROM `default`.`parquet_t1`) AS gen_subquery_1 ON (`gen_attr_0` = `gen_attr_2`)) AS gen_subquery_2) AS gen_subquery_3) AS x diff --git a/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg.sql b/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg.sql index 7b99539a05480..33a8e83750be0 100644 --- a/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg.sql +++ b/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg.sql @@ -4,4 +4,4 @@ DENSE_RANK() OVER (DISTRIBUTE BY key SORT BY key, value) AS dr, COUNT(key) FROM parquet_t1 GROUP BY key, value 
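 -- Hive-style DISTRIBUTE BY/SORT BY window specs resolve to the same defaults: the
 -- expected SQL below renders them as ORDER BY ... ASC NULLS FIRST inside OVER.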
-------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `dr`, `gen_attr_3` AS `count(key)` FROM (SELECT `gen_attr_0`, `gen_attr_1`, `gen_attr_2`, `gen_attr_3` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, gen_subquery_1.`gen_attr_3`, DENSE_RANK() OVER (PARTITION BY `gen_attr_0` ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_2` FROM (SELECT `gen_attr_0`, `gen_attr_1`, count(`gen_attr_0`) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_0`, `gen_attr_1`) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `dr`, `gen_attr_3` AS `count(key)` FROM (SELECT `gen_attr_0`, `gen_attr_1`, `gen_attr_2`, `gen_attr_3` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, gen_subquery_1.`gen_attr_3`, DENSE_RANK() OVER (PARTITION BY `gen_attr_0` ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_2` FROM (SELECT `gen_attr_0`, `gen_attr_1`, count(`gen_attr_0`) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_0`, `gen_attr_1`) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 diff --git a/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_filter.sql b/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_filter.sql index 591a654a3888e..e01bc034d3d12 100644 --- a/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_filter.sql +++ b/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_filter.sql @@ -4,4 +4,4 @@ DENSE_RANK() OVER (DISTRIBUTE BY key SORT BY key, value) AS dr, COUNT(key) OVER(DISTRIBUTE BY key SORT BY key, value) AS ca FROM parquet_t1 -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `dr`, `gen_attr_3` AS `ca` FROM (SELECT `gen_attr_0`, `gen_attr_1`, `gen_attr_2`, `gen_attr_3` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, DENSE_RANK() OVER (PARTITION BY `gen_attr_0` ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_2`, count(`gen_attr_0`) OVER (PARTITION BY `gen_attr_0` ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_3` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `dr`, `gen_attr_3` AS `ca` FROM (SELECT `gen_attr_0`, `gen_attr_1`, `gen_attr_2`, `gen_attr_3` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, DENSE_RANK() OVER (PARTITION BY `gen_attr_0` ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_2`, count(`gen_attr_0`) OVER (PARTITION BY `gen_attr_0` ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_3` FROM (SELECT `gen_attr_0`, `gen_attr_1` 
FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 diff --git a/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_functions.sql b/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_functions.sql index d9169eab6e46a..dbfa408fa517e 100644 --- a/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_functions.sql +++ b/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_functions.sql @@ -3,4 +3,4 @@ SELECT key, value, MAX(value) OVER (PARTITION BY key % 5 ORDER BY key) AS max FROM parquet_t1 GROUP BY key, value -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `max` FROM (SELECT `gen_attr_0`, `gen_attr_1`, `gen_attr_2` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, gen_subquery_1.`gen_attr_3`, max(`gen_attr_1`) OVER (PARTITION BY `gen_attr_3` ORDER BY `gen_attr_0` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_2` FROM (SELECT `gen_attr_0`, `gen_attr_1`, (`gen_attr_0` % CAST(5 AS BIGINT)) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_0`, `gen_attr_1`) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `max` FROM (SELECT `gen_attr_0`, `gen_attr_1`, `gen_attr_2` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, gen_subquery_1.`gen_attr_3`, max(`gen_attr_1`) OVER (PARTITION BY `gen_attr_3` ORDER BY `gen_attr_0` ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_2` FROM (SELECT `gen_attr_0`, `gen_attr_1`, (`gen_attr_0` % CAST(5 AS BIGINT)) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_0`, `gen_attr_1`) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 diff --git a/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_having.sql b/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_having.sql index f0a820811ee0a..6f5741b946262 100644 --- a/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_having.sql +++ b/sql/hive/src/test/resources/sqlgen/window_with_the_same_window_with_agg_having.sql @@ -3,4 +3,4 @@ SELECT key, value, MAX(value) OVER (PARTITION BY key % 5 ORDER BY key DESC) AS max FROM parquet_t1 GROUP BY key, value HAVING key > 5 -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `max` FROM (SELECT `gen_attr_0`, `gen_attr_1`, `gen_attr_2` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, gen_subquery_1.`gen_attr_3`, max(`gen_attr_1`) OVER (PARTITION BY `gen_attr_3` ORDER BY `gen_attr_0` DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_2` FROM (SELECT `gen_attr_0`, `gen_attr_1`, (`gen_attr_0` % CAST(5 AS BIGINT)) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_0`, `gen_attr_1` HAVING (`gen_attr_0` > CAST(5 AS BIGINT))) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value`, `gen_attr_2` AS `max` FROM (SELECT 
`gen_attr_0`, `gen_attr_1`, `gen_attr_2` FROM (SELECT gen_subquery_1.`gen_attr_0`, gen_subquery_1.`gen_attr_1`, gen_subquery_1.`gen_attr_3`, max(`gen_attr_1`) OVER (PARTITION BY `gen_attr_3` ORDER BY `gen_attr_0` DESC NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `gen_attr_2` FROM (SELECT `gen_attr_0`, `gen_attr_1`, (`gen_attr_0` % CAST(5 AS BIGINT)) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY `gen_attr_0`, `gen_attr_1` HAVING (`gen_attr_0` > CAST(5 AS BIGINT))) AS gen_subquery_1) AS gen_subquery_2) AS parquet_t1 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala index d80f894c22dd8..7fa5c29dc5b8f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala @@ -235,6 +235,16 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { checkSQL("SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY key, MAX(key)", "agg3") } + test("order by asc nulls last") { + checkSQL("SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY key nulls last, MAX(key)", + "sort_asc_nulls_last") + } + + test("order by desc nulls first") { + checkSQL("SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY key desc nulls first," + + "MAX(key)", "sort_desc_nulls_first") + } + test("type widening in union") { checkSQL("SELECT id FROM parquet_t0 UNION ALL SELECT CAST(id AS INT) AS id FROM parquet_t0", "type_widening") @@ -697,6 +707,20 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { |FROM parquet_t1 """.stripMargin, "window_basic_3") + + checkSQL( + """ + |SELECT key, value, ROUND(AVG(key) OVER (), 2) + |FROM parquet_t1 ORDER BY key nulls last + """.stripMargin, + "window_basic_asc_nulls_last") + + checkSQL( + """ + |SELECT key, value, ROUND(AVG(key) OVER (), 2) + |FROM parquet_t1 ORDER BY key desc nulls first + """.stripMargin, + "window_basic_desc_nulls_first") } test("multiple window functions in one expression") { From c2ae3a80b6ed8afd83412759de00e7f098f07511 Mon Sep 17 00:00:00 2001 From: Xin Wu Date: Fri, 2 Sep 2016 10:33:10 -0700 Subject: [PATCH 11/19] SPARK-10747: change the null definition in SortOrder --- .../unsafe/sort/PrefixComparator.java | 1 - .../unsafe/sort/PrefixComparators.java | 82 ++----------------- .../unsafe/sort/UnsafeInMemorySorter.java | 10 +-- .../unsafe/sort/RadixSortSuite.scala | 3 +- .../sql/catalyst/expressions/SortOrder.scala | 21 ++++- .../spark/sql/execution/SortPrefixUtils.scala | 2 +- .../catalyst/ExpressionSQLBuilderSuite.scala | 6 +- 7 files changed, 37 insertions(+), 88 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java index be738d29d98af..45b78829e4cf7 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java @@ -25,6 +25,5 @@ */ @Private public abstract class PrefixComparator { - public enum NullOrder {FIRST, LAST} public abstract int compare(long prefix1, long prefix2); } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java 
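The golden files above, together with the new LogicalPlanToSQLSuite cases, pin down the convention the whole series implements: ASC defaults to NULLS FIRST and DESC defaults to NULLS LAST unless the query says otherwise. A minimal spark-shell sketch of that behavior (illustrative only; assumes the full series is applied, and the table name is made up):

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local").appName("null-ordering-demo").getOrCreate()
    import spark.implicits._
    Seq(Option(1), None, Option(2)).toDF("key").createOrReplaceTempView("t")
    // Explicit null ordering, newly accepted by the parser:
    spark.sql("SELECT key FROM t ORDER BY key DESC NULLS FIRST").show()  // null, 2, 1
    // Default for DESC is NULLS LAST, so the null row moves to the end:
    spark.sql("SELECT key FROM t ORDER BY key DESC").show()              // 2, 1, null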
From c2ae3a80b6ed8afd83412759de00e7f098f07511 Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Fri, 2 Sep 2016 10:33:10 -0700
Subject: [PATCH 11/19] SPARK-10747: change the null definition in SortOrder

---
 .../unsafe/sort/PrefixComparator.java         |  1 -
 .../unsafe/sort/PrefixComparators.java        | 82 ++-----------------
 .../unsafe/sort/UnsafeInMemorySorter.java     | 10 +--
 .../unsafe/sort/RadixSortSuite.scala          |  3 +-
 .../sql/catalyst/expressions/SortOrder.scala  | 21 ++++-
 .../spark/sql/execution/SortPrefixUtils.scala |  2 +-
 .../catalyst/ExpressionSQLBuilderSuite.scala  |  6 +-
 7 files changed, 37 insertions(+), 88 deletions(-)

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java
index be738d29d98af..45b78829e4cf7 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparator.java
@@ -25,6 +25,5 @@
  */
 @Private
 public abstract class PrefixComparator {
-  public enum NullOrder {FIRST, LAST}
   public abstract int compare(long prefix1, long prefix2);
 }
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
index 65b4f987f9e42..27581a9e9b8b8 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
@@ -108,7 +108,7 @@ public abstract static class RadixSortSupport extends PrefixComparator {
     public abstract boolean sortSigned();

     /** @return Whether the sort should put null first or last. */
-    public abstract NullOrder nullOrder();
+    public abstract boolean nullFirst();
   }

   //
@@ -119,16 +119,8 @@ public abstract static class RadixSortSupport extends PrefixComparator {
   public static final class UnsignedPrefixComparator extends RadixSortSupport {
     @Override public boolean sortDescending() { return false; }
     @Override public boolean sortSigned() { return false; }
-    @Override public NullOrder nullOrder() { return NullOrder.FIRST; }
+    @Override public boolean nullFirst() { return true; }
     public int compare(long aPrefix, long bPrefix) {
-      if (aPrefix == 0L && bPrefix == 0L) {
-        return 0;
-      }
-      if (aPrefix == 0L) {
-        return -1;
-      } else if (bPrefix == 0L) {
-        return 1;
-      }
       return UnsignedLongs.compare(aPrefix, bPrefix);
     }
   }
@@ -137,16 +129,8 @@ public int compare(long aPrefix, long bPrefix) {
   public static final class UnsignedPrefixComparatorNullLast extends RadixSortSupport {
     @Override public boolean sortDescending() { return false; }
     @Override public boolean sortSigned() { return false; }
-    @Override public NullOrder nullOrder() { return NullOrder.LAST; }
+    @Override public boolean nullFirst() { return false; }
     public int compare(long aPrefix, long bPrefix) {
-      if (aPrefix == 0L && bPrefix == 0L) {
-        return 0;
-      }
-      if (aPrefix == 0L) {
-        return 1;
-      } else if (bPrefix == 0L) {
-        return -1;
-      }
       return UnsignedLongs.compare(aPrefix, bPrefix);
     }
   }
@@ -155,16 +139,8 @@ public int compare(long aPrefix, long bPrefix) {
   public static final class UnsignedPrefixComparatorDescNullFirst extends RadixSortSupport {
     @Override public boolean sortDescending() { return true; }
     @Override public boolean sortSigned() { return false; }
-    @Override public NullOrder nullOrder() { return NullOrder.FIRST; }
+    @Override public boolean nullFirst() {return true;}
     public int compare(long bPrefix, long aPrefix) {
-      if (aPrefix == 0L && bPrefix == 0L) {
-        return 0;
-      }
-      if (bPrefix == 0L) {
-        return -1;
-      } else if (aPrefix == 0L) {
-        return 1;
-      }
       return UnsignedLongs.compare(aPrefix, bPrefix);
     }
   }
@@ -173,16 +149,8 @@ public int compare(long bPrefix, long aPrefix) {
   public static final class UnsignedPrefixComparatorDesc extends RadixSortSupport {
     @Override public boolean sortDescending() { return true; }
     @Override public boolean sortSigned() { return false; }
-    @Override public NullOrder nullOrder() { return NullOrder.LAST; }
+    @Override public boolean nullFirst() { return false; }
     public int compare(long bPrefix, long aPrefix) {
-      if (aPrefix == 0L && bPrefix == 0L) {
-        return 0;
-      }
-      if (bPrefix == 0L) {
-        return 1;
-      } else if (aPrefix == 0L) {
-        return -1;
-      }
       return UnsignedLongs.compare(aPrefix, bPrefix);
     }
   }
@@ -191,16 +159,8 @@ public int compare(long bPrefix, long aPrefix) {
   public static final class SignedPrefixComparator extends RadixSortSupport {
     @Override public boolean sortDescending() { return false; }
     @Override public boolean sortSigned() { return true; }
-    @Override public NullOrder nullOrder() { return NullOrder.FIRST; }
+    @Override public boolean nullFirst() { return true; }
     public int compare(long a, long b) {
-      if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) {
-        return 0;
-      }
-      if (a == Long.MIN_VALUE) {
-        return -1;
-      } else if (b == Long.MIN_VALUE) {
-        return 1;
-      }
       return (a < b) ? -1 : (a > b) ? 1 : 0;
     }
   }
@@ -209,16 +169,8 @@ public int compare(long a, long b) {
   public static final class SignedPrefixComparatorNullLast extends RadixSortSupport {
     @Override public boolean sortDescending() { return false; }
     @Override public boolean sortSigned() { return true; }
-    @Override public NullOrder nullOrder() { return NullOrder.LAST; }
+    @Override public boolean nullFirst() { return false; }
     public int compare(long a, long b) {
-      if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) {
-        return 0;
-      }
-      if (a == Long.MIN_VALUE) {
-        return 1;
-      } else if (b == Long.MIN_VALUE) {
-        return -1;
-      }
       return (a < b) ? -1 : (a > b) ? 1 : 0;
     }
   }
@@ -227,16 +179,8 @@ public int compare(long a, long b) {
   public static final class SignedPrefixComparatorDescNullFirst extends RadixSortSupport {
     @Override public boolean sortDescending() { return true; }
     @Override public boolean sortSigned() { return true; }
-    @Override public NullOrder nullOrder() { return NullOrder.FIRST; }
+    @Override public boolean nullFirst() { return true; }
     public int compare(long b, long a) {
-      if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) {
-        return 0;
-      }
-      if (b == Long.MIN_VALUE) {
-        return -1;
-      } else if (a == Long.MIN_VALUE) {
-        return 1;
-      }
       return (a < b) ? -1 : (a > b) ? 1 : 0;
     }
   }
@@ -245,16 +189,8 @@ public int compare(long b, long a) {
   public static final class SignedPrefixComparatorDesc extends RadixSortSupport {
     @Override public boolean sortDescending() { return true; }
     @Override public boolean sortSigned() { return true; }
-    @Override public NullOrder nullOrder() { return NullOrder.LAST; }
+    @Override public boolean nullFirst() { return false; }
     public int compare(long b, long a) {
-      if (a == Long.MIN_VALUE && b == Long.MIN_VALUE) {
-        return 0;
-      }
-      if (b == Long.MIN_VALUE) {
-        return 1;
-      } else if (a == Long.MIN_VALUE) {
-        return -1;
-      }
       return (a < b) ? -1 : (a > b) ? 1 : 0;
     }
   }
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
index a158d4e13ec80..dbef683b02c39 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
@@ -335,16 +335,16 @@ public UnsafeSorterIterator getSortedIterator() {
       LinkedList<UnsafeSorterIterator> queue = new LinkedList<>();

       // The null order is either LAST or FIRST, regardless of sorting direction (ASC|DESC)
-      if (radixSortSupport.nullOrder() == PrefixComparator.NullOrder.LAST) {
-        queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset));
-        queue.add(new SortedIterator(nullBoundaryPos / 2, 0));
-      } else if (radixSortSupport.nullOrder() == PrefixComparator.NullOrder.FIRST) {
+      if (radixSortSupport.nullFirst()) {
         queue.add(new SortedIterator(nullBoundaryPos / 2, 0));
         queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset));
+      } else {
+        queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset));
+        queue.add(new SortedIterator(nullBoundaryPos / 2, 0));
       }
       return new UnsafeExternalSorter.ChainedIterator(queue);
     } else {
       return new SortedIterator(pos / 2, offset);
     }
   }
-}
+}
\ No newline at end of file
diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
index eed79f853c169..dc70b90fcd2a8 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
@@ -27,7 +27,6 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.unsafe.array.LongArray
 import org.apache.spark.unsafe.memory.MemoryBlock
 import org.apache.spark.util.collection.Sorter
-import org.apache.spark.util.collection.unsafe.sort.PrefixComparator.NullOrder
 import org.apache.spark.util.random.XORShiftRandom

 class RadixSortSuite extends SparkFunSuite with Logging {
@@ -66,7 +65,7 @@ class RadixSortSuite extends SparkFunSuite with Logging {
     new PrefixComparators.RadixSortSupport {
       override def sortDescending = false
       override def sortSigned = false
-      override def nullOrder = NullOrder.FIRST
+      override def nullFirst = true
       override def compare(a: Long, b: Long): Int = {
         return PrefixComparators.BINARY.compare(a & 0xffffff0000L, b & 0xffffff0000L)
       }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index 7977802f4741f..6eb3826014c38 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -96,11 +96,26 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {

   val nullValue = child.child.dataType match {
     case BooleanType | DateType | TimestampType | _: IntegralType =>
-      Long.MinValue
+      if ((child.isAscending && child.nullOrdering == NullFirst) ||
+        (!child.isAscending && child.nullOrdering == NullLast)) {
+        Long.MinValue
+      } else {
+        Long.MaxValue
+      }
     case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
-      Long.MinValue
+      if ((child.isAscending && child.nullOrdering == NullFirst) ||
+        (!child.isAscending && child.nullOrdering == NullLast)) {
+        Long.MinValue
+      } else {
+        Long.MaxValue
+      }
     case _: DecimalType =>
-      DoublePrefixComparator.computePrefix(Double.NegativeInfinity)
+      if ((child.isAscending && child.nullOrdering == NullFirst) ||
+        (!child.isAscending && child.nullOrdering == NullLast)) {
+        DoublePrefixComparator.computePrefix(Double.NegativeInfinity)
+      } else {
+        DoublePrefixComparator.computePrefix(Double.PositiveInfinity)
+      }
     case _ => 0L
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
index f44d3558a44d8..d1283f70f2875 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
@@ -55,7 +55,7 @@ object SortPrefixUtils {
   private def getPrefixComparatorWithNullOrder(
       sortOrder: SortOrder, signedType: String): PrefixComparator = {
     sortOrder.direction match {
-      case Ascending if (sortOrder.nullOrdering == NullLast) =>
+      case Ascending if (sortOrder == NullLast) =>
         signedType match {
           case "LONG" => PrefixComparators.LONG_NULLLAST
           case "STRING" => PrefixComparators.STRING_NULLLAST
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionSQLBuilderSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionSQLBuilderSuite.scala
index d2b2f38fa1f71..ce5efe853ca4f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionSQLBuilderSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionSQLBuilderSuite.scala
@@ -106,17 +106,17 @@ class ExpressionSQLBuilderSuite extends SQLBuilderTest {

     checkSQL(
       WindowSpecDefinition(Nil, 'a.int.asc :: Nil, frame),
-      s"(ORDER BY `a` ASC $frame)"
+      s"(ORDER BY `a` ASC NULLS FIRST $frame)"
     )

     checkSQL(
       WindowSpecDefinition(Nil, 'a.int.asc :: 'b.string.desc :: Nil, frame),
-      s"(ORDER BY `a` ASC, `b` DESC $frame)"
+      s"(ORDER BY `a` ASC NULLS FIRST, `b` DESC NULLS LAST $frame)"
     )

     checkSQL(
       WindowSpecDefinition('a.int :: 'b.string :: Nil, 'c.int.asc :: 'd.string.desc :: Nil, frame),
-      s"(PARTITION BY `a`, `b` ORDER BY `c` ASC, `d` DESC $frame)"
+      s"(PARTITION BY `a`, `b` ORDER BY `c` ASC NULLS FIRST, `d` DESC NULLS LAST $frame)"
     )
   }
 }
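The patch above strips the null special-casing out of every prefix comparator: UnsafeInMemorySorter already partitions null-prefixed records to one side of the array before the radix sort runs, so compare() only ever sees non-null prefixes, and the sorted null run is simply chained before or after the rest according to nullFirst(). A toy model of that strategy (plain Scala, not Spark code):

    // Segregate nulls, sort the remainder, then concatenate per the null ordering.
    def sortedWithNulls[A](xs: Seq[Option[A]], nullsFirst: Boolean)
        (implicit ord: Ordering[A]): Seq[Option[A]] = {
      val (nulls, nonNulls) = xs.partition(_.isEmpty)
      val sorted = nonNulls.sortBy(_.get)
      if (nullsFirst) nulls ++ sorted else sorted ++ nulls
    }

    sortedWithNulls(Seq(Some(3), None, Some(1)), nullsFirst = false)
    // => List(Some(1), Some(3), None)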
From 50777bf77390c1a613c01705f47b0a767dea439a Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Fri, 2 Sep 2016 10:33:10 -0700
Date: Fri, 9 Sep 2016 14:27:48 -0700
Subject: [PATCH 12/19] SPARK-10747: handles SortSuite randomized array sorting

---
 .../unsafe/sort/PrefixComparators.java        |  4 +--
 .../spark/sql/catalyst/dsl/package.scala      |  3 ++-
 .../sql/catalyst/expressions/SortOrder.scala  | 26 ++++++++++++++-----
 .../spark/sql/execution/SortPrefixUtils.scala |  2 +-
 .../spark/sql/execution/SortSuite.scala       |  3 ++-
 5 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
index 27581a9e9b8b8..42f448bc86a35 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
@@ -74,9 +74,7 @@ public static long computePrefix(UTF8String value) {
   }

   public static final class BinaryPrefixComparator {
-    public static long computePrefix(byte[] bytes) {
-      return ByteArray.getPrefix(bytes);
-    }
+    public static long computePrefix(byte[] bytes) { return ByteArray.getPrefix(bytes); }
   }

   public static final class DoublePrefixComparator {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index 8549187a66369..3db111f2aabdc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -109,8 +109,9 @@ package object dsl {
     def cast(to: DataType): Expression = Cast(expr, to)

     def asc: SortOrder = SortOrder(expr, Ascending)
+    def asc_nullLast: SortOrder = SortOrder(expr, Ascending, NullLast)
     def desc: SortOrder = SortOrder(expr, Descending)
-
+    def desc_nullFirst: SortOrder = SortOrder(expr, Descending, NullFirst)
     def as(alias: String): NamedExpression = Alias(expr, alias)()
     def as(alias: Symbol): NamedExpression = Alias(expr, alias.name)()
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index 6eb3826014c38..13acd4ce35742 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -20,9 +20,8 @@ package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
-import org.apache.spark.sql.types._
-import org.apache.spark.util.collection.unsafe.sort.PrefixComparators.BinaryPrefixComparator
-import org.apache.spark.util.collection.unsafe.sort.PrefixComparators.DoublePrefixComparator
+import org.apache.spark.sql.types.{BinaryType, StringType, _}
+import org.apache.spark.util.collection.unsafe.sort.PrefixComparators.{BinaryPrefixComparator, DoublePrefixComparator, StringPrefixComparator}

 abstract sealed class SortDirection {
   def sql: String
@@ -114,9 +113,23 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
         (!child.isAscending && child.nullOrdering == NullLast)) {
         DoublePrefixComparator.computePrefix(Double.NegativeInfinity)
       } else {
-        DoublePrefixComparator.computePrefix(Double.PositiveInfinity)
+        DoublePrefixComparator.computePrefix(Double.NaN)
+      }
+    case BinaryType =>
+      if ((child.isAscending && child.nullOrdering == NullFirst) ||
+        (!child.isAscending && child.nullOrdering == NullLast)) {
+        0L
+      } else {
+        // This may not be the best choice for null value
+        DoublePrefixComparator.computePrefix(Double.MaxValue)
+      }
+    case _ =>
+      if ((child.isAscending && child.nullOrdering == NullFirst) ||
+        (!child.isAscending && child.nullOrdering == NullLast)) {
+        0L
+      } else {
+        DoublePrefixComparator.computePrefix(Double.NaN)
       }
-    case _ => 0L
   }

   override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
@@ -126,6 +139,7 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
     val input = childCode.value
     val BinaryPrefixCmp = classOf[BinaryPrefixComparator].getName
     val DoublePrefixCmp = classOf[DoublePrefixComparator].getName
+    val StringPrefixCmp = classOf[StringPrefixComparator].getName
     val prefixCode = child.child.dataType match {
       case BooleanType =>
         s"$input ? 1L : 0L"
@@ -135,7 +149,7 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
         s"(long) $input"
       case FloatType | DoubleType =>
         s"$DoublePrefixCmp.computePrefix((double)$input)"
-      case StringType => s"$input.getPrefix()"
+      case StringType => s"$StringPrefixCmp.computePrefix($input)"
       case BinaryType => s"$BinaryPrefixCmp.computePrefix($input)"
       case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
         if (dt.precision <= Decimal.MAX_LONG_DIGITS) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
index d1283f70f2875..f44d3558a44d8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
@@ -55,7 +55,7 @@ object SortPrefixUtils {
   private def getPrefixComparatorWithNullOrder(
       sortOrder: SortOrder, signedType: String): PrefixComparator = {
     sortOrder.direction match {
-      case Ascending if (sortOrder == NullLast) =>
+      case Ascending if (sortOrder.nullOrdering == NullLast) =>
         signedType match {
           case "LONG" => PrefixComparators.LONG_NULLLAST
           case "STRING" => PrefixComparators.STRING_NULLLAST
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala
index ba3fa3732d0df..31d70714bcffe 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala
@@ -101,7 +101,8 @@ class SortSuite extends SparkPlanTest with SharedSQLContext {
   for (
     dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType);
     nullable <- Seq(true, false);
-    sortOrder <- Seq('a.asc :: Nil, 'a.desc :: Nil);
+    sortOrder <-
+      Seq('a.asc :: Nil, 'a.asc_nullLast :: Nil, 'a.desc :: Nil, 'a.desc_nullFirst :: Nil);
     randomDataGenerator <- RandomDataGenerator.forType(dataType, nullable)
   ) {
     test(s"sorting on $dataType with nullable=$nullable, sortOrder=$sortOrder") {
From 19905b98d281365fe31adafad85bf18c3488ae11 Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Fri, 9 Sep 2016 17:02:00 -0700
Subject: [PATCH 13/19] SPARK-10747: BinaryType case

---
 .../spark/sql/catalyst/expressions/SortOrder.scala | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index 13acd4ce35742..6f2a3eb26c3bf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -115,19 +115,12 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
       } else {
         DoublePrefixComparator.computePrefix(Double.NaN)
       }
-    case BinaryType =>
-      if ((child.isAscending && child.nullOrdering == NullFirst) ||
-        (!child.isAscending && child.nullOrdering == NullLast)) {
-        0L
-      } else {
-        // This may not be the best choice for null value
-        DoublePrefixComparator.computePrefix(Double.MaxValue)
-      }
     case _ =>
       if ((child.isAscending && child.nullOrdering == NullFirst) ||
         (!child.isAscending && child.nullOrdering == NullLast)) {
         0L
       } else {
+        // This may not be the best choice for null value for BinaryType
         DoublePrefixComparator.computePrefix(Double.NaN)
       }
From ccdec132fdaa276ba8ffa0a76d673b06a83bbf96 Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Fri, 9 Sep 2016 18:57:01 -0700
Subject: [PATCH 14/19] SPARK-10747: use -1L for null value for unsigned prefix comparator

---
 .../unsafe/sort/PrefixComparators.java        | 36 +++++++------------
 .../unsafe/sort/RadixSortSuite.scala          |  9 ++---
 .../sql/catalyst/expressions/SortOrder.scala  |  3 +-
 3 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
index 42f448bc86a35..8fccb3c57a086 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
@@ -27,45 +27,33 @@ public class PrefixComparators {
   private PrefixComparators() {}

-  public static final PrefixComparator STRING =
-    new UnsignedPrefixComparator();
-  public static final PrefixComparator STRING_NULLLAST =
-    new UnsignedPrefixComparatorNullLast();
+  public static final PrefixComparator STRING = new UnsignedPrefixComparator();
+  public static final PrefixComparator STRING_NULLLAST = new UnsignedPrefixComparatorNullLast();

   public static final PrefixComparator STRING_DESC_NULLFIRST =
     new UnsignedPrefixComparatorDescNullFirst();
-  public static final PrefixComparator STRING_DESC =
-    new UnsignedPrefixComparatorDesc();
+  public static final PrefixComparator STRING_DESC = new UnsignedPrefixComparatorDesc();

-  public static final PrefixComparator BINARY =
-    new UnsignedPrefixComparator();
-  public static final PrefixComparator BINARY_NULLLAST =
-    new UnsignedPrefixComparatorNullLast();
+  public static final PrefixComparator BINARY = new UnsignedPrefixComparator();
+  public static final PrefixComparator BINARY_NULLLAST = new UnsignedPrefixComparatorNullLast();

   public static final PrefixComparator BINARY_DESC_NULLFIRST =
     new UnsignedPrefixComparatorDescNullFirst();
-  public static final PrefixComparator BINARY_DESC =
-    new UnsignedPrefixComparatorDesc();
+  public static final PrefixComparator BINARY_DESC = new UnsignedPrefixComparatorDesc();

-  public static final PrefixComparator LONG =
-    new SignedPrefixComparator();
-  public static final PrefixComparator LONG_NULLLAST =
-    new SignedPrefixComparatorNullLast();
+  public static final PrefixComparator LONG = new SignedPrefixComparator();
+  public static final PrefixComparator LONG_NULLLAST = new SignedPrefixComparatorNullLast();

   public static final PrefixComparator LONG_DESC_NULLFIRST =
     new SignedPrefixComparatorDescNullFirst();
-  public static final PrefixComparator LONG_DESC =
-    new SignedPrefixComparatorDesc();
+  public static final PrefixComparator LONG_DESC = new SignedPrefixComparatorDesc();

-  public static final PrefixComparator DOUBLE =
-    new UnsignedPrefixComparator();
-  public static final PrefixComparator DOUBLE_NULLLAST =
-    new UnsignedPrefixComparatorNullLast();
+  public static final PrefixComparator DOUBLE = new UnsignedPrefixComparator();
+  public static final PrefixComparator DOUBLE_NULLLAST = new UnsignedPrefixComparatorNullLast();

   public static final PrefixComparator DOUBLE_DESC_NULLFIRST =
     new UnsignedPrefixComparatorDescNullFirst();
-  public static final PrefixComparator DOUBLE_DESC =
-    new UnsignedPrefixComparatorDesc();
+  public static final PrefixComparator DOUBLE_DESC = new UnsignedPrefixComparatorDesc();

   public static final class StringPrefixComparator {
     public static long computePrefix(UTF8String value) {
diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
index dc70b90fcd2a8..cf6e936c3e5d4 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
@@ -46,20 +46,21 @@ class RadixSortSuite extends SparkFunSuite with Logging {
     RadixSortType("unsigned binary data asc nulls first",
       PrefixComparators.BINARY, 0, 7, false, false, true),
     RadixSortType("unsigned binary data asc nulls last",
-      PrefixComparators.BINARY, 0, 7, false, false, false),
+      PrefixComparators.BINARY_NULLLAST, 0, 7, false, false, false),
     RadixSortType("unsigned binary data desc nulls last",
-      PrefixComparators.BINARY_DESC, 0, 7, true, false, false),
+      PrefixComparators.BINARY_DESC_NULLFIRST, 0, 7, true, false, false),
     RadixSortType("unsigned binary data desc nulls first",
       PrefixComparators.BINARY_DESC, 0, 7, true, false, true),
     RadixSortType("twos complement asc nulls first",
       PrefixComparators.LONG, 0, 7, false, true, true),
     RadixSortType("twos complement asc nulls last",
-      PrefixComparators.LONG, 0, 7, false, true, false),
+      PrefixComparators.LONG_NULLLAST, 0, 7, false, true, false),
     RadixSortType("twos complement desc nulls last",
       PrefixComparators.LONG_DESC, 0, 7, true, true, false),
     RadixSortType("twos complement desc nulls first",
-      PrefixComparators.LONG_DESC, 0, 7, true, true, true),
+      PrefixComparators.LONG_DESC_NULLFIRST, 0, 7, true, true, true),
+
     RadixSortType(
       "binary data partial",
       new PrefixComparators.RadixSortSupport {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index 6f2a3eb26c3bf..fa13c39524009 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -120,8 +120,7 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
         (!child.isAscending && child.nullOrdering == NullLast)) {
         0L
       } else {
-        // This may not be the best choice for null value for BinaryType
-        DoublePrefixComparator.computePrefix(Double.NaN)
+        -1L
       }
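The coding-style pass below also settles the naming scheme: only the two non-default combinations carry a suffix (NULLS_LAST for ascending, DESC_NULLS_FIRST for descending). The per-type selection it spells out in SortPrefixUtils reduces to a two-by-two matrix; the sketch uses the LONG family of names purely for illustration:

    // Which PrefixComparators constant serves each direction/null-order pair.
    def pick(ascending: Boolean, nullsFirst: Boolean): String = (ascending, nullsFirst) match {
      case (true, true)   => "LONG"                  // ASC default: nulls first
      case (true, false)  => "LONG_NULLS_LAST"
      case (false, true)  => "LONG_DESC_NULLS_FIRST"
      case (false, false) => "LONG_DESC"             // DESC default: nulls last
    }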
From 7fab3a91e3b87397a94231da00ce9882815b7df9 Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Fri, 9 Sep 2016 22:57:54 -0700
Subject: [PATCH 15/19] SPARK-10747: coding style updates upon review

---
 .../unsafe/sort/PrefixComparators.java        | 64 ++++++-------
 .../unsafe/sort/UnsafeInMemorySorter.java     |  2 +-
 .../unsafe/sort/RadixSortSuite.scala          | 10 +--
 .../spark/sql/catalyst/dsl/package.scala      |  4 +-
 .../sql/catalyst/expressions/SortOrder.scala  | 29 +++---
 .../codegen/GenerateOrdering.scala            |  8 +-
 .../sql/catalyst/expressions/ordering.scala   |  4 +-
 .../sql/catalyst/parser/AstBuilder.scala      |  4 +-
 .../spark/sql/execution/SortPrefixUtils.scala | 90 +++++++++++--------
 9 files changed, 110 insertions(+), 105 deletions(-)

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
index 8fccb3c57a086..c5cc9385f9f62 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
@@ -28,32 +28,24 @@ public class PrefixComparators {
   private PrefixComparators() {}

   public static final PrefixComparator STRING = new UnsignedPrefixComparator();
-  public static final PrefixComparator STRING_NULLLAST = new UnsignedPrefixComparatorNullLast();
-
-  public static final PrefixComparator STRING_DESC_NULLFIRST =
-    new UnsignedPrefixComparatorDescNullFirst();
   public static final PrefixComparator STRING_DESC = new UnsignedPrefixComparatorDesc();
+  public static final PrefixComparator STRING_NULLS_LAST = new UnsignedPrefixComparatorNullsLast();
+  public static final PrefixComparator STRING_DESC_NULLS_FIRST = new UnsignedPrefixComparatorDescNullsFirst();

   public static final PrefixComparator BINARY = new UnsignedPrefixComparator();
-  public static final PrefixComparator BINARY_NULLLAST = new UnsignedPrefixComparatorNullLast();
-
-  public static final PrefixComparator BINARY_DESC_NULLFIRST =
-    new UnsignedPrefixComparatorDescNullFirst();
   public static final PrefixComparator BINARY_DESC = new UnsignedPrefixComparatorDesc();
+  public static final PrefixComparator BINARY_NULLS_LAST = new UnsignedPrefixComparatorNullsLast();
+  public static final PrefixComparator BINARY_DESC_NULLS_FIRST = new UnsignedPrefixComparatorDescNullsFirst();

   public static final PrefixComparator LONG = new SignedPrefixComparator();
-  public static final PrefixComparator LONG_NULLLAST = new SignedPrefixComparatorNullLast();
-
-  public static final PrefixComparator LONG_DESC_NULLFIRST =
-    new SignedPrefixComparatorDescNullFirst();
   public static final PrefixComparator LONG_DESC = new SignedPrefixComparatorDesc();
+  public static final PrefixComparator LONG_NULLS_LAST = new SignedPrefixComparatorNullsLast();
+  public static final PrefixComparator LONG_DESC_NULLS_FIRST = new SignedPrefixComparatorDescNullsFirst();

   public static final PrefixComparator DOUBLE = new UnsignedPrefixComparator();
-  public static final PrefixComparator DOUBLE_NULLLAST = new UnsignedPrefixComparatorNullLast();
-
-  public static final PrefixComparator DOUBLE_DESC_NULLFIRST =
-    new UnsignedPrefixComparatorDescNullFirst();
   public static final PrefixComparator DOUBLE_DESC = new UnsignedPrefixComparatorDesc();
+  public static final PrefixComparator DOUBLE_NULLS_LAST = new UnsignedPrefixComparatorNullsLast();
+  public static final PrefixComparator DOUBLE_DESC_NULLS_FIRST = new UnsignedPrefixComparatorDescNullsFirst();

   public static final class StringPrefixComparator {
     public static long computePrefix(UTF8String value) {
@@ -62,7 +54,9 @@ public static long computePrefix(UTF8String value) {
   }

   public static final class BinaryPrefixComparator {
-    public static long computePrefix(byte[] bytes) { return ByteArray.getPrefix(bytes); }
+    public static long computePrefix(byte[] bytes) {
+      return ByteArray.getPrefix(bytes);
+    }
   }

   public static final class DoublePrefixComparator {
@@ -93,89 +87,81 @@ public abstract static class RadixSortSupport extends PrefixComparator {
     /** @return Whether the sort should take into account the sign bit. */
     public abstract boolean sortSigned();

-    /** @return Whether the sort should put null first or last. */
-    public abstract boolean nullFirst();
+    /** @return Whether the sort should put nulls first or last. */
+    public abstract boolean nullsFirst();
   }

   //
   // Standard prefix comparator implementations
   //

-  // unsigned asc null first (default)
   public static final class UnsignedPrefixComparator extends RadixSortSupport {
     @Override public boolean sortDescending() { return false; }
     @Override public boolean sortSigned() { return false; }
-    @Override public boolean nullFirst() { return true; }
+    @Override public boolean nullsFirst() { return true; }
     public int compare(long aPrefix, long bPrefix) {
       return UnsignedLongs.compare(aPrefix, bPrefix);
     }
   }

-  // unsigned asc null last
-  public static final class UnsignedPrefixComparatorNullLast extends RadixSortSupport {
+  public static final class UnsignedPrefixComparatorNullsLast extends RadixSortSupport {
     @Override public boolean sortDescending() { return false; }
     @Override public boolean sortSigned() { return false; }
-    @Override public boolean nullFirst() { return false; }
+    @Override public boolean nullsFirst() { return false; }
     public int compare(long aPrefix, long bPrefix) {
       return UnsignedLongs.compare(aPrefix, bPrefix);
     }
   }

-  // unsigned desc null first
-  public static final class UnsignedPrefixComparatorDescNullFirst extends RadixSortSupport {
+  public static final class UnsignedPrefixComparatorDescNullsFirst extends RadixSortSupport {
     @Override public boolean sortDescending() { return true; }
     @Override public boolean sortSigned() { return false; }
-    @Override public boolean nullFirst() {return true;}
+    @Override public boolean nullsFirst() { return true; }
     public int compare(long bPrefix, long aPrefix) {
       return UnsignedLongs.compare(aPrefix, bPrefix);
     }
   }

-  // unsigned desc null last (default)
   public static final class UnsignedPrefixComparatorDesc extends RadixSortSupport {
     @Override public boolean sortDescending() { return true; }
     @Override public boolean sortSigned() { return false; }
-    @Override public boolean nullFirst() { return false; }
+    @Override public boolean nullsFirst() { return false; }
     public int compare(long bPrefix, long aPrefix) {
       return UnsignedLongs.compare(aPrefix, bPrefix);
     }
   }

-  // signed asc null first (default)
   public static final class SignedPrefixComparator extends RadixSortSupport {
     @Override public boolean sortDescending() { return false; }
     @Override public boolean sortSigned() { return true; }
-    @Override public boolean nullFirst() { return true; }
+    @Override public boolean nullsFirst() { return true; }
     public int compare(long a, long b) {
       return (a < b) ? -1 : (a > b) ? 1 : 0;
     }
   }

-  // signed asc null last
-  public static final class SignedPrefixComparatorNullLast extends RadixSortSupport {
+  public static final class SignedPrefixComparatorNullsLast extends RadixSortSupport {
     @Override public boolean sortDescending() { return false; }
     @Override public boolean sortSigned() { return true; }
-    @Override public boolean nullFirst() { return false; }
+    @Override public boolean nullsFirst() { return false; }
     public int compare(long a, long b) {
       return (a < b) ? -1 : (a > b) ? 1 : 0;
     }
   }

-  // signed desc null first
-  public static final class SignedPrefixComparatorDescNullFirst extends RadixSortSupport {
+  public static final class SignedPrefixComparatorDescNullsFirst extends RadixSortSupport {
     @Override public boolean sortDescending() { return true; }
     @Override public boolean sortSigned() { return true; }
-    @Override public boolean nullFirst() { return true; }
+    @Override public boolean nullsFirst() { return true; }
     public int compare(long b, long a) {
       return (a < b) ? -1 : (a > b) ? 1 : 0;
     }
   }

-  // signed desc null last (default)
   public static final class SignedPrefixComparatorDesc extends RadixSortSupport {
     @Override public boolean sortDescending() { return true; }
     @Override public boolean sortSigned() { return true; }
-    @Override public boolean nullFirst() { return false; }
+    @Override public boolean nullsFirst() { return false; }
     public int compare(long b, long a) {
       return (a < b) ? -1 : (a > b) ? 1 : 0;
     }
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
index dbef683b02c39..be382955c0d42 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
@@ -335,7 +335,7 @@ public UnsafeSorterIterator getSortedIterator() {
       LinkedList<UnsafeSorterIterator> queue = new LinkedList<>();

       // The null order is either LAST or FIRST, regardless of sorting direction (ASC|DESC)
-      if (radixSortSupport.nullFirst()) {
+      if (radixSortSupport.nullsFirst()) {
         queue.add(new SortedIterator(nullBoundaryPos / 2, 0));
         queue.add(new SortedIterator((pos - nullBoundaryPos) / 2, offset));
       } else {
diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
index cf6e936c3e5d4..83786e4769688 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
@@ -46,27 +46,27 @@ class RadixSortSuite extends SparkFunSuite with Logging {
     RadixSortType("unsigned binary data asc nulls first",
       PrefixComparators.BINARY, 0, 7, false, false, true),
     RadixSortType("unsigned binary data asc nulls last",
-      PrefixComparators.BINARY_NULLLAST, 0, 7, false, false, false),
+      PrefixComparators.BINARY_NULLS_LAST, 0, 7, false, false, false),
     RadixSortType("unsigned binary data desc nulls last",
-      PrefixComparators.BINARY_DESC_NULLFIRST, 0, 7, true, false, false),
+      PrefixComparators.BINARY_DESC_NULLS_FIRST, 0, 7, true, false, false),
     RadixSortType("unsigned binary data desc nulls first",
       PrefixComparators.BINARY_DESC, 0, 7, true, false, true),
     RadixSortType("twos complement asc nulls first",
       PrefixComparators.LONG, 0, 7, false, true, true),
     RadixSortType("twos complement asc nulls last",
-      PrefixComparators.LONG_NULLLAST, 0, 7, false, true, false),
+      PrefixComparators.LONG_NULLS_LAST, 0, 7, false, true, false),
     RadixSortType("twos complement desc nulls last",
       PrefixComparators.LONG_DESC, 0, 7, true, true, false),
     RadixSortType("twos complement desc nulls first",
-      PrefixComparators.LONG_DESC_NULLFIRST, 0, 7, true, true, true),
+      PrefixComparators.LONG_DESC_NULLS_FIRST, 0, 7, true, true, true),

     RadixSortType(
       "binary data partial",
       new PrefixComparators.RadixSortSupport {
         override def sortDescending = false
         override def sortSigned = false
-        override def nullFirst = true
+        override def nullsFirst = true
         override def compare(a: Long, b: Long): Int = {
           return PrefixComparators.BINARY.compare(a & 0xffffff0000L, b & 0xffffff0000L)
         }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index 3db111f2aabdc..23db2e9615fe2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -109,9 +109,9 @@ package object dsl {
     def cast(to: DataType): Expression = Cast(expr, to)

     def asc: SortOrder = SortOrder(expr, Ascending)
-    def asc_nullLast: SortOrder = SortOrder(expr, Ascending, NullLast)
+    def asc_nullLast: SortOrder = SortOrder(expr, Ascending, NullsLast)
     def desc: SortOrder = SortOrder(expr, Descending)
-    def desc_nullFirst: SortOrder = SortOrder(expr, Descending, NullFirst)
+    def desc_nullFirst: SortOrder = SortOrder(expr, Descending, NullsFirst)
     def as(alias: String): NamedExpression = Alias(expr, alias)()
     def as(alias: Symbol): NamedExpression = Alias(expr, alias.name)()
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index fa13c39524009..92ffe0a5792ac 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -34,20 +34,20 @@ abstract sealed class NullOrdering {

 case object Ascending extends SortDirection {
   override def sql: String = "ASC"
-  override def defaultNullOrdering: NullOrdering = NullFirst
+  override def defaultNullOrdering: NullOrdering = NullsFirst
 }

 // default null order is last for desc
 case object Descending extends SortDirection {
   override def sql: String = "DESC"
-  override def defaultNullOrdering: NullOrdering = NullLast
+  override def defaultNullOrdering: NullOrdering = NullsLast
 }

-case object NullFirst extends NullOrdering{
+case object NullsFirst extends NullOrdering{
   override def sql: String = "NULLS FIRST"
 }

-case object NullLast extends NullOrdering{
+case object NullsLast extends NullOrdering{
   override def sql: String = "NULLS LAST"
 }

@@ -95,35 +95,40 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {

   val nullValue = child.child.dataType match {
     case BooleanType | DateType | TimestampType | _: IntegralType =>
-      if ((child.isAscending && child.nullOrdering == NullFirst) ||
-        (!child.isAscending && child.nullOrdering == NullLast)) {
+      if (nullAsSmallest) {
           Long.MinValue
       } else {
         Long.MaxValue
       }
     case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
-      if ((child.isAscending && child.nullOrdering == NullFirst) ||
-        (!child.isAscending && child.nullOrdering == NullLast)) {
+      if (nullAsSmallest) {
         Long.MinValue
       } else {
         Long.MaxValue
       }
     case _: DecimalType =>
-      if ((child.isAscending && child.nullOrdering == NullFirst) ||
-        (!child.isAscending && child.nullOrdering == NullLast)) {
+      if (nullAsSmallest) {
         DoublePrefixComparator.computePrefix(Double.NegativeInfinity)
       } else {
         DoublePrefixComparator.computePrefix(Double.NaN)
       }
     case _ =>
-      if ((child.isAscending && child.nullOrdering == NullFirst) ||
-        (!child.isAscending && child.nullOrdering == NullLast)) {
+      if (nullAsSmallest) {
         0L
       } else {
         -1L
       }
   }

+  def nullAsSmallest: Boolean = {
+    if ((child.isAscending && child.nullOrdering == NullsFirst) ||
+      (!child.isAscending && child.nullOrdering == NullsLast)) {
+      true
+    } else {
+      false
+    }
+  }
+
   override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 758d3544ae367..e7df95e1142ca 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -101,14 +101,14 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
         } else if ($isNullA) {
           return ${
             order.nullOrdering match {
-              case NullFirst => "-1"
-              case NullLast => "1"
+              case NullsFirst => "-1"
+              case NullsLast => "1"
             }};
         } else if ($isNullB) {
           return ${
             order.nullOrdering match {
-              case NullFirst => "1"
-              case NullLast => "-1"
+              case NullsFirst => "1"
+              case NullsLast => "-1"
             }};
         } else {
           int comp = ${ctx.genComp(order.child.dataType, primitiveA, primitiveB)};
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala
index c47427d3c0ce6..79d2052c38a27 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala
@@ -39,9 +39,9 @@ class InterpretedOrdering(ordering: Seq[SortOrder]) extends Ordering[InternalRow
       if (left == null && right == null) {
         // Both null, continue looking.
       } else if (left == null) {
-        return if (order.nullOrdering == NullFirst) -1 else 1
+        return if (order.nullOrdering == NullsFirst) -1 else 1
       } else if (right == null) {
-        return if (order.nullOrdering == NullFirst) 1 else -1
+        return if (order.nullOrdering == NullsFirst) 1 else -1
       } else {
         val comparison = order.dataType match {
           case dt: AtomicType if order.direction == Ascending =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 4b3dac89466b5..69d68fa6f92ef 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1212,9 +1212,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
       Ascending
     }
     val nullOrdering = if (ctx.FIRST != null) {
-      NullFirst
+      NullsFirst
     } else if (ctx.LAST != null) {
-      NullLast
+      NullsLast
     } else {
       direction.defaultNullOrdering
     }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
index f44d3558a44d8..c6665d273fd27 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
@@ -40,53 +40,67 @@ object SortPrefixUtils {

   def getPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
     sortOrder.dataType match {
-      case StringType => getPrefixComparatorWithNullOrder(sortOrder, "STRING")
-      case BinaryType => getPrefixComparatorWithNullOrder(sortOrder, "BINARY")
+      case StringType => stringPrefixComparator(sortOrder)
+      case BinaryType => binaryPrefixComparator(sortOrder)
       case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | TimestampType =>
-        getPrefixComparatorWithNullOrder(sortOrder, "LONG")
+        longPrefixComparator(sortOrder)
       case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
-        getPrefixComparatorWithNullOrder(sortOrder, "LONG")
-      case FloatType | DoubleType => getPrefixComparatorWithNullOrder(sortOrder, "DOUBLE")
-      case dt: DecimalType => getPrefixComparatorWithNullOrder(sortOrder, "DOUBLE")
+        longPrefixComparator(sortOrder)
+      case FloatType | DoubleType => doublePrefixComparator(sortOrder)
+      case dt: DecimalType => doublePrefixComparator(sortOrder)
       case _ => NoOpPrefixComparator
     }
   }

-  private def getPrefixComparatorWithNullOrder(
-      sortOrder: SortOrder, signedType: String): PrefixComparator = {
+  private def stringPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
     sortOrder.direction match {
-      case Ascending if (sortOrder.nullOrdering == NullLast) =>
-        signedType match {
-          case "LONG" => PrefixComparators.LONG_NULLLAST
-          case "STRING" => PrefixComparators.STRING_NULLLAST
-          case "BINARY" => PrefixComparators.BINARY_NULLLAST
-          case "DOUBLE" => PrefixComparators.DOUBLE_NULLLAST
-        }
+      case Ascending if (sortOrder.nullOrdering == NullsLast) =>
+        PrefixComparators.STRING_NULLS_LAST
       case Ascending =>
-        // or the default NULLS FIRST
-        signedType match {
-          case "LONG" => PrefixComparators.LONG
-          case "STRING" => PrefixComparators.STRING
-          case "BINARY" => PrefixComparators.BINARY
-          case "DOUBLE" => PrefixComparators.DOUBLE
-        }
-      case Descending if (sortOrder.nullOrdering == NullFirst) =>
-        signedType match {
-          case "LONG" => PrefixComparators.LONG_DESC_NULLFIRST
-          case "STRING" => PrefixComparators.STRING_DESC_NULLFIRST
-          case "BINARY" => PrefixComparators.BINARY_DESC_NULLFIRST
-          case "DOUBLE" => PrefixComparators.DOUBLE_DESC_NULLFIRST
-        }
+        PrefixComparators.STRING
+      case Descending if (sortOrder.nullOrdering == NullsFirst) =>
+        PrefixComparators.STRING_DESC_NULLS_FIRST
       case Descending =>
-        // or the default NULLS LAST
-        signedType match {
-          case "LONG" => PrefixComparators.LONG_DESC
-          case "STRING" => PrefixComparators.STRING_DESC
-          case "BINARY" => PrefixComparators.BINARY_DESC
-          case "DOUBLE" => PrefixComparators.DOUBLE_DESC
-        }
-      case _ => throw new IllegalArgumentException(
-        "This should not happen. Contact Spark contributors for this error.")
+        PrefixComparators.STRING_DESC
+    }
+  }
+
+  private def binaryPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
+    sortOrder.direction match {
+      case Ascending if (sortOrder.nullOrdering == NullsLast) =>
+        PrefixComparators.BINARY_NULLS_LAST
+      case Ascending =>
+        PrefixComparators.BINARY
+      case Descending if (sortOrder.nullOrdering == NullsFirst) =>
+        PrefixComparators.BINARY_DESC_NULLS_FIRST
+      case Descending =>
+        PrefixComparators.BINARY_DESC
+    }
+  }
+
+  private def longPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
+    sortOrder.direction match {
+      case Ascending if (sortOrder.nullOrdering == NullsLast) =>
+        PrefixComparators.LONG_NULLS_LAST
+      case Ascending =>
+        PrefixComparators.LONG
+      case Descending if (sortOrder.nullOrdering == NullsFirst) =>
+        PrefixComparators.LONG_DESC_NULLS_FIRST
+      case Descending =>
+        PrefixComparators.LONG_DESC
+    }
+  }
+
+  private def doublePrefixComparator(sortOrder: SortOrder): PrefixComparator = {
+    sortOrder.direction match {
+      case Ascending if (sortOrder.nullOrdering == NullsLast) =>
+        PrefixComparators.DOUBLE_NULLS_LAST
+      case Ascending =>
+        PrefixComparators.DOUBLE
+      case Descending if (sortOrder.nullOrdering == NullsFirst) =>
+        PrefixComparators.DOUBLE_DESC_NULLS_FIRST
+      case Descending =>
+        PrefixComparators.DOUBLE_DESC
     }
   }
From aee406ce375f2b12e42b6735565a51e3151c451a Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Fri, 9 Sep 2016 23:15:38 -0700
Subject: [PATCH 16/19] SPARK-10747: minor code style

---
 .../spark/util/collection/unsafe/sort/RadixSortSuite.scala    | 2 +-
 .../scala/org/apache/spark/sql/catalyst/dsl/package.scala     | 4 ++--
 .../apache/spark/sql/catalyst/expressions/SortOrder.scala     | 4 ++--
 .../test/scala/org/apache/spark/sql/execution/SortSuite.scala | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
index 83786e4769688..366ffda7788d3 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
@@ -40,7 +40,7 @@ class RadixSortSuite extends SparkFunSuite with Logging {
   case class RadixSortType(
     name: String,
     referenceComparator: PrefixComparator,
-    startByteIdx: Int, endByteIdx: Int, descending: Boolean, signed: Boolean, nullFirst: Boolean)
+    startByteIdx: Int, endByteIdx: Int, descending: Boolean, signed: Boolean, nullsFirst: Boolean)

   val SORT_TYPES_TO_TEST = Seq(
     RadixSortType("unsigned binary data asc nulls first",
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index 23db2e9615fe2..66e52ca68af19 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -109,9 +109,9 @@ package object dsl {
     def cast(to: DataType): Expression = Cast(expr, to)

     def asc: SortOrder = SortOrder(expr, Ascending)
-    def asc_nullLast: SortOrder = SortOrder(expr, Ascending, NullsLast)
+    def asc_nullsLast: SortOrder = SortOrder(expr, Ascending, NullsLast)
     def desc: SortOrder = SortOrder(expr, Descending)
-    def desc_nullFirst: SortOrder = SortOrder(expr, Descending, NullsFirst)
+    def desc_nullsFirst: SortOrder = SortOrder(expr, Descending, NullsFirst)
     def as(alias: String): NamedExpression = Alias(expr, alias)()
     def as(alias: Symbol): NamedExpression = Alias(expr, alias.name)()
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index 92ffe0a5792ac..7397daa12aa74 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -20,8 +20,8 @@ package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
-import org.apache.spark.sql.types.{BinaryType, StringType, _}
-import org.apache.spark.util.collection.unsafe.sort.PrefixComparators.{BinaryPrefixComparator, DoublePrefixComparator, StringPrefixComparator}
+import org.apache.spark.sql.types._
+import org.apache.spark.util.collection.unsafe.sort.PrefixComparators._

 abstract sealed class SortDirection {
   def sql: String
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala
index 31d70714bcffe..a7bbe34f4eedb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala
@@ -102,7 +102,7 @@ class SortSuite extends SparkPlanTest with SharedSQLContext {
     dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType);
     nullable <- Seq(true, false);
     sortOrder <-
-      Seq('a.asc :: Nil, 'a.asc_nullLast :: Nil, 'a.desc :: Nil, 'a.desc_nullFirst :: Nil);
+      Seq('a.asc :: Nil, 'a.asc_nullsLast :: Nil, 'a.desc :: Nil, 'a.desc_nullsFirst :: Nil);
     randomDataGenerator <- RandomDataGenerator.forType(dataType, nullable)
   ) {
     test(s"sorting on $dataType with nullable=$nullable, sortOrder=$sortOrder") {
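With the renames above, the Catalyst DSL reads the same way the generated SQL does. A sketch of how SortSuite's four orderings print (internal Catalyst API, shown for illustration only):

    import org.apache.spark.sql.catalyst.dsl.expressions._

    val orders = Seq('a.asc, 'a.asc_nullsLast, 'a.desc, 'a.desc_nullsFirst)
    orders.foreach(o => println(s"${o.direction.sql} ${o.nullOrdering.sql}"))
    // ASC NULLS FIRST
    // ASC NULLS LAST
    // DESC NULLS LAST
    // DESC NULLS FIRST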
From 4b496f906409724aab6283dc8c3872001787c63b Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Sat, 10 Sep 2016 08:41:53 -0700
Subject: [PATCH 17/19] SPARK-10747: revert extra new line

---
 .../spark/util/collection/unsafe/sort/PrefixComparators.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
index c5cc9385f9f62..116c84943e855 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
@@ -80,7 +80,6 @@ public static long computePrefix(double value) {
    * ordering they define is compatible with radix sort.
    */
   public abstract static class RadixSortSupport extends PrefixComparator {
-
     /** @return Whether the sort should be descending in binary sort order. */
     public abstract boolean sortDescending();

From fc9396a02c711fda88e0d1b3f20602f131050557 Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Mon, 12 Sep 2016 11:18:02 -0700
Subject: [PATCH 18/19] SPARK-10747: update upon review

---
 .../sql/catalyst/expressions/SortOrder.scala     | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index 7397daa12aa74..a8fdb20a57e7b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -37,7 +37,6 @@ case object Ascending extends SortDirection {
   override def defaultNullOrdering: NullOrdering = NullsFirst
 }

-// default null order is last for desc
 case object Descending extends SortDirection {
   override def sql: String = "DESC"
   override def defaultNullOrdering: NullOrdering = NullsLast
@@ -96,7 +95,7 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
   val nullValue = child.child.dataType match {
     case BooleanType | DateType | TimestampType | _: IntegralType =>
       if (nullAsSmallest) {
-          Long.MinValue
+        Long.MinValue
       } else {
         Long.MaxValue
       }
@@ -120,14 +119,10 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
       }
   }

-  def nullAsSmallest: Boolean = {
-    if ((child.isAscending && child.nullOrdering == NullsFirst) ||
-      (!child.isAscending && child.nullOrdering == NullsLast)) {
-      true
-    } else {
-      false
-    }
-  }
+  private def nullAsSmallest: Boolean =
+    ((child.isAscending && child.nullOrdering == NullsFirst) ||
+    (!child.isAscending && child.nullOrdering == NullsLast))
+

   override def eval(input: InternalRow): Any = throw new UnsupportedOperationException

From 5153ce5261752cf6c33b8f48759de495ed8890c3 Mon Sep 17 00:00:00 2001
From: Xin Wu
Date: Mon, 12 Sep 2016 11:20:14 -0700
Subject: [PATCH 19/19] SPARK-10747: update upon review

---
 .../apache/spark/sql/catalyst/expressions/SortOrder.scala | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index a8fdb20a57e7b..d015125baccaf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -119,9 +119,8 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
     }
   }

-  private def nullAsSmallest: Boolean =
-    ((child.isAscending && child.nullOrdering == NullsFirst) ||
-    (!child.isAscending && child.nullOrdering == NullsLast))
+  private def nullAsSmallest: Boolean = (child.isAscending && child.nullOrdering == NullsFirst) ||
+    (!child.isAscending && child.nullOrdering == NullsLast)

   override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
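After the final cleanup, nullAsSmallest carries the whole decision in one expression: null gets the minimum prefix exactly when it sorts first under ASC or last under DESC. Restated as a standalone truth table (the same predicate as the patch, no new behavior):

    def nullAsSmallest(isAscending: Boolean, nullsFirst: Boolean): Boolean =
      (isAscending && nullsFirst) || (!isAscending && !nullsFirst)
    // ASC,  NULLS FIRST -> true   (null prefix = Long.MinValue for integral types)
    // ASC,  NULLS LAST  -> false  (null prefix = Long.MaxValue)
    // DESC, NULLS FIRST -> false
    // DESC, NULLS LAST  -> true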