Skip to content

Commit

Permalink
Keep a deterministic output order in AttributeSet.toSeq
Browse files Browse the repository at this point in the history
  • Loading branch information
maropu committed Aug 16, 2017
1 parent 282f00b commit 3201f0a
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,12 @@ class AttributeSet private (val baseSet: Set[AttributeEquals])

// We must force toSeq to be strict (i.e., not lazy); otherwise we end up with a [[Stream]] that
// captures all sorts of things in its closure.
override def toSeq: Seq[Attribute] = baseSet.map(_.a).toArray.toSeq
override def toSeq: Seq[Attribute] = {
  // Sort by (name, expression id) so the result does not depend on the iteration order of
  // `baseSet`; that order affects the variable order in generated code
  // (e.g., `GenerateColumnAccessor`). See SPARK-18394 for details.
  val attributes = baseSet.map(_.a).toArray
  attributes.sortBy(attr => (attr.name, attr.exprId.id))
}

// Renders the attributes as a brace-delimited, comma-separated list, e.g. "{a, b}".
override def toString: String = baseSet.map(_.a).mkString("{", ", ", "}")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,34 @@ class AttributeSetSuite extends SparkFunSuite {
assert(aSet == aSet)
assert(aSet == AttributeSet(aUpper :: Nil))
}

test("SPARK-18394 keep a deterministic output order along with attribute names") {
  // Builds an integer-typed attribute reference with the given name and expression id.
  def intAttr(name: String, id: Long): AttributeReference =
    AttributeReference(name, IntegerType)(exprId = ExprId(id))

  // Unions the two attribute sets built from `left` and `right`, then returns the attribute
  // names in the order produced by `toSeq`.
  def namesOfUnion(
      left: List[AttributeReference],
      right: List[AttributeReference]): Seq[String] = {
    val merged = AttributeSet(left) ++ AttributeSet(right)
    merged.toSeq.map(_.name)
  }

  // Same attribute names in both cases, but with unrelated expression ids.
  val attrSeqA = namesOfUnion(
    intAttr("c1", 1098) :: intAttr("c2", 107) :: intAttr("c3", 838) :: Nil,
    intAttr("c4", 389) :: intAttr("c5", 89329) :: Nil)

  val attrSeqB = namesOfUnion(
    intAttr("c1", 392) :: intAttr("c2", 92) :: intAttr("c3", 87) :: Nil,
    intAttr("c4", 9023920) :: intAttr("c5", 522) :: Nil)

  // The name order must be the same regardless of the expression ids.
  assert(attrSeqA === attrSeqB)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter {
}.head

assert(actualOutputColumns === expectedOutputColumns, "Output columns mismatch")
assert(actualScannedColumns === expectedScannedColumns, "Scanned columns mismatch")
assert(actualScannedColumns.sorted === expectedScannedColumns.sorted,
"Scanned columns mismatch")

val actualPartitions = actualPartValues.map(_.asScala.mkString(",")).sorted
val expectedPartitions = expectedPartValues.map(_.mkString(",")).sorted
Expand Down

0 comments on commit 3201f0a

Please sign in to comment.