Skip to content

Commit

Permalink
[SPARK-30633][SQL] Append L to seed when type is LongType
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Allow for using longs as seed for xxHash.

### Why are the changes needed?

Codegen fails when passing a seed to xxHash that is > 2^31.

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

Existing tests pass. Should more be added?

Closes #27354 from patrickcording/fix_xxhash_seed_bug.

Authored-by: Patrick Cording <patrick.cording@datarobot.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
  • Loading branch information
patrickcording authored and dongjoon-hyun committed Jan 27, 2020
1 parent 0436b3d commit c5c580b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
Expand Up @@ -282,6 +282,7 @@ abstract class HashExpression[E] extends Expression {
}

val hashResultType = CodeGenerator.javaType(dataType)
val typedSeed = if (dataType.sameType(LongType)) s"${seed}L" else s"$seed"
val codes = ctx.splitExpressionsWithCurrentInputs(
expressions = childrenHash,
funcName = "computeHash",
Expand All @@ -296,7 +297,7 @@ abstract class HashExpression[E] extends Expression {

ev.copy(code =
code"""
|$hashResultType ${ev.value} = $seed;
|$hashResultType ${ev.value} = $typedSeed;
|$codes
""".stripMargin)
}
Expand Down
Expand Up @@ -684,6 +684,33 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
assert(murmur3HashPlan(wideRow).getInt(0) == murmursHashEval)
}

test("SPARK-30633: xxHash with different type seeds") {
val literal = Literal.create(42L, LongType)

val longSeeds = Seq(
Long.MinValue,
Integer.MIN_VALUE.toLong - 1L,
0L,
Integer.MAX_VALUE.toLong + 1L,
Long.MaxValue
)
for (seed <- longSeeds) {
checkEvaluation(XxHash64(Seq(literal), seed), XxHash64(Seq(literal), seed).eval())
}

val intSeeds = Seq(
Integer.MIN_VALUE,
0,
Integer.MAX_VALUE
)
for (seed <- intSeeds) {
checkEvaluation(XxHash64(Seq(literal), seed), XxHash64(Seq(literal), seed).eval())
}

checkEvaluation(XxHash64(Seq(literal), 100), XxHash64(Seq(literal), 100L).eval())
checkEvaluation(XxHash64(Seq(literal), 100L), XxHash64(Seq(literal), 100).eval())
}

private def testHash(inputSchema: StructType): Unit = {
val inputGenerator = RandomDataGenerator.forType(inputSchema, nullable = false).get
val encoder = RowEncoder(inputSchema)
Expand All @@ -700,5 +727,17 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(HiveHash(literals), HiveHash(literals).eval())
}
}

val longSeed = Math.abs(seed).toLong + Integer.MAX_VALUE.toLong
test(s"SPARK-30633: xxHash64 with long seed: ${inputSchema.simpleString}") {
for (_ <- 1 to 10) {
val input = encoder.toRow(inputGenerator.apply().asInstanceOf[Row]).asInstanceOf[UnsafeRow]
val literals = input.toSeq(inputSchema).zip(inputSchema.map(_.dataType)).map {
case (value, dt) => Literal.create(value, dt)
}
// Only test the interpreted version has same result with codegen version.
checkEvaluation(XxHash64(literals, longSeed), XxHash64(literals, longSeed).eval())
}
}
}
}

0 comments on commit c5c580b

Please sign in to comment.