From cf2d532ae9c8688ef314a51a89c76abe2fd5d857 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 2 Mar 2018 06:03:41 +0000 Subject: [PATCH] fix compilation failure --- .../apache/spark/unsafe/hash/Murmur3_x86_32.java | 13 ++++++++----- .../org/apache/spark/ml/feature/FeatureHasher.scala | 3 +-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java index c8b6320287e73..ec6c4942cf63f 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java @@ -69,7 +69,7 @@ public static int hashUnsafeBytesBlock(MemoryBlock base, int seed) { int lengthInBytes = (int)base.size(); assert (lengthInBytes >= 0): "lengthInBytes cannot be negative"; int lengthAligned = lengthInBytes - lengthInBytes % 4; - int h1 = hashBytesByIntBlock(base, offset, lengthAligned, seed); + int h1 = hashBytesByIntBlock(base.subBlock(offset, lengthAligned), seed); for (int i = lengthAligned; i < lengthInBytes; i++) { int halfWord = base.getByte(offset + i); int k1 = mixK1(halfWord); @@ -106,13 +106,14 @@ public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes, return fmix(h1, lengthInBytes); } - public static int hashUnsafeBytes2Block( - MemoryBlock base, long offset, int lengthInBytes, int seed) { + public static int hashUnsafeBytes2Block(MemoryBlock base, int seed) { // This is compatible with original and another implementations. // Use this method for new components after Spark 2.3. + long offset = base.getBaseOffset(); + int lengthInBytes = (int)base.size(); assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative"; int lengthAligned = lengthInBytes - lengthInBytes % 4; - int h1 = hashBytesByIntBlock(base, offset, lengthAligned, seed); + int h1 = hashBytesByIntBlock(base.subBlock(offset, lengthAligned), seed); int k1 = 0; for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) { k1 ^= (base.getByte(offset + i) & 0xFF) << shift; @@ -121,7 +122,9 @@ public static int hashUnsafeBytes2Block( return fmix(h1, lengthInBytes); } - private static int hashBytesByIntBlock(MemoryBlock base, long offset, int lengthInBytes, int seed) { + private static int hashBytesByIntBlock(MemoryBlock base, int seed) { + long offset = base.getBaseOffset(); + int lengthInBytes = (int)base.size(); assert (lengthInBytes % 4 == 0); int h1 = seed; for (int i = 0; i < lengthInBytes; i += 4) { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala index d5694cd416fe0..d67e4819b161a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala @@ -243,8 +243,7 @@ object FeatureHasher extends DefaultParamsReadable[FeatureHasher] { case f: Float => hashInt(java.lang.Float.floatToIntBits(f), seed) case d: Double => hashLong(java.lang.Double.doubleToLongBits(d), seed) case s: String => - val utf8 = UTF8String.fromString(s) - hashUnsafeBytes2Block(utf8.getBaseObject, utf8.getBaseOffset, utf8.numBytes(), seed) + hashUnsafeBytes2Block(UTF8String.fromString(s).getMemoryBlock, seed) case _ => throw new SparkException("FeatureHasher with murmur3 algorithm does not " + s"support type ${term.getClass.getCanonicalName} of input data.") }