Skip to content

Commit

Permalink
fix compilation failure
Browse files Browse the repository at this point in the history
  • Loading branch information
kiszk committed Mar 2, 2018
1 parent c9f401a commit cf2d532
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
Expand Up @@ -69,7 +69,7 @@ public static int hashUnsafeBytesBlock(MemoryBlock base, int seed) {
int lengthInBytes = (int)base.size();
assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
int lengthAligned = lengthInBytes - lengthInBytes % 4;
int h1 = hashBytesByIntBlock(base, offset, lengthAligned, seed);
int h1 = hashBytesByIntBlock(base.subBlock(offset, lengthAligned), seed);
for (int i = lengthAligned; i < lengthInBytes; i++) {
int halfWord = base.getByte(offset + i);
int k1 = mixK1(halfWord);
Expand Down Expand Up @@ -106,13 +106,14 @@ public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes,
return fmix(h1, lengthInBytes);
}

public static int hashUnsafeBytes2Block(
MemoryBlock base, long offset, int lengthInBytes, int seed) {
public static int hashUnsafeBytes2Block(MemoryBlock base, int seed) {
// This is compatible with the original and other implementations.
// Use this method for new components after Spark 2.3.
long offset = base.getBaseOffset();
int lengthInBytes = (int)base.size();
assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
int lengthAligned = lengthInBytes - lengthInBytes % 4;
int h1 = hashBytesByIntBlock(base, offset, lengthAligned, seed);
int h1 = hashBytesByIntBlock(base.subBlock(offset, lengthAligned), seed);
int k1 = 0;
for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) {
k1 ^= (base.getByte(offset + i) & 0xFF) << shift;
Expand All @@ -121,7 +122,9 @@ public static int hashUnsafeBytes2Block(
return fmix(h1, lengthInBytes);
}

private static int hashBytesByIntBlock(MemoryBlock base, long offset, int lengthInBytes, int seed) {
private static int hashBytesByIntBlock(MemoryBlock base, int seed) {
long offset = base.getBaseOffset();
int lengthInBytes = (int)base.size();
assert (lengthInBytes % 4 == 0);
int h1 = seed;
for (int i = 0; i < lengthInBytes; i += 4) {
Expand Down
Expand Up @@ -243,8 +243,7 @@ object FeatureHasher extends DefaultParamsReadable[FeatureHasher] {
case f: Float => hashInt(java.lang.Float.floatToIntBits(f), seed)
case d: Double => hashLong(java.lang.Double.doubleToLongBits(d), seed)
case s: String =>
val utf8 = UTF8String.fromString(s)
hashUnsafeBytes2Block(utf8.getBaseObject, utf8.getBaseOffset, utf8.numBytes(), seed)
hashUnsafeBytes2Block(UTF8String.fromString(s).getMemoryBlock, seed)
case _ => throw new SparkException("FeatureHasher with murmur3 algorithm does not " +
s"support type ${term.getClass.getCanonicalName} of input data.")
}
Expand Down

0 comments on commit cf2d532

Please sign in to comment.