Skip to content
Permalink
Browse files
Add xxHash to Memory for hashCode and arbitrary subsequence.
  • Loading branch information
leerho committed Dec 3, 2018
1 parent 97848c5 commit 6fdd588d4940d440ddff5d0d855cce769e2fa0f1
Showing 6 changed files with 52 additions and 91 deletions.

This file was deleted.

@@ -14,8 +14,6 @@
import java.nio.ByteOrder;
import java.util.concurrent.atomic.AtomicLong;

import com.yahoo.hash.XxHash64;

/**
* Keeps key configuration state for Memory and Buffer plus some common static variables
* and check methods.
@@ -139,22 +137,25 @@ public final long getCapacity() {
}

/**
* Gets the cumulative offset in bytes of this object from the backing resource
* including the Java object header, if any.
* Gets the cumulative offset in bytes of this object from the backing resource.
* This offset may also include other offset components such as the native off-heap
* memory address, DirectByteBuffer split offsets, region offsets, and unsafe arrayBaseOffsets.
*
* @return the cumulative offset in bytes of this object
* @return the cumulative offset in bytes of this object from the backing resource.
*/
public final long getCumulativeOffset() {
// Run the validity assertion before the offset is read.
assertValid();
// Return the stored cumulative base offset unchanged; no additional user offset is applied here.
return cumBaseOffset_;
}

/**
* Gets the cumulative offset in bytes of this object from the backing resource
* including the Java object header, if any.
* Gets the cumulative offset in bytes of this object from the backing resource including the given
* offsetBytes. This offset may also include other offset components such as the native off-heap
* memory address, DirectByteBuffer split offsets, region offsets, and unsafe arrayBaseOffsets.
*
* @param offsetBytes offset to be added to the base cumulative offset.
* @return the cumulative offset in bytes of this object
* @param offsetBytes offset to be added to the cumulative offset.
* @return the cumulative offset in bytes of this object from the backing resource including the
* given offsetBytes.
*/
public final long getCumulativeOffset(final long offsetBytes) {
assertValid();
@@ -228,27 +229,29 @@ public final boolean hasArray() {
@Override
public final int hashCode() {
checkValid();
final Object unsafeObj = getUnsafeObject();
final long cumOffsetBytes = getCumulativeOffset();
final long lengthBytes = getCapacity();
final long offsetBytes = getCumulativeOffset();
final Object unsafeObj = getUnsafeObject(); //could be null
final long seed = 0;
return (int) XxHash64.hash(unsafeObj, offsetBytes, lengthBytes, seed);
return (int) XxHash64.hash(unsafeObj, cumOffsetBytes, lengthBytes, seed);
}

/**
* Returns the 64-bit hash of the sequence of bytes in this object specified by
* <i>offsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>.
* <i>offsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>. Note that the sequence of bytes is
* always processed in the same order independent of endianness.
*
* @param offsetBytes the given offset in bytes to the first byte of the byte sequence.
* @param lengthBytes the given length in bytes of the byte sequence.
* @param seed the given long seed.
* @return the 64-bit hash of the sequence of bytes in this object specified by
* <i>offsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>.
*/
public final long hash(final long offsetBytes, final long lengthBytes, final long seed) {
public final long xxHash64(final long offsetBytes, final long lengthBytes, final long seed) {
checkValid();
final Object unsafeObj = getUnsafeObject(); //could be null
return (int) XxHash64.hash(unsafeObj, offsetBytes, lengthBytes, seed);
final Object unsafeObj = getUnsafeObject();
final long cumOffsetBytes = getCumulativeOffset() + offsetBytes;
return XxHash64.hash(unsafeObj, cumOffsetBytes, lengthBytes, seed);
}

/**
@@ -3,7 +3,7 @@
* Apache License 2.0. See LICENSE file at the project root for terms.
*/

package com.yahoo.hash;
package com.yahoo.memory;

import static com.yahoo.memory.UnsafeUtil.unsafe;

@@ -25,7 +25,7 @@
*
* @author Lee Rhodes
*/
public class XxHash64 {
class XxHash64 {
// Unsigned, 64-bit primes
private static final long P1 = -7046029288634856825L;
private static final long P2 = -4417276706812531889L;
@@ -34,33 +34,20 @@ public class XxHash64 {
private static final long P5 = 2870177450012600261L;

/**
 * Computes a 64-bit hash of a single long value.
 *
 * @param in the long value to be hashed.
 * @param seed a long valued seed.
 * @return the 64-bit hash of <i>in</i> for the given <i>seed</i>.
 */
public static long hash(final long in, final long seed) {
  // Scramble the input: multiply by P2, rotate left by 31, multiply by P1.
  final long scrambled = Long.rotateLeft(in * P2, 31) * P1;
  // Start from seed + P5 + 8 (the 8 is presumably the input length in bytes -- confirm),
  // then fold in the scrambled input.
  final long folded = (seed + P5 + 8) ^ scrambled;
  // Final mixing step before the shared finalization.
  return finalize((Long.rotateLeft(folded, 27) * P1) + P4);
}

/**
* Returns a 64-bit hash.
* Returns the 64-bit hash of the sequence of bytes in the unsafeObject specified by
* <i>cumOffsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>.
*
* @param unsafeObj The Object containing the sequence of bytes to hash.
* @param offsetBytes the offset in bytes
* @param lengthBytes the length in bytes
* @param unsafeObj A reference to the object parameter required by unsafe.
* @param cumOffsetBytes cumulative offset in bytes of this object from the backing resource
* including any user given offsetBytes. This offset may also include other offset components
* such as the native off-heap memory address, DirectByteBuffer split offsets, region offsets,
* and unsafe arrayBaseOffsets.
* @param lengthBytes the length in bytes of the sequence to be hashed
* @param seed a given seed
* @return a 64-bit hash
* @return the 64-bit hash of the sequence of bytes in the unsafeObject specified by
* <i>cumOffsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>.
*/
public static long hash(final Object unsafeObj, long offsetBytes, final long lengthBytes, final long seed) {
static long hash(final Object unsafeObj, long cumOffsetBytes, final long lengthBytes, final long seed) {
long hash;
long remaining = lengthBytes;

@@ -71,23 +58,23 @@ public static long hash(final Object unsafeObj, long offsetBytes, final long len
long v4 = seed - P1;

do {
v1 += unsafe.getLong(unsafeObj, offsetBytes) * P2;
v1 += unsafe.getLong(unsafeObj, cumOffsetBytes) * P2;
v1 = Long.rotateLeft(v1, 31);
v1 *= P1;

v2 += unsafe.getLong(unsafeObj, offsetBytes + 8L) * P2;
v2 += unsafe.getLong(unsafeObj, cumOffsetBytes + 8L) * P2;
v2 = Long.rotateLeft(v2, 31);
v2 *= P1;

v3 += unsafe.getLong(unsafeObj, offsetBytes + 16L) * P2;
v3 += unsafe.getLong(unsafeObj, cumOffsetBytes + 16L) * P2;
v3 = Long.rotateLeft(v3, 31);
v3 *= P1;

v4 += unsafe.getLong(unsafeObj, offsetBytes + 24L) * P2;
v4 += unsafe.getLong(unsafeObj, cumOffsetBytes + 24L) * P2;
v4 = Long.rotateLeft(v4, 31);
v4 *= P1;

offsetBytes += 32;
cumOffsetBytes += 32;
remaining -= 32;
} while (remaining >= 32);

@@ -127,28 +114,28 @@ public static long hash(final Object unsafeObj, long offsetBytes, final long len
hash += lengthBytes;

while (remaining >= 8) {
long k1 = unsafe.getLong(unsafeObj, offsetBytes);
long k1 = unsafe.getLong(unsafeObj, cumOffsetBytes);
k1 *= P2;
k1 = Long.rotateLeft(k1, 31);
k1 *= P1;
hash ^= k1;
hash = (Long.rotateLeft(hash, 27) * P1) + P4;
offsetBytes += 8;
cumOffsetBytes += 8;
remaining -= 8;
}

if (remaining >= 4) { //treat as unsigned ints
hash ^= (unsafe.getInt(unsafeObj, offsetBytes) & 0XFFFF_FFFFL) * P1;
hash ^= (unsafe.getInt(unsafeObj, cumOffsetBytes) & 0XFFFF_FFFFL) * P1;
hash = (Long.rotateLeft(hash, 23) * P2) + P3;
offsetBytes += 4;
cumOffsetBytes += 4;
remaining -= 4;
}

while (remaining != 0) { //treat as unsigned bytes
hash ^= (unsafe.getByte(unsafeObj, offsetBytes) & 0XFFL) * P5;
hash ^= (unsafe.getByte(unsafeObj, cumOffsetBytes) & 0XFFL) * P5;
hash = Long.rotateLeft(hash, 11) * P1;
--remaining;
++offsetBytes;
++cumOffsetBytes;
}

return finalize(hash);
@@ -162,6 +149,5 @@ private static long finalize(long hash) {
hash ^= hash >>> 32;
return hash;
}

}

This file was deleted.

@@ -3,7 +3,7 @@
* Apache License 2.0. See LICENSE file at the project root for terms.
*/

package com.yahoo.hash;
package com.yahoo.memory;

import static org.testng.Assert.assertEquals;

@@ -19,12 +19,11 @@ public class XxHash64LoopingTest {
@Test
public void testWithSeed() {
long seed = 42L;
for (int i = 0; i < 2; i++) { //1025
for (int i = 0; i < 1025; i++) {
byte[] byteArr = new byte[i];
for (int j = 0; j < byteArr.length; j++) { byteArr[j] = (byte) j; }
WritableMemory wmem = WritableMemory.wrap(byteArr);
final long cumOffset = wmem.getCumulativeOffset();
long hash = XxHash64.hash(wmem.getArray(), cumOffset, byteArr.length, seed);
long hash = wmem.xxHash64(0, byteArr.length, seed);
assertEquals(hash, HASHES_OF_LOOPING_BYTES_WITH_SEED_42[i]);
}
}
@@ -3,7 +3,7 @@
* Apache License 2.0. See LICENSE file at the project root for terms.
*/

package com.yahoo.hash;
package com.yahoo.memory;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
@@ -31,7 +31,7 @@ public void offsetChecks() {

for (int offset = 0; offset < 16; offset++) {
int arrLen = cap - offset;
hash = XxHash64.hash(wmem.getArray(), offset, arrLen, seed);
hash = wmem.xxHash64(offset, arrLen, seed);
assertTrue(hash != 0);
}
}
@@ -48,35 +48,28 @@ public void byteArrChecks() {
WritableMemory wmem = WritableMemory.wrap(in);
for (int i = 0; i < j; i++) { wmem.putByte(i, (byte) (-128 + i)); }

long hash = XxHash64.hash(wmem.getArray(), offset, bytes, seed);
long hash =wmem.xxHash64(offset, bytes, seed);
assertTrue(hash != 0);
}
}

/**
 * Smoke test: hashing the single long value 123 with a seed of zero yields a non-zero hash.
 */
@Test
public void longCheck() {
  final long hash = XxHash64.hash(123L, 0L);
  assertTrue(hash != 0);
}

@Test
public void collisionTest() {
WritableMemory wmem = WritableMemory.allocate(128);
wmem.putLong(0, 1);
wmem.putLong(16, 42);
wmem.putLong(32, 2);
long h1 = XxHash64.hash(wmem.getArray(), 0, wmem.getCapacity(), 0);
long h1 = wmem.xxHash64(0, wmem.getCapacity(), 0);

wmem.putLong(0, 1 + 0xBA79078168D4BAFL);
wmem.putLong(32, 2 + 0x9C90005B80000000L);
long h2 = XxHash64.hash(wmem.getArray(), 0, wmem.getCapacity(), 0);
long h2 = wmem.xxHash64(0, wmem.getCapacity(), 0);
assertEquals(h1, h2);

wmem.putLong(0, 1 + (0xBA79078168D4BAFL * 2));
wmem.putLong(32, 2 + (0x9C90005B80000000L * 2));

long h3 = XxHash64.hash(wmem.getArray(), 0, wmem.getCapacity(), 0);
long h3 = wmem.xxHash64(0, wmem.getCapacity(), 0);
assertEquals(h2, h3);
}

0 comments on commit 6fdd588

Please sign in to comment.