Skip to content
Permalink
Browse files
Merge pull request #93 from DataSketches/xxHashIntegration
xxHash integration
  • Loading branch information
leerho committed Dec 4, 2018
2 parents 20303df + 1bcaafc commit 31535c4e91eee8cf4e29c6bb6e7fa28903342b62
Showing 7 changed files with 1,363 additions and 141 deletions.
@@ -206,6 +206,13 @@
<version>${findbugs.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<!-- Used for xxHash testing -->
<groupId>net.openhft</groupId>
<artifactId>zero-allocation-hashing</artifactId>
<version>0.8</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
@@ -137,22 +137,25 @@ public final long getCapacity() {
}

/**
* Gets the cumulative offset in bytes of this object from the backing resource.
* This offset may also include other offset components such as the native off-heap
* memory address, DirectByteBuffer split offsets, region offsets, and unsafe arrayBaseOffsets.
*
* @return the cumulative offset in bytes of this object from the backing resource.
*/
public final long getCumulativeOffset() {
assertValid();
return cumBaseOffset_;
}

/**
* Gets the cumulative offset in bytes of this object from the backing resource
* including the Java object header, if any.
* Gets the cumulative offset in bytes of this object from the backing resource including the given
* offsetBytes. This offset may also include other offset components such as the native off-heap
* memory address, DirectByteBuffer split offsets, region offsets, and unsafe arrayBaseOffsets.
*
* @param offsetBytes offset to be added to the base cumulative offset.
* @return the cumulative offset in bytes of this object
* @param offsetBytes offset to be added to the cumulative offset.
* @return the cumulative offset in bytes of this object from the backing resource including the
* given offsetBytes.
*/
public final long getCumulativeOffset(final long offsetBytes) {
assertValid();
@@ -212,20 +215,36 @@ public final boolean hasArray() {
}

/**
 * Returns the hashCode of this object.
 *
 * <p>The hash code of this object depends upon all of its contents.
 * Because of this, it is inadvisable to use these objects as keys in hash maps
 * or similar data structures unless it is known that their contents will not change.</p>
 *
 * <p>If it is desirable to use these objects in a hash map depending only on object identity,
 * then the {@link java.util.IdentityHashMap} can be used.</p>
 *
 * <p>The value is the 64-bit result of {@link #xxHash64(long, long, long)} over the range
 * starting at offset zero with a length of {@link #getCapacity()} and a seed of zero,
 * narrowed to an {@code int} (the low-order 32 bits are kept).</p>
 *
 * @return the hashCode of this object.
 */
@Override
public final int hashCode() {
  // Single return: a second return statement after this one would be unreachable code.
  return (int) xxHash64(0, getCapacity(), 0);
}

/**
 * Returns the 64-bit hash of the sequence of bytes in this object specified by
 * <i>offsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>. Note that the sequence of bytes is
 * always processed in the same order independent of endianness.
 *
 * <p>The requested range is validated against {@link #getCapacity()} before any bytes are
 * read, because the underlying hash routine reads memory directly and performs no range
 * checking against the backing resource itself.</p>
 *
 * @param offsetBytes the given offset in bytes to the first byte of the byte sequence.
 * @param lengthBytes the given length in bytes of the byte sequence.
 * @param seed the given long seed.
 * @return the 64-bit hash of the sequence of bytes in this object specified by
 * <i>offsetBytes</i> and <i>lengthBytes</i>.
 * @throws IllegalArgumentException if <i>offsetBytes</i> or <i>lengthBytes</i> is negative, or
 * if <i>offsetBytes + lengthBytes</i> exceeds the capacity of this object.
 */
public final long xxHash64(final long offsetBytes, final long lengthBytes, final long seed) {
  checkValid();
  final long capacityBytes = getCapacity();
  // Written as a subtraction so that offsetBytes + lengthBytes can never overflow.
  if ((offsetBytes < 0) || (lengthBytes < 0) || (lengthBytes > (capacityBytes - offsetBytes))) {
    throw new IllegalArgumentException(
        "Requested range is out of bounds: offsetBytes=" + offsetBytes
        + ", lengthBytes=" + lengthBytes + ", capacity=" + capacityBytes);
  }
  return XxHash64.hash(getUnsafeObject(), getCumulativeOffset() + offsetBytes, lengthBytes, seed);
}

/**
@@ -108,135 +108,6 @@ private static boolean equalsByBytes(final Object arr1, final long cumOff1, fina
return true;
}

// Unsigned, 64-bit primes
private static final long P1 = -7046029288634856825L;
private static final long P2 = -4417276706812531889L;
private static final long P3 = 1609587929392839161L;
private static final long P4 = -8796714831421723037L;
private static final long P5 = 2870177450012600261L;


/**
 * The hashCode is computed using the XxHash algorithm, a fast, non-cryptographic, 64-bit hash
 * function that has excellent avalanche and 2-way bit independence properties.
 * This java version used the C++ version and the OpenHFT/Zero-Allocation-Hashing implementation
 * referenced below as inspiration.
 *
 * <p>The C++ source repository:
 * <a href="https://github.com/Cyan4973/xxHash">
 * https://github.com/Cyan4973/xxHash</a>. It has a BSD 2-Clause License:
 * <a href="http://www.opensource.org/licenses/bsd-license.php">
 * http://www.opensource.org/licenses/bsd-license.php</a>
 *
 * <p>Portions of this code were leveraged from
 * <a href="https://github.com/OpenHFT/Zero-Allocation-Hashing/blob/master/src/main/java/net/openhft/hashing/XxHash.java">
 * OpenHFT/Zero-Allocation-Hashing</a>, which has an Apache 2 license as does this site.
 *
 * <p>The hash is computed with an implicit seed of zero over {@code state.getCapacity()} bytes
 * starting at {@code state.getCumulativeOffset()}, and the 64-bit result is narrowed to an
 * {@code int}.</p>
 *
 * @param state the object whose bytes are hashed; {@code checkValid()} is called on it first.
 * @return the low-order 32 bits of the 64-bit hash.
 */
static int hashCode(final BaseState state) {
  state.checkValid();
  final long lengthBytes = state.getCapacity();
  long offsetBytes = state.getCumulativeOffset();
  final Object arr = state.getUnsafeObject(); //could be null

  long hash;
  long remaining = lengthBytes;

  if (remaining >= 32) {
    // Four accumulators, initialized as for a seed of zero:
    // seed + P1 + P2, seed + P2, seed, seed - P1.
    long v1 = P1 + P2;
    long v2 = P2;
    long v3 = 0;
    long v4 = -P1; // seed - P1 with seed == 0; was P1, which deviated from the algorithm

    // Consume the input 32 bytes at a time, one long per accumulator.
    do {
      v1 += unsafe.getLong(arr, offsetBytes) * P2;
      v1 = Long.rotateLeft(v1, 31);
      v1 *= P1;

      v2 += unsafe.getLong(arr, offsetBytes + 8L) * P2;
      v2 = Long.rotateLeft(v2, 31);
      v2 *= P1;

      v3 += unsafe.getLong(arr, offsetBytes + 16L) * P2;
      v3 = Long.rotateLeft(v3, 31);
      v3 *= P1;

      v4 += unsafe.getLong(arr, offsetBytes + 24L) * P2;
      v4 = Long.rotateLeft(v4, 31);
      v4 *= P1;

      offsetBytes += 32;
      remaining -= 32;
    } while (remaining >= 32);

    // Combine the four accumulators into a single value.
    hash = Long.rotateLeft(v1, 1)
        + Long.rotateLeft(v2, 7)
        + Long.rotateLeft(v3, 12)
        + Long.rotateLeft(v4, 18);

    // Mix each accumulator into the combined value once more.
    v1 *= P2;
    v1 = Long.rotateLeft(v1, 31);
    v1 *= P1;
    hash ^= v1;
    hash = (hash * P1) + P4;

    v2 *= P2;
    v2 = Long.rotateLeft(v2, 31);
    v2 *= P1;
    hash ^= v2;
    hash = (hash * P1) + P4;

    v3 *= P2;
    v3 = Long.rotateLeft(v3, 31);
    v3 *= P1;
    hash ^= v3;
    hash = (hash * P1) + P4;

    v4 *= P2;
    v4 = Long.rotateLeft(v4, 31);
    v4 *= P1;
    hash ^= v4;
    hash = (hash * P1) + P4;
  } //end remaining >= 32
  else {
    hash = P5;
  }

  hash += lengthBytes;

  // Remaining whole longs (at most three after the 32-byte loop).
  while (remaining >= 8) {
    long k1 = unsafe.getLong(arr, offsetBytes);
    k1 *= P2;
    k1 = Long.rotateLeft(k1, 31);
    k1 *= P1;
    hash ^= k1;
    hash = (Long.rotateLeft(hash, 27) * P1) + P4;
    offsetBytes += 8;
    remaining -= 8;
  }

  if (remaining >= 4) { //treat as unsigned ints
    hash ^= (unsafe.getInt(arr, offsetBytes) & 0XFFFF_FFFFL) * P1;
    hash = (Long.rotateLeft(hash, 23) * P2) + P3;
    offsetBytes += 4;
    remaining -= 4;
  }

  while (remaining != 0) { //treat as unsigned bytes
    hash ^= (unsafe.getByte(arr, offsetBytes) & 0XFFL) * P5;
    hash = Long.rotateLeft(hash, 11) * P1;
    --remaining;
    ++offsetBytes;
  }

  //Finalize
  hash ^= hash >>> 33;
  hash *= P2;
  hash ^= hash >>> 29;
  hash *= P3;
  hash ^= hash >>> 32;
  return (int) hash;
}

static void copy(final BaseState srcState, final long srcOffsetBytes,
final BaseState dstState, final long dstOffsetBytes, final long lengthBytes) {
srcState.checkValid();
@@ -0,0 +1,154 @@
/*
* Copyright 2018, Yahoo! Inc. Licensed under the terms of the
* Apache License 2.0. See LICENSE file at the project root for terms.
*/

package com.yahoo.memory;

import static com.yahoo.memory.UnsafeUtil.unsafe;

/**
* The XxHash is a fast, non-cryptographic, 64-bit hash function that has
* excellent avalanche and 2-way bit independence properties.
* This java version used the C++ version and the OpenHFT/Zero-Allocation-Hashing implementation
* referenced below as inspiration.
*
* <p>The C++ source repository:
* <a href="https://github.com/Cyan4973/xxHash">
* https://github.com/Cyan4973/xxHash</a>. It has a BSD 2-Clause License:
* <a href="http://www.opensource.org/licenses/bsd-license.php">
* http://www.opensource.org/licenses/bsd-license.php</a>
*
* <p>Portions of this code were leveraged from
* <a href="https://github.com/OpenHFT/Zero-Allocation-Hashing/blob/master/src/main/java/net/openhft/hashing/XxHash.java">
* OpenHFT/Zero-Allocation-Hashing</a>, which has an Apache 2 license as does this site.
*
* @author Lee Rhodes
*/
final class XxHash64 {
  // Unsigned, 64-bit primes
  private static final long P1 = -7046029288634856825L;
  private static final long P2 = -4417276706812531889L;
  private static final long P3 = 1609587929392839161L;
  private static final long P4 = -8796714831421723037L;
  private static final long P5 = 2870177450012600261L;

  // Static utility class; not meant to be instantiated.
  private XxHash64() { }

  /**
   * Returns the 64-bit hash of the sequence of bytes in the unsafeObject specified by
   * <i>cumOffsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>.
   *
   * <p>Apart from rejecting a negative length, this method does no range checking: the
   * caller is responsible for ensuring that the whole range is readable.</p>
   *
   * @param unsafeObj A reference to the object parameter required by unsafe. It may be null.
   * @param cumOffsetBytes cumulative offset in bytes of this object from the backing resource
   * including any user given offsetBytes. This offset may also include other offset components
   * such as the native off-heap memory address, DirectByteBuffer split offsets, region offsets,
   * and unsafe arrayBaseOffsets.
   * @param lengthBytes the length in bytes of the sequence to be hashed
   * @param seed a given seed
   * @return the 64-bit hash of the sequence of bytes in the unsafeObject specified by
   * <i>cumOffsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>.
   * @throws IllegalArgumentException if <i>lengthBytes</i> is negative.
   */
  static long hash(final Object unsafeObj, long cumOffsetBytes, final long lengthBytes,
      final long seed) {
    // A negative length would never satisfy the "remaining != 0" exit of the byte loop below
    // and would keep reading memory past the intended range.
    if (lengthBytes < 0) {
      throw new IllegalArgumentException("lengthBytes must not be negative: " + lengthBytes);
    }

    long hash;
    long remaining = lengthBytes;

    if (remaining >= 32) {
      // Four accumulators seeded from the given seed.
      long v1 = seed + P1 + P2;
      long v2 = seed + P2;
      long v3 = seed;
      long v4 = seed - P1;

      // Consume the input 32 bytes at a time, one long per accumulator.
      do {
        v1 += unsafe.getLong(unsafeObj, cumOffsetBytes) * P2;
        v1 = Long.rotateLeft(v1, 31);
        v1 *= P1;

        v2 += unsafe.getLong(unsafeObj, cumOffsetBytes + 8L) * P2;
        v2 = Long.rotateLeft(v2, 31);
        v2 *= P1;

        v3 += unsafe.getLong(unsafeObj, cumOffsetBytes + 16L) * P2;
        v3 = Long.rotateLeft(v3, 31);
        v3 *= P1;

        v4 += unsafe.getLong(unsafeObj, cumOffsetBytes + 24L) * P2;
        v4 = Long.rotateLeft(v4, 31);
        v4 *= P1;

        cumOffsetBytes += 32;
        remaining -= 32;
      } while (remaining >= 32);

      // Combine the four accumulators into a single value.
      hash = Long.rotateLeft(v1, 1)
          + Long.rotateLeft(v2, 7)
          + Long.rotateLeft(v3, 12)
          + Long.rotateLeft(v4, 18);

      // Mix each accumulator into the combined value once more.
      v1 *= P2;
      v1 = Long.rotateLeft(v1, 31);
      v1 *= P1;
      hash ^= v1;
      hash = (hash * P1) + P4;

      v2 *= P2;
      v2 = Long.rotateLeft(v2, 31);
      v2 *= P1;
      hash ^= v2;
      hash = (hash * P1) + P4;

      v3 *= P2;
      v3 = Long.rotateLeft(v3, 31);
      v3 *= P1;
      hash ^= v3;
      hash = (hash * P1) + P4;

      v4 *= P2;
      v4 = Long.rotateLeft(v4, 31);
      v4 *= P1;
      hash ^= v4;
      hash = (hash * P1) + P4;
    } //end remaining >= 32
    else {
      hash = seed + P5;
    }

    hash += lengthBytes;

    // Remaining whole longs (at most three after the 32-byte loop).
    while (remaining >= 8) {
      long k1 = unsafe.getLong(unsafeObj, cumOffsetBytes);
      k1 *= P2;
      k1 = Long.rotateLeft(k1, 31);
      k1 *= P1;
      hash ^= k1;
      hash = (Long.rotateLeft(hash, 27) * P1) + P4;
      cumOffsetBytes += 8;
      remaining -= 8;
    }

    if (remaining >= 4) { //treat as unsigned ints
      hash ^= (unsafe.getInt(unsafeObj, cumOffsetBytes) & 0XFFFF_FFFFL) * P1;
      hash = (Long.rotateLeft(hash, 23) * P2) + P3;
      cumOffsetBytes += 4;
      remaining -= 4;
    }

    while (remaining != 0) { //treat as unsigned bytes
      hash ^= (unsafe.getByte(unsafeObj, cumOffsetBytes) & 0XFFL) * P5;
      hash = Long.rotateLeft(hash, 11) * P1;
      --remaining;
      ++cumOffsetBytes;
    }

    return avalanche(hash);
  }

  /**
   * Applies the final xor-shift / multiply mixing steps to the accumulated hash.
   *
   * @param hash the accumulated hash value
   * @return the fully mixed 64-bit hash
   */
  private static long avalanche(long hash) {
    hash ^= hash >>> 33;
    hash *= P2;
    hash ^= hash >>> 29;
    hash *= P3;
    hash ^= hash >>> 32;
    return hash;
  }
}

@@ -412,7 +412,7 @@ public void checkNullMemReqSvr() {
public void checkHashCode() {
  // 39 bytes: long enough for one 32-byte block, followed by a 4-byte step and 3 single bytes.
  WritableMemory wmem = WritableMemory.allocate(32 + 7);
  int hc = wmem.hashCode();
  // Only one expected value can hold for a given input; keep a single assertion.
  assertEquals(hc, -1895166923);
}

@Test

0 comments on commit 31535c4

Please sign in to comment.