Skip to content
Permalink
Browse files
Merge pull request #88 from DataSketches/NewHashCode
Update hashCode computation
  • Loading branch information
leerho committed Dec 1, 2018
2 parents 9f99509 + 020354d commit 20303df7b9ffb5cced7b518b6b6f0f063d5a1b69
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 25 deletions.
@@ -108,34 +108,133 @@ private static boolean equalsByBytes(final Object arr1, final long cumOff1, fina
return true;
}

// Unsigned, 64-bit primes used as the multiplier/addend constants of the hash computation
// in hashCode(BaseState). Java has no unsigned long type, so the primes that are >= 2^63
// are written here as negative signed literals; the bit patterns are what matter
// (for example, P1 is 0x9E3779B185EBCA87 when read as an unsigned value).
private static final long P1 = -7046029288634856825L; // multiplier in the round, merge and tail steps
private static final long P2 = -4417276706812531889L; // multiplier applied to each input lane
private static final long P3 = 1609587929392839161L;  // addend of the 4-byte step; final avalanche multiplier
private static final long P4 = -8796714831421723037L; // addend of the merge and 8-byte steps
private static final long P5 = 2870177450012600261L;  // start value for short inputs; per-byte multiplier


/**
* Computes a 32-bit hash code over all bytes of the given state, starting at its cumulative
* offset and spanning its capacity.
*
* <p>The hashCode is computed using the XxHash algorithm, a fast, non-cryptographic, 64-bit hash
* function that has excellent avalanche and 2-way bit independence properties.
* This Java version uses the C++ version and the OpenHFT/Zero-Allocation-Hashing implementation
* referenced below as inspiration.
*
* <p>The C++ source repository:
* <a href="https://github.com/Cyan4973/xxHash">
* https://github.com/Cyan4973/xxHash</a>. It has a BSD 2-Clause License:
* <a href="http://www.opensource.org/licenses/bsd-license.php">
* http://www.opensource.org/licenses/bsd-license.php</a>
*
* <p>Portions of this code were leveraged from
* <a href="https://github.com/OpenHFT/Zero-Allocation-Hashing/blob/master/src/main/java/net/openhft/hashing/XxHash.java">
* OpenHFT/Zero-Allocation-Hashing</a>, which has an Apache 2 license as does this site.
*
* @param state the state to hash; its validity is checked first, then its capacity,
* cumulative offset and backing object (which may be null) are read
* @return the 64-bit hash value truncated to its low 32 bits
*/
static int hashCode(final BaseState state) {
state.checkValid();
// NOTE(review): this listing is a diff rendered without +/- markers, so two implementations
// are interleaved in this body. lenBytes and cumOff are read only by the 31-based polynomial
// hash further down (the statements that update "result"), which appears to be the code this
// commit removes; lengthBytes and offsetBytes feed the XxHash-based code.
long lenBytes = state.getCapacity();
long cumOff = state.getCumulativeOffset();
final long lengthBytes = state.getCapacity();
long offsetBytes = state.getCumulativeOffset();
final Object arr = state.getUnsafeObject(); //could be null
// --- polynomial hash (presumably the pre-commit version): result = 31 * result + fold(long) ---
int result = 1;
while (lenBytes >= Long.BYTES) {
final int chunk = (int) Math.min(lenBytes, UNSAFE_COPY_THRESHOLD_BYTES);
// int-counted loop to avoid safepoint polls (otherwise why we chunk by
// UNSAFE_COPY_MEMORY_THRESHOLD)
int i = 0;

for (; i <= (chunk - Long.BYTES); i += Long.BYTES) {
final long v = unsafe.getLong(arr, cumOff + i);
// fold the 64-bit value into 32 bits by XOR-ing its high and low halves
final int vHash = (int) (v ^ (v >>> 32));
result = (31 * result) + vHash;
}
// i is the number of bytes consumed in this chunk (a multiple of 8)
lenBytes -= i;
cumOff += i;
}
//hash the remainder bytes, if any, as a long
if (lenBytes == 0) { return result; }
long v = 0;
for (int i = 0; i < lenBytes; i++) {
// place byte i, zero-extended, at bit position 8 * i
v |= (unsafe.getByte(arr, cumOff + i) & 0XFFL) << (i << 3);
}
final int vHash = (int) (v ^ (v >>> 32));
return (31 * result) + vHash;
// --- XxHash-based hash (presumably the post-commit version) starts here. As displayed, these
// statements follow an unconditional return; they only make sense once the polynomial
// statements above are treated as removed. ---
long hash;
long remaining = lengthBytes;

// Inputs of at least 32 bytes are consumed in 32-byte stripes by four independent lanes.
if (remaining >= 32) {
long v1 = P1 + P2;
long v2 = P2;
long v3 = 0;
// NOTE(review): the reference XXH64 appears to initialize this lane as (seed - P1), i.e. -P1
// for a zero seed, whereas this uses +P1. Confirm whether that is intentional before changing
// it: any change alters the hash of every input of 32 bytes or more.
long v4 = P1;

do {
// each lane: add (8 input bytes * P2), rotate left by 31, multiply by P1
v1 += unsafe.getLong(arr, offsetBytes) * P2;
v1 = Long.rotateLeft(v1, 31);
v1 *= P1;

v2 += unsafe.getLong(arr, offsetBytes + 8L) * P2;
v2 = Long.rotateLeft(v2, 31);
v2 *= P1;

v3 += unsafe.getLong(arr, offsetBytes + 16L) * P2;
v3 = Long.rotateLeft(v3, 31);
v3 *= P1;

v4 += unsafe.getLong(arr, offsetBytes + 24L) * P2;
v4 = Long.rotateLeft(v4, 31);
v4 *= P1;

offsetBytes += 32;
remaining -= 32;
} while (remaining >= 32);

// combine the four lanes, each rotated by a different amount
hash = Long.rotateLeft(v1, 1)
+ Long.rotateLeft(v2, 7)
+ Long.rotateLeft(v3, 12)
+ Long.rotateLeft(v4, 18);

// merge each lane into the hash: scramble the lane, XOR it in, then hash = hash * P1 + P4
v1 *= P2;
v1 = Long.rotateLeft(v1, 31);
v1 *= P1;
hash ^= v1;
hash = (hash * P1) + P4;

v2 *= P2;
v2 = Long.rotateLeft(v2, 31);
v2 *= P1;
hash ^= v2;
hash = (hash * P1) + P4;

v3 *= P2;
v3 = Long.rotateLeft(v3, 31);
v3 *= P1;
hash ^= v3;
hash = (hash * P1) + P4;

v4 *= P2;
v4 = Long.rotateLeft(v4, 31);
v4 *= P1;
hash ^= v4;
hash = (hash * P1) + P4;
} //end remaining >= 32
else {
// fewer than 32 bytes: no stripes were processed, start from the constant P5
hash = P5;
}

// mix in the total length so inputs that differ only in length hash differently
hash += lengthBytes;

// consume the remaining bytes 8 at a time
while (remaining >= 8) {
long k1 = unsafe.getLong(arr, offsetBytes);
k1 *= P2;
k1 = Long.rotateLeft(k1, 31);
k1 *= P1;
hash ^= k1;
hash = (Long.rotateLeft(hash, 27) * P1) + P4;
offsetBytes += 8;
remaining -= 8;
}

// at most 7 bytes are left here, so a single 4-byte step suffices
if (remaining >= 4) { //treat as unsigned ints
hash ^= (unsafe.getInt(arr, offsetBytes) & 0XFFFF_FFFFL) * P1;
hash = (Long.rotateLeft(hash, 23) * P2) + P3;
offsetBytes += 4;
remaining -= 4;
}

// at most 3 bytes are left here
while (remaining != 0) { //treat as unsigned bytes
hash ^= (unsafe.getByte(arr, offsetBytes) & 0XFFL) * P5;
hash = Long.rotateLeft(hash, 11) * P1;
--remaining;
++offsetBytes;
}

//Finalize
// alternate xor-shift and multiply steps so every input bit can affect every output bit
hash ^= hash >>> 33;
hash *= P2;
hash ^= hash >>> 29;
hash *= P3;
hash ^= hash >>> 32;
// only the low 32 bits of the 64-bit hash are returned
return (int) hash;
}

static void copy(final BaseState srcState, final long srcOffsetBytes,
@@ -412,7 +412,7 @@ public void checkNullMemReqSvr() {
// Pins the exact hashCode value of a freshly allocated 39-byte memory region.
public void checkHashCode() {
// 32 + 7 bytes: with the XxHash-based hashCode this covers one 32-byte stripe, the 4-byte
// step and the trailing single-byte loop (the 8-byte loop is not reached).
WritableMemory wmem = WritableMemory.allocate(32 + 7);
int hc = wmem.hashCode();
// NOTE(review): this listing is a diff rendered without +/- markers, so the next two lines are
// the old and the new expectation; only one of them can hold for a given implementation.
// 28629151 == 31^5, which is what the earlier 31-based polynomial hash yields for 39 zero
// bytes (assuming allocate returns zero-filled memory - confirm); the second value is
// presumably the result of the XxHash-based computation.
assertEquals(hc, 28629151);
assertEquals(hc, -960627348);
}

@Test

0 comments on commit 20303df

Please sign in to comment.