Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 21 additions & 31 deletions hazelcast/hash.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,28 @@
import math
from hazelcast.serialization import LE_UINT
from hazelcast.six.moves import range


def _fmix(h):
    """Apply the MurmurHash3 32-bit finalization mix to ``h``.

    Args:
        h (int): Intermediate hash state. Only the low 32 bits are used.

    Returns:
        int: The mixed value, in the range ``[0, 0xFFFFFFFF]``.
    """
    # Python ints are unbounded and ``>>`` on a negative int sign-extends.
    # Reduce to a 32-bit word first so the shifts below behave as logical
    # shifts even when the caller passes a negative or oversized value.
    h &= 0xFFFFFFFF
    h ^= h >> 16
    h = (h * 0x85ebca6b) & 0xFFFFFFFF
    h ^= h >> 13
    h = (h * 0xc2b2ae35) & 0xFFFFFFFF
    h ^= h >> 16
    return h


def murmur_hash3_x86_32(data, offset, size, seed=0x01000193):
def murmur_hash3_x86_32(data):
"""murmur3 hash function to determine partition

Args:
data (bytearray): Input byte array
offset (int): Offset.
size (int): Byte length.
seed (int): Murmur hash seed hazelcast uses 0x01000193.
data (bytearray or bytes): Input byte array

Returns:
int: Calculated hash value.
"""
key = bytearray(data[offset: offset + size])
length = len(key)
nblocks = int(length / 4)
length = max(len(data) - 8, 0) # Heap data overhead
nblocks = length // 4

h1 = seed
h1 = 0x01000193

c1 = 0xcc9e2d51
c2 = 0x1b873593

# body
for block_start in range(0, nblocks * 4, 4):
# ??? big endian?
k1 = key[block_start + 3] << 24 | \
key[block_start + 2] << 16 | \
key[block_start + 1] << 8 | \
key[block_start + 0]
k1 = LE_UINT.unpack_from(data, block_start + 8)[0]

k1 = c1 * k1 & 0xFFFFFFFF
k1 = (k1 << 15 | k1 >> 17) & 0xFFFFFFFF # inlined ROTL32
Expand All @@ -53,25 +37,31 @@ def murmur_hash3_x86_32(data, offset, size, seed=0x01000193):
k1 = 0
tail_size = length & 3

# Offsets below are shifted according to heap data overhead
if tail_size >= 3:
k1 ^= key[tail_index + 2] << 16
k1 ^= data[tail_index + 10] << 16
if tail_size >= 2:
k1 ^= key[tail_index + 1] << 8
k1 ^= data[tail_index + 9] << 8
if tail_size >= 1:
k1 ^= key[tail_index + 0]
k1 ^= data[tail_index + 8]

if tail_size != 0:
k1 = (k1 * c1) & 0xFFFFFFFF
k1 = (k1 << 15 | k1 >> 17) & 0xFFFFFFFF # _ROTL32
k1 = (k1 * c2) & 0xFFFFFFFF
h1 ^= k1

result = _fmix(h1 ^ length)
return -(result & 0x80000000) | (result & 0x7FFFFFFF)
h1 ^= length
h1 ^= h1 >> 16
h1 = (h1 * 0x85ebca6b) & 0xFFFFFFFF
h1 ^= h1 >> 13
h1 = (h1 * 0xc2b2ae35) & 0xFFFFFFFF
h1 ^= h1 >> 16
return -(h1 & 0x80000000) | (h1 & 0x7FFFFFFF)


def hash_to_index(mm_hash, length):
    """Map a 32-bit hash value to an index in ``[0, length)``.

    Args:
        mm_hash (int): Hash value, signed or unsigned 32-bit.
        length (int): Number of slots; must be non-zero.

    Returns:
        int: 0 when ``mm_hash`` is the 32-bit minimum value, otherwise
        ``abs(mm_hash) % length``.
    """
    # A hash folded into the signed 32-bit range carries the minimum value
    # as -0x80000000, so checking only the unsigned spelling 0x80000000
    # would never match it. Both spellings are accepted so callers that
    # pass an unfolded value keep working.
    if mm_hash in (-0x80000000, 0x80000000):
        return 0
    return abs(mm_hash) % length
14 changes: 12 additions & 2 deletions hazelcast/serialization/data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from hazelcast import six
from hazelcast.hash import murmur_hash3_x86_32
from hazelcast.serialization import BE_INT
from hazelcast.serialization.serialization_const import *
Expand Down Expand Up @@ -92,10 +93,19 @@ def hash_code(self):
Returns:
int: The murmur hash of the internal data.
"""
return murmur_hash3_x86_32(self._buffer, DATA_OFFSET, self.data_size())
return murmur_hash3_x86_32(self._buffer)

def __hash__(self):
    """Return the murmur hash of the internal buffer.

    Returns:
        int: Result of ``murmur_hash3_x86_32`` over the buffer.
    """
    # Data objects are used as keys in NearCache. When this method is
    # called on a Data object received from a member, the buffer is of
    # type bytes rather than bytearray. On Python 2 bytes is an alias of
    # str, which the murmur hash cannot consume directly, so the buffer
    # is converted there. No conversion is needed on Python 3.
    buf = self._buffer
    if not six.PY3 and not isinstance(buf, bytearray):
        buf = bytearray(buf)
    return murmur_hash3_x86_32(buf)

def __eq__(self, other):
return isinstance(other, Data) and self.total_size() == other.total_size() \
Expand Down
35 changes: 22 additions & 13 deletions tests/murmur_hash_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,29 @@

class HashTest(unittest.TestCase):
    """Checks murmur_hash3_x86_32 and hash_to_index against fixed vectors."""

    def test_hash(self):
        """Each input must yield the listed hash and the listed index."""
        # Each entry is (input bytes, expected hash, expected index for a
        # length of 271). The first 8 bytes of every input stand in for the
        # heap data overhead.
        expected = [  # Expected values are from the Java implementation
            # 00000000 -> HEAP_DATA_OVERHEAD
            (b"00000000key-1", 1228513025, 107),
            (b"12345678key-1", 1228513025, 107),  # Heap data overhead should not matter
            (b"00000000key-2", 1503416236, 105),
            (b"00000000key-3", 1876349747, 218),
            (b"00000000key-4", -914632498, 181),
            (b"00000000key-5", -803210507, 111),
            (b"00000000key-6", -847942313, 115),
            (b"00000000key-7", 1196747334, 223),
            (b"00000000key-8", -1444149994, 208),
            (b"00000000key-9", 1182720020, 140),
            # Test with different lengths
            (b"00000000", -1585187909, 238),
            (b"00000000a", -1686100800, 46),
            (b"00000000ab", 312914265, 50),
            (b"00000000abc", -2068121803, 208),
            (b"00000000abcd", -973615161, 236),
            (b"", -1585187909, 238),
        ]

        for key, mm_hash, partition_id in expected:
            h = murmur_hash3_x86_32(bytearray(key))
            p = hash_to_index(h, 271)
            # The messages name the failing input, since every vector runs
            # inside one test method.
            self.assertEqual(
                h, mm_hash,
                "hash of %r: got %r, expected %r" % (key, h, mm_hash))
            self.assertEqual(
                p, partition_id,
                "index of %r: got %r, expected %r" % (key, p, partition_id))