Skip to content

Commit 64ee4a9

Browse files
author
Michael Penick
committed
Fix: Random partitioner hash values should be in the range [1, 2^127-1]
1 parent 188d004 commit 64ee4a9

File tree

5 files changed

+420
-17
lines changed

5 files changed

+420
-17
lines changed

src/md5.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -124,15 +124,6 @@ void Md5::final(uint8_t* result) {
124124
memset(this, 0, sizeof(Md5));
125125
}
126126

127-
void Md5::final(uint64_t* hi, uint64_t* lo) {
128-
final();
129-
130-
*hi = static_cast<uint64_t>(a_) << 32 | (static_cast<uint64_t>(b_) & 0xFFFFFFFF);
131-
*lo = static_cast<uint64_t>(c_) << 32 | (static_cast<uint64_t>(d_) & 0xFFFFFFFF);
132-
133-
memset(this, 0, sizeof(Md5));
134-
}
135-
136127
void Md5::final() {
137128
unsigned long used, free;
138129

src/token_map_impl.cpp

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,45 @@ RandomPartitioner::Token RandomPartitioner::from_string(const StringRef& str) {
100100
return token;
101101
}
102102

103+
uint64_t RandomPartitioner::encode(uint8_t* bytes) {
104+
uint64_t result = 0;
105+
const size_t num_bytes = sizeof(uint64_t);
106+
for (size_t i = 0; i < num_bytes; ++i) {
107+
result |= (static_cast<uint64_t>(bytes[i]) << (8 * (num_bytes - i - 1)));
108+
}
109+
return result;
110+
}
111+
112+
RandomPartitioner::Token RandomPartitioner::abs(RandomPartitioner::Token token) {
113+
if (token.hi & 0x8000000000000000ULL) {
114+
token.hi = ~token.hi;
115+
token.lo = ~token.lo;
116+
117+
uint64_t old_lo = token.lo;
118+
++token.lo;
119+
// Carry to "hi" if our "lo" value wrapped
120+
if(token.lo < old_lo) {
121+
++token.hi;
122+
}
123+
}
124+
return token;
125+
}
126+
103127
RandomPartitioner::Token RandomPartitioner::hash(const StringRef& str) {
104128
Md5 hash;
105129
hash.update(reinterpret_cast<const uint8_t*>(str.data()), str.size());
130+
uint8_t digest[16];
131+
hash.final(digest);
106132
Token token;
107-
hash.final(&token.hi, &token.lo);
133+
134+
// For compatibility with Cassandra we interpret the MD5 as a big-endian value:
135+
// Reference: https://docs.oracle.com/javase/7/docs/api/java/math/BigInteger.html#BigInteger(byte[])
136+
token.hi = encode(digest);
137+
token.lo = encode(digest + 8);
138+
139+
// Then we find the absolute value of the two's complement representation.
140+
token = abs(token);
141+
108142
return token;
109143
}
110144

src/token_map_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@ struct RandomPartitioner {
121121
}
122122
};
123123

124+
static Token abs(Token token);
125+
static uint64_t encode(uint8_t* bytes);
126+
124127
static Token from_string(const StringRef& str);
125128
static Token hash(const StringRef& str);
126129
static StringRef name() { return "RandomPartitioner"; }

0 commit comments

Comments
 (0)