Skip to content

Commit 86bac57

Browse files
author
Michael Penick
committed
Fix: Random partitioner hash values should be in the range [1, 2^127-1]
1 parent 188d004 commit 86bac57

File tree

5 files changed

+58
-35
lines changed

5 files changed

+58
-35
lines changed

src/md5.cpp

Lines changed: 19 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -102,38 +102,6 @@ void Md5::update(const uint8_t* data, size_t size) {
102102
}
103103

104104
void Md5::final(uint8_t* result) {
105-
final();
106-
107-
result[0] = a_;
108-
result[1] = a_ >> 8;
109-
result[2] = a_ >> 16;
110-
result[3] = a_ >> 24;
111-
result[4] = b_;
112-
result[5] = b_ >> 8;
113-
result[6] = b_ >> 16;
114-
result[7] = b_ >> 24;
115-
result[8] = c_;
116-
result[9] = c_ >> 8;
117-
result[10] = c_ >> 16;
118-
result[11] = c_ >> 24;
119-
result[12] = d_;
120-
result[13] = d_ >> 8;
121-
result[14] = d_ >> 16;
122-
result[15] = d_ >> 24;
123-
124-
memset(this, 0, sizeof(Md5));
125-
}
126-
127-
void Md5::final(uint64_t* hi, uint64_t* lo) {
128-
final();
129-
130-
*hi = static_cast<uint64_t>(a_) << 32 | (static_cast<uint64_t>(b_) & 0xFFFFFFFF);
131-
*lo = static_cast<uint64_t>(c_) << 32 | (static_cast<uint64_t>(d_) & 0xFFFFFFFF);
132-
133-
memset(this, 0, sizeof(Md5));
134-
}
135-
136-
void Md5::final() {
137105
unsigned long used, free;
138106

139107
used = lo_ & 0x3f;
@@ -162,6 +130,25 @@ void Md5::final() {
162130
buffer_[63] = hi_ >> 24;
163131

164132
body(buffer_, 64);
133+
134+
result[0] = a_;
135+
result[1] = a_ >> 8;
136+
result[2] = a_ >> 16;
137+
result[3] = a_ >> 24;
138+
result[4] = b_;
139+
result[5] = b_ >> 8;
140+
result[6] = b_ >> 16;
141+
result[7] = b_ >> 24;
142+
result[8] = c_;
143+
result[9] = c_ >> 8;
144+
result[10] = c_ >> 16;
145+
result[11] = c_ >> 24;
146+
result[12] = d_;
147+
result[13] = d_ >> 8;
148+
result[14] = d_ >> 16;
149+
result[15] = d_ >> 24;
150+
151+
memset(this, 0, sizeof(Md5));
165152
}
166153

167154
// This processes one or more 64-byte data blocks, but does NOT update

src/md5.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,8 @@ class Md5 {
3333

3434
void update(const uint8_t* data, size_t size);
3535
void final(uint8_t* result);
36-
void final(uint64_t* hi, uint64_t* lo);
3736

3837
private:
39-
void final();
4038
const uint8_t* body(const uint8_t* data, size_t size);
4139

4240
private:

src/token_map_impl.cpp

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,45 @@ RandomPartitioner::Token RandomPartitioner::from_string(const StringRef& str) {
100100
return token;
101101
}
102102

103+
uint64_t RandomPartitioner::encode(uint8_t* bytes) {
104+
uint64_t result = 0;
105+
const size_t num_bytes = sizeof(uint64_t);
106+
for (size_t i = 0; i < num_bytes; ++i) {
107+
result |= (static_cast<uint64_t>(bytes[i]) << (8 * (num_bytes - i - 1)));
108+
}
109+
return result;
110+
}
111+
112+
RandomPartitioner::Token RandomPartitioner::abs(RandomPartitioner::Token token) {
113+
if (token.hi & 0x8000000000000000ULL) {
114+
token.hi = ~token.hi;
115+
token.lo = ~token.lo;
116+
117+
uint64_t old_lo = token.lo;
118+
++token.lo;
119+
// Carry to "hi" if our "lo" value wrapped
120+
if(token.lo < old_lo) {
121+
++token.hi;
122+
}
123+
}
124+
return token;
125+
}
126+
103127
RandomPartitioner::Token RandomPartitioner::hash(const StringRef& str) {
104128
Md5 hash;
105129
hash.update(reinterpret_cast<const uint8_t*>(str.data()), str.size());
130+
uint8_t digest[16];
131+
hash.final(digest);
106132
Token token;
107-
hash.final(&token.hi, &token.lo);
133+
134+
// For compatability with Cassandra we interpret the MD5 as a big-endian value:
135+
// Reference: https://docs.oracle.com/javase/7/docs/api/java/math/BigInteger.html#BigInteger(byte[])
136+
token.hi = encode(digest);
137+
token.lo = encode(digest + 8);
138+
139+
// Then we find the absolute value of the two's complement representation.
140+
token = abs(token);
141+
108142
return token;
109143
}
110144

src/token_map_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@ struct RandomPartitioner {
121121
}
122122
};
123123

124+
static Token abs(Token token);
125+
static uint64_t encode(uint8_t* bytes);
126+
124127
static Token from_string(const StringRef& str);
125128
static Token hash(const StringRef& str);
126129
static StringRef name() { return "RandomPartitioner"; }

test/unit_tests/src/uint128.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ class uint128 : boost::operators<uint128>, boost::shiftable<uint128> {
9595
uint128(double value) : lo(static_cast<base_type>(value)), hi(0) {}
9696
uint128(const uint128 &value) : lo(value.lo), hi (value.hi) {}
9797
uint128(base_type value) : lo(value), hi(0) {}
98+
uint128(base_type lo, base_type hi) : lo(lo), hi(hi) {}
9899

99100
uint128(const std::string &sz) : lo(0), hi(0) {
100101

0 commit comments

Comments
 (0)