From 11451fff4db537741df8290b3319496c005055a0 Mon Sep 17 00:00:00 2001 From: Michael Penick Date: Tue, 11 Oct 2016 08:21:14 -0700 Subject: [PATCH] Fix: Random partitioner hash values should be in the range [1, 2^127-1] --- src/md5.cpp | 51 ++-- src/md5.hpp | 2 - src/token_map_impl.cpp | 36 ++- src/token_map_impl.hpp | 3 + test/unit_tests/src/test_token.cpp | 374 +++++++++++++++++++++++++++++ test/unit_tests/src/uint128.hpp | 1 + 6 files changed, 432 insertions(+), 35 deletions(-) create mode 100644 test/unit_tests/src/test_token.cpp diff --git a/src/md5.cpp b/src/md5.cpp index efaacd637..ae52b8f6c 100644 --- a/src/md5.cpp +++ b/src/md5.cpp @@ -102,38 +102,6 @@ void Md5::update(const uint8_t* data, size_t size) { } void Md5::final(uint8_t* result) { - final(); - - result[0] = a_; - result[1] = a_ >> 8; - result[2] = a_ >> 16; - result[3] = a_ >> 24; - result[4] = b_; - result[5] = b_ >> 8; - result[6] = b_ >> 16; - result[7] = b_ >> 24; - result[8] = c_; - result[9] = c_ >> 8; - result[10] = c_ >> 16; - result[11] = c_ >> 24; - result[12] = d_; - result[13] = d_ >> 8; - result[14] = d_ >> 16; - result[15] = d_ >> 24; - - memset(this, 0, sizeof(Md5)); -} - -void Md5::final(uint64_t* hi, uint64_t* lo) { - final(); - - *hi = static_cast(a_) << 32 | (static_cast(b_) & 0xFFFFFFFF); - *lo = static_cast(c_) << 32 | (static_cast(d_) & 0xFFFFFFFF); - - memset(this, 0, sizeof(Md5)); -} - -void Md5::final() { unsigned long used, free; used = lo_ & 0x3f; @@ -162,6 +130,25 @@ void Md5::final() { buffer_[63] = hi_ >> 24; body(buffer_, 64); + + result[0] = a_; + result[1] = a_ >> 8; + result[2] = a_ >> 16; + result[3] = a_ >> 24; + result[4] = b_; + result[5] = b_ >> 8; + result[6] = b_ >> 16; + result[7] = b_ >> 24; + result[8] = c_; + result[9] = c_ >> 8; + result[10] = c_ >> 16; + result[11] = c_ >> 24; + result[12] = d_; + result[13] = d_ >> 8; + result[14] = d_ >> 16; + result[15] = d_ >> 24; + + memset(this, 0, sizeof(Md5)); } // This processes one or more 64-byte data 
blocks, but does NOT update diff --git a/src/md5.hpp b/src/md5.hpp index bd526093b..d3f1ddf8f 100644 --- a/src/md5.hpp +++ b/src/md5.hpp @@ -33,10 +33,8 @@ class Md5 { void update(const uint8_t* data, size_t size); void final(uint8_t* result); - void final(uint64_t* hi, uint64_t* lo); private: - void final(); const uint8_t* body(const uint8_t* data, size_t size); private: diff --git a/src/token_map_impl.cpp b/src/token_map_impl.cpp index 8289fe415..d8f8fe24e 100644 --- a/src/token_map_impl.cpp +++ b/src/token_map_impl.cpp @@ -100,11 +100,45 @@ RandomPartitioner::Token RandomPartitioner::from_string(const StringRef& str) { return token; } +uint64_t RandomPartitioner::encode(uint8_t* bytes) { + uint64_t result = 0; + const size_t num_bytes = sizeof(uint64_t); + for (size_t i = 0; i < num_bytes; ++i) { + result |= (static_cast(bytes[i]) << (8 * (num_bytes - i - 1))); + } + return result; +} + +RandomPartitioner::Token RandomPartitioner::abs(RandomPartitioner::Token token) { + if (token.hi & 0x8000000000000000ULL) { + token.hi = ~token.hi; + token.lo = ~token.lo; + + uint64_t old_lo = token.lo; + ++token.lo; + // Carry to "hi" if our "lo" value wrapped + if(token.lo < old_lo) { + ++token.hi; + } + } + return token; +} + RandomPartitioner::Token RandomPartitioner::hash(const StringRef& str) { Md5 hash; hash.update(reinterpret_cast(str.data()), str.size()); + uint8_t digest[16]; + hash.final(digest); Token token; - hash.final(&token.hi, &token.lo); + + // For compatibility with Cassandra we interpret the MD5 as a big-endian value: + // Reference: https://docs.oracle.com/javase/7/docs/api/java/math/BigInteger.html#BigInteger(byte[]) + token.hi = encode(digest); + token.lo = encode(digest + 8); + + // Then we find the absolute value of the two's complement representation. 
+ token = abs(token); + return token; } diff --git a/src/token_map_impl.hpp b/src/token_map_impl.hpp index 21e34c9e7..ea76adb35 100644 --- a/src/token_map_impl.hpp +++ b/src/token_map_impl.hpp @@ -121,6 +121,9 @@ struct RandomPartitioner { } }; + static Token abs(Token token); + static uint64_t encode(uint8_t* bytes); + static Token from_string(const StringRef& str); static Token hash(const StringRef& str); static StringRef name() { return "RandomPartitioner"; } diff --git a/test/unit_tests/src/test_token.cpp b/test/unit_tests/src/test_token.cpp new file mode 100644 index 000000000..087c53daf --- /dev/null +++ b/test/unit_tests/src/test_token.cpp @@ -0,0 +1,374 @@ +/* + Copyright (c) 2014-2016 DataStax + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifdef STAND_ALONE +# define BOOST_TEST_MODULE cassandra +#endif + +#include "token_map_impl.hpp" + +#include "uint128.hpp" + +#include +#include + +#include + +namespace { + +std::string to_string(cass::RandomPartitioner::Token token) { + return numeric::uint128_t(token.lo, token.hi).to_string(); +} + +} // namespace + +BOOST_AUTO_TEST_SUITE(token) + +BOOST_AUTO_TEST_CASE(random_abs) +{ + // Two's complement: -170141183460469231731687303715884105728 + { + uint8_t digest[16] = { }; + digest[0] = 0x80; + + cass::RandomPartitioner::Token token; + token.hi = cass::RandomPartitioner::encode(digest); + token.lo = cass::RandomPartitioner::encode(digest + 8); + token = cass::RandomPartitioner::abs(token); + + BOOST_CHECK(to_string(token) == "170141183460469231731687303715884105728"); + + } + + // Two's complement: -170141183460469231731687303715884105727 + { + uint8_t digest[16] = { }; + digest[0] = 0x80; + digest[15] = 0x01; + + cass::RandomPartitioner::Token token; + token.hi = cass::RandomPartitioner::encode(digest); + token.lo = cass::RandomPartitioner::encode(digest + 8); + token = cass::RandomPartitioner::abs(token); + + BOOST_CHECK(to_string(token) == "170141183460469231731687303715884105727"); + } + + // Two's complement: -18446744073709551616 + { + uint8_t digest[16] = { }; + digest[0] = 0xFF; + digest[1] = 0xFF; + digest[2] = 0xFF; + digest[3] = 0xFF; + digest[4] = 0xFF; + digest[5] = 0xFF; + digest[6] = 0xFF; + digest[7] = 0xFF; + + cass::RandomPartitioner::Token token; + token.hi = cass::RandomPartitioner::encode(digest); + token.lo = cass::RandomPartitioner::encode(digest + 8); + token = cass::RandomPartitioner::abs(token); + + BOOST_CHECK(to_string(token) == "18446744073709551616"); + } + + // Two's complement: 0 + { + uint8_t digest[16] = { }; + + cass::RandomPartitioner::Token token; + token.hi = cass::RandomPartitioner::encode(digest); + token.lo = cass::RandomPartitioner::encode(digest + 8); + token = cass::RandomPartitioner::abs(token); + + 
BOOST_CHECK(numeric::uint128_t(token.lo, token.hi).to_string() == "0"); + } + + // Two's complement: 170141183460469231731687303715884105727 + { + uint8_t digest[16] = { }; + digest[0] = 0x7F; + digest[1] = 0xFF; + digest[2] = 0xFF; + digest[3] = 0xFF; + digest[4] = 0xFF; + digest[5] = 0xFF; + digest[6] = 0xFF; + digest[7] = 0xFF; + digest[8] = 0xFF; + digest[9] = 0xFF; + digest[10] = 0xFF; + digest[11] = 0xFF; + digest[12] = 0xFF; + digest[13] = 0xFF; + digest[14] = 0xFF; + digest[15] = 0xFF; + + cass::RandomPartitioner::Token token; + token.hi = cass::RandomPartitioner::encode(digest); + token.lo = cass::RandomPartitioner::encode(digest + 8); + token = cass::RandomPartitioner::abs(token); + + BOOST_CHECK(to_string(token) == "170141183460469231731687303715884105727"); + } +} + +BOOST_AUTO_TEST_CASE(random_less_than) +{ + // 'hi' is the same and 'lo' is less than + { + cass::RandomPartitioner::Token t1, t2; + + // Two's complement: 0 + { + uint8_t digest[16] = { }; + t1.hi = cass::RandomPartitioner::encode(digest); + t1.lo = cass::RandomPartitioner::encode(digest + 8); + t1 = cass::RandomPartitioner::abs(t1); + } + + // Two's complement: 1 + { + uint8_t digest[16] = { }; + digest[15] = 0x01; + t2.hi = cass::RandomPartitioner::encode(digest); + t2.lo = cass::RandomPartitioner::encode(digest + 8); + t2 = cass::RandomPartitioner::abs(t2); + } + + BOOST_CHECK(t1 < t2); + } + + // 'lo' is the same and 'hi' is less than + { + cass::RandomPartitioner::Token t1, t2; + + // Two's complement: 18446744073709551616 + { + uint8_t digest[16] = { }; + digest[7] = 0x01; + + t1.hi = cass::RandomPartitioner::encode(digest); + t1.lo = cass::RandomPartitioner::encode(digest + 8); + t1 = cass::RandomPartitioner::abs(t1); + } + + // Two's complement: 36893488147419103232 + { + uint8_t digest[16] = { }; + digest[7] = 0x02; + t2.hi = cass::RandomPartitioner::encode(digest); + t2.lo = cass::RandomPartitioner::encode(digest + 8); + t2 = cass::RandomPartitioner::abs(t2); + } + + 
BOOST_CHECK(t1 < t2); + } + + // Absolute value of negative values + { + cass::RandomPartitioner::Token t1, t2; + + // Two's complement: -170141183460469231731687303715884105727 + { + uint8_t digest[16] = { }; + digest[0] = 0x80; + digest[15] = 0x01; + t1.hi = cass::RandomPartitioner::encode(digest); + t1.lo = cass::RandomPartitioner::encode(digest + 8); + t1 = cass::RandomPartitioner::abs(t1); + } + + // Two's complement: -170141183460469231731687303715884105728 + { + uint8_t digest[16] = { }; + digest[0] = 0x80; + t2.hi = cass::RandomPartitioner::encode(digest); + t2.lo = cass::RandomPartitioner::encode(digest + 8); + t2 = cass::RandomPartitioner::abs(t2); + } + + BOOST_CHECK(t1 < t2); + } + + // Same value + { + cass::RandomPartitioner::Token t1, t2; + + // Two's complement: 18446744073709551616 + { + uint8_t digest[16] = { }; + digest[7] = 0x01; + t1.hi = cass::RandomPartitioner::encode(digest); + t1.lo = cass::RandomPartitioner::encode(digest + 8); + t1 = cass::RandomPartitioner::abs(t1); + } + + // Two's complement: 18446744073709551616 + { + uint8_t digest[16] = { }; + digest[7] = 0x01; + t2.hi = cass::RandomPartitioner::encode(digest); + t2.lo = cass::RandomPartitioner::encode(digest + 8); + t2 = cass::RandomPartitioner::abs(t2); + } + + BOOST_CHECK(!(t1 < t2)); + } + + // Zero + { + cass::RandomPartitioner::Token t1, t2; + + { + uint8_t digest[16] = { }; + t1.hi = cass::RandomPartitioner::encode(digest); + t1.lo = cass::RandomPartitioner::encode(digest + 8); + t1 = cass::RandomPartitioner::abs(t1); + } + + { + uint8_t digest[16] = { }; + t2.hi = cass::RandomPartitioner::encode(digest); + t2.lo = cass::RandomPartitioner::encode(digest + 8); + t2 = cass::RandomPartitioner::abs(t2); + } + + BOOST_CHECK(!(t1 < t2)); + } +} + +BOOST_AUTO_TEST_CASE(random_equal) +{ + // Same value + { + cass::RandomPartitioner::Token t1, t2; + + // Two's complement: 18446744073709551616 + { + uint8_t digest[16] = { }; + digest[7] = 0x01; + t1.hi = 
cass::RandomPartitioner::encode(digest); + t1.lo = cass::RandomPartitioner::encode(digest + 8); + t1 = cass::RandomPartitioner::abs(t1); + } + + // Two's complement: 18446744073709551616 + { + uint8_t digest[16] = { }; + digest[7] = 0x01; + t2.hi = cass::RandomPartitioner::encode(digest); + t2.lo = cass::RandomPartitioner::encode(digest + 8); + t2 = cass::RandomPartitioner::abs(t2); + } + + BOOST_CHECK(t1 == t2); + } + + // Zero + { + cass::RandomPartitioner::Token t1, t2; + + { + uint8_t digest[16] = { }; + t1.hi = cass::RandomPartitioner::encode(digest); + t1.lo = cass::RandomPartitioner::encode(digest + 8); + t1 = cass::RandomPartitioner::abs(t1); + } + + { + uint8_t digest[16] = { }; + t2.hi = cass::RandomPartitioner::encode(digest); + t2.lo = cass::RandomPartitioner::encode(digest + 8); + t2 = cass::RandomPartitioner::abs(t2); + } + + BOOST_CHECK(t1 == t2); + } + + // 'hi' is the same and 'lo' is less than + { + cass::RandomPartitioner::Token t1, t2; + + // Two's complement: 0 + { + uint8_t digest[16] = { }; + t1.hi = cass::RandomPartitioner::encode(digest); + t1.lo = cass::RandomPartitioner::encode(digest + 8); + t1 = cass::RandomPartitioner::abs(t1); + } + + // Two's complement: 1 + { + uint8_t digest[16] = { }; + digest[15] = 0x01; + t2.hi = cass::RandomPartitioner::encode(digest); + t2.lo = cass::RandomPartitioner::encode(digest + 8); + t2 = cass::RandomPartitioner::abs(t2); + } + + BOOST_CHECK(!(t1 == t2)); + } + + // 'lo' is the same and 'hi' is less than + { + cass::RandomPartitioner::Token t1, t2; + + // Two's complement: 18446744073709551616 + { + uint8_t digest[16] = { }; + digest[7] = 0x01; + + t1.hi = cass::RandomPartitioner::encode(digest); + t1.lo = cass::RandomPartitioner::encode(digest + 8); + t1 = cass::RandomPartitioner::abs(t1); + } + + // Two's complement: 36893488147419103232 + { + uint8_t digest[16] = { }; + digest[7] = 0x02; + t2.hi = cass::RandomPartitioner::encode(digest); + t2.lo = cass::RandomPartitioner::encode(digest + 8); + t2 = 
cass::RandomPartitioner::abs(t2); + } + + BOOST_CHECK(!(t1 == t2)); + } +} + +BOOST_AUTO_TEST_CASE(random_hash) +{ + // Sampled using: SELECT token(key) FROM sometable; + BOOST_CHECK(to_string(cass::RandomPartitioner::hash("a")) == "16955237001963240173058271559858726497"); + BOOST_CHECK(to_string(cass::RandomPartitioner::hash("b")) == "144992942750327304334463589818972416113"); + BOOST_CHECK(to_string(cass::RandomPartitioner::hash("c")) == "99079589977253916124855502156832923443"); + BOOST_CHECK(to_string(cass::RandomPartitioner::hash("d")) == "166860289390734216023086131251507064403"); + BOOST_CHECK(to_string(cass::RandomPartitioner::hash("abc")) == "148866708576779697295343134153845407886"); + BOOST_CHECK(to_string(cass::RandomPartitioner::hash("xyz")) == "61893731502141497228477852773302439842"); +} + +BOOST_AUTO_TEST_CASE(random_from_string) +{ + BOOST_CHECK(to_string(cass::RandomPartitioner::from_string("0")) == "0"); + BOOST_CHECK(to_string(cass::RandomPartitioner::from_string("1")) == "1"); + BOOST_CHECK(to_string(cass::RandomPartitioner::from_string("170141183460469231731687303715884105727")) == "170141183460469231731687303715884105727"); + BOOST_CHECK(to_string(cass::RandomPartitioner::from_string("170141183460469231731687303715884105728")) == "170141183460469231731687303715884105728"); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/unit_tests/src/uint128.hpp b/test/unit_tests/src/uint128.hpp index 9414d8b36..9d693141a 100644 --- a/test/unit_tests/src/uint128.hpp +++ b/test/unit_tests/src/uint128.hpp @@ -95,6 +95,7 @@ class uint128 : boost::operators, boost::shiftable { uint128(double value) : lo(static_cast(value)), hi(0) {} uint128(const uint128 &value) : lo(value.lo), hi (value.hi) {} uint128(base_type value) : lo(value), hi(0) {} + uint128(base_type lo, base_type hi) : lo(lo), hi(hi) {} uint128(const std::string &sz) : lo(0), hi(0) {