Skip to content
Permalink
Browse files
remove unnecessary move, sort BB as side-effect on serialize despite …
…const method since always compact
  • Loading branch information
jmalkin committed Apr 5, 2022
1 parent 998e497 commit 093e33e06e7b73ad346113338c04ecdd599745a4
Showing 2 changed files with 25 additions and 51 deletions.
@@ -174,10 +174,14 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
const uint8_t family = FAMILY;
write(os, family);

// side-effect: sort base buffer since always compact
// can't set is_sorted_ since const method
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());

// empty, ordered, compact are valid flags
const uint8_t flags_byte(
(is_empty() ? 1 << flags::IS_EMPTY : 0)
| (is_sorted_ ? 1 << flags::IS_SORTED : 0)
| (1 << flags::IS_SORTED) // always sorted as side effect noted above
| (1 << flags::IS_COMPACT) // always compact -- could be optional for numeric types?
);
write(os, flags_byte);
@@ -218,11 +222,15 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
const uint8_t family = FAMILY;
ptr += copy_to_mem(family, ptr);

// side-effect: sort base buffer since always compact
// can't set is_sorted_ since const method
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());

// empty, ordered, compact are valid flags
const uint8_t flags_byte(
(is_empty() ? 1 << flags::IS_EMPTY : 0)
| (is_sorted_ ? 1 << flags::IS_SORTED : 0)
| (1 << flags::IS_COMPACT) // always compact -- could be optional for numeric types?
| (1 << flags::IS_SORTED) // always sorted as side effect noted above
| (1 << flags::IS_COMPACT) // always compact
);
ptr += copy_to_mem(flags_byte, ptr);
ptr += copy_to_mem(k_, ptr);
@@ -345,7 +353,7 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
level.insert(level.begin(),
std::make_move_iterator(items.get()),
std::make_move_iterator(items.get() + num_items));
return std::move(level);
return level;
}

template<typename T, typename C, typename A>
@@ -274,20 +274,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
}
}
}
/*
SECTION("deserialize from java") {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
REQUIRE_FALSE(sketch.is_empty());
REQUIRE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 1000000);
REQUIRE(sketch.get_num_retained() == 614);
REQUIRE(sketch.get_min_value() == 0.0);
REQUIRE(sketch.get_max_value() == 999999.0);
}
*/

SECTION("stream serialize deserialize empty") {
quantiles_float_sketch sketch(200, 0);
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -358,20 +345,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.get_rank(1) == 0.0);
REQUIRE(sketch2.get_rank(2) == 1.0);
}
/*
SECTION("deserialize one item v1") {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);
auto sketch = quantiles_float_sketch::deserialize(is, serde<float>(), test_allocator<float>(0));
REQUIRE_FALSE(sketch.is_empty());
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 1);
REQUIRE(sketch.get_num_retained() == 1);
REQUIRE(sketch.get_min_value() == 1.0);
REQUIRE(sketch.get_max_value() == 1.0);
}
*/

SECTION("stream serialize deserialize three items") {
quantiles_float_sketch sketch(200, 0);
sketch.update(1.0f);
@@ -745,30 +719,22 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
}
/*
SECTION("max serialized size arithmetic type") {
REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 10) == 1968);
REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 100) == 2316);
REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 1000) == 2440);
REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 1000000) == 2800);
REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 1000000000) == 3160);
REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(128, 10) == 1968);
REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(128, 100) == 2316);
REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(128, 1000) == 2440);
REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(256, 1000000) == 2800);
REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(256, 1000000000) == 3160);
}
SECTION("max serialized size non-arithmetic type") {
REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 10, 4) == 1968);
REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 100, 4) == 2316);
REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000, 4) == 2440);
REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000, 4) == 2800);
REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000000, 4) == 3160);
}
SECTION("issue #236") {
kll_sketch<int8_t> kll;
kll.update(1);
kll.update(2);
kll.update(3);
auto blob = kll.serialize();
auto kll2 = kll_sketch<int8_t>::deserialize(blob.data(), blob.size());
REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 10, 4) == 1968);
REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 100, 4) == 2316);
REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 1000, 4) == 2440);
REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000, 4) == 2800);
REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000000, 4) == 3160);
}
*/

// cleanup
if (test_allocator_total_bytes != 0) {
REQUIRE(test_allocator_total_bytes == 0);

0 comments on commit 093e33e

Please sign in to comment.