Skip to content
Permalink
Browse files
add type-converting copy constructor to quantiles sketch
  • Loading branch information
Jon committed May 12, 2022
1 parent dfd9496 commit f8b04b2eadc99d931439ac025f28d167b846192b
Showing 4 changed files with 100 additions and 2 deletions.
@@ -161,6 +161,9 @@ class quantiles_sketch {
quantiles_sketch& operator=(const quantiles_sketch& other);
quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;

/**
* Type-converting constructor.
* Builds this sketch from a sketch whose item type, comparator and allocator types may differ from this one's.
* Declared explicit, so the conversion never happens implicitly.
* @param other sketch of a (possibly) different item type to copy from
* @param allocator instance of an Allocator to use for this sketch; defaults to a default-constructed Allocator
*/
template<typename From, typename FC, typename FA>
explicit quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const Allocator& allocator = Allocator());

/**
* Updates this sketch with the given data item.
* @param value an item from a stream of items
@@ -138,6 +138,70 @@ is_sorted_(is_sorted)
throw std::logic_error("Item count does not match value computed from k, n");
}

/**
 * Type-converting copy constructor: builds a sketch of T from a sketch of From.
 *
 * k and n are taken from the source sketch. Every retained item of the source is
 * converted to T and placed in the base buffer (weight 1) or in the level that
 * corresponds to its weight. After conversion each level is checked to still be
 * ordered under comparator C, since a lossy or non-monotonic conversion can break
 * the ordering the levels rely on.
 *
 * @param other sketch to copy from; its item type From must be convertible to T
 *        or T must be constructible from From
 * @param allocator allocator instance used for all storage owned by this sketch
 * @throws std::logic_error if the converted items of any level are not ordered
 *         according to C
 */
template<typename T, typename C, typename A>
template<typename From, typename FC, typename FA>
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
allocator_(allocator),
k_(other.get_k()),
n_(other.get_n()),
bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
base_buffer_(allocator),
levels_(allocator),
min_value_(nullptr),
max_value_(nullptr),
is_sorted_(false)
{
  // std::is_constructible<X, Args...> asks "can X be built from Args", so the
  // target type T must come first and the source type From second.
  static_assert(std::is_convertible<From, T>::value
                || std::is_constructible<T, From>::value,
                "Copy constructor across types requires std::is_convertible or std::is_constructible");

  if (other.is_empty()) {
    base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
    return;
  }

  // reserve space in levels
  const uint8_t num_levels = compute_levels_needed(k_, n_);
  levels_.reserve(num_levels);
  for (uint8_t i = 0; i < num_levels; ++i) {
    Level level(allocator);
    level.reserve(k_);
    levels_.push_back(std::move(level));
  }

  // iterate through points, assigning to the correct level as needed
  for (const auto& pair : other) {
    const uint64_t wt = pair.second;
    if (wt == 1) {
      // explicit conversion so that types only explicitly constructible from From work too
      base_buffer_.push_back(static_cast<T>(pair.first));
      // resize where needed as if adding points via update()
      if (base_buffer_.size() + 1 > base_buffer_.capacity()) {
        const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
        base_buffer_.reserve(new_size);
      }
    } else {
      // an item of weight 2^(idx + 1) belongs to level idx
      const uint8_t idx = count_trailing_zeros_in_u64(wt) - 1;
      levels_[idx].push_back(static_cast<T>(pair.first));
    }
  }

  // validate that ordering within each level is preserved
  // base_buffer_ can be considered unsorted for this purpose
  for (const Level& level : levels_) {
    if (!std::is_sorted(level.begin(), level.end(), C())) {
      throw std::logic_error("Copy construction across types produces invalid sorting");
    }
  }

  // Allocate min/max only after everything above has succeeded: if this constructor
  // throws, the destructor does not run, so raw allocations made earlier would leak.
  min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
  try {
    max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
  } catch (...) {
    // release the already-built min value before propagating the failure
    min_value_->~T();
    allocator_.deallocate(min_value_, 1);
    min_value_ = nullptr;
    throw;
  }
}


template<typename T, typename C, typename A>
quantiles_sketch<T, C, A>::~quantiles_sketch() {
if (min_value_ != nullptr) {
@@ -82,7 +82,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions", "[quantiles_ske
const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
REQUIRE(delta == Approx(0.02).margin(0.01));
const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";

REQUIRE_FALSE(delta > threshold);
REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
}
@@ -102,7 +102,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions high resolution",
const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
REQUIRE(delta == Approx(0.02).margin(0.01));
const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";

REQUIRE(delta > threshold);
REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
}
@@ -903,6 +903,37 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
}
}

SECTION("Type converting copy constructor") {
  const uint16_t k = 8;
  const int n = 403;
  quantiles_sketch<double> source(k);

  // a freshly constructed sketch of another item type starts out empty
  quantiles_sketch<float> untouched(k);
  REQUIRE(untouched.is_empty());

  // every item carries a small fractional part that is lost when converting to int
  int next = 0;
  while (next < n) {
    source.update(next + .01);
    ++next;
  }

  // the converted sketch must agree with the source on its summary statistics
  quantiles_sketch<int> converted(source);
  REQUIRE(source.get_n() == converted.get_n());
  REQUIRE(source.get_k() == converted.get_k());
  REQUIRE(source.get_num_retained() == converted.get_num_retained());

  using double_entry = std::pair<double, uint64_t>;
  using int_entry = std::pair<int, uint64_t>;

  auto source_view = source.get_sorted_view(false);
  std::vector<double_entry> source_items(source_view.begin(), source_view.end());

  auto converted_view = converted.get_sorted_view(false);
  std::vector<int_entry> converted_items(converted_view.begin(), converted_view.end());

  REQUIRE(source_items.size() == converted_items.size());

  // walk both item lists in lock step; sizes were checked to be equal above
  auto source_it = source_items.begin();
  for (const int_entry& converted_item : converted_items) {
    // known truncation with conversion so approximate result
    REQUIRE(source_it->first == Approx(converted_item.first).margin(0.1));
    // exact equality for weights
    REQUIRE(source_it->second == converted_item.second);
    ++source_it;
  }
}

// cleanup
if (test_allocator_total_bytes != 0) {
REQUIRE(test_allocator_total_bytes == 0);

0 comments on commit f8b04b2

Please sign in to comment.