Skip to content
Permalink
Browse files
move quantiles and req from quantile_calculator to sorted_view (and fix a quantiles deser bug)
  • Loading branch information
Jon committed May 6, 2022
1 parent 295f4c3 commit 5008526064330d5f2bd558d3f495b388fe9fb3e8
Show file tree
Hide file tree
Showing 10 changed files with 60 additions and 224 deletions.
@@ -43,8 +43,6 @@ install(FILES
include/conditional_forward.hpp
include/ceiling_power_of_2.hpp
include/bounds_binomial_proportions.hpp
include/quantile_calculator.hpp
include/quantile_calculator_impl.hpp
include/quantile_sketch_sorted_view.hpp
include/quantile_sketch_sorted_view_impl.hpp
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

This file was deleted.

This file was deleted.

@@ -32,6 +32,7 @@
namespace datasketches {

/**
 * Print hook that is currently a no-op: the string is accepted and discarded.
 * The actual output statement is kept commented out below so it can be
 * re-enabled by hand when debugging.
 *
 * The argument is taken by const reference so that callers do not pay for a
 * string copy that is never used.
 *
 * @param str text that would be written if output were enabled
 */
void println_string(const std::string& str) {
  unused(str); // presumably suppresses the unused-parameter warning while printing is disabled
  //std::cout << str << std::endl;
}

@@ -26,6 +26,7 @@
namespace datasketches {

/**
 * File-local print hook that is currently a no-op: the string is accepted and
 * discarded. The actual output statement is kept commented out below so it can
 * be re-enabled by hand when debugging.
 *
 * The argument is a const reference because the function never modifies it;
 * this also allows temporaries and const strings to be passed directly.
 *
 * @param str text that would be written if output were enabled
 */
static void println(const std::string& str) {
  unused(str); // presumably suppresses the unused-parameter warning while printing is disabled
  //std::cout << str << "\n";
}

@@ -24,7 +24,7 @@
#include <memory>
#include <vector>

#include "quantile_calculator.hpp"
#include "quantile_sketch_sorted_view.hpp"
#include "common_defs.hpp"
#include "serde.hpp"

@@ -241,8 +241,9 @@ class quantiles_sketch {
*
* @return the approximation to the value at the given rank
*/
using quantile_return_type = typename quantile_sketch_sorted_view<T, C, A>::quantile_return_type;
template<bool inclusive = false>
const T& get_quantile(double rank) const;
quantile_return_type get_quantile(double rank) const;

/**
* This is a more efficient multiple-query version of get_quantile().
@@ -441,6 +442,9 @@ class quantiles_sketch {
const_iterator begin() const;
const_iterator end() const;

template<bool inclusive = false>
quantile_sketch_sorted_view<T, C, A> get_sorted_view(bool cumulative) const;

private:
using Level = std::vector<T, Allocator>;
using AllocLevel = typename std::allocator_traits<Allocator>::template rebind_alloc<Level>;
@@ -481,13 +485,6 @@ class quantiles_sketch {
T* max_value_;
bool is_sorted_;

using QuantileCalculator = quantile_calculator<T, Comparator, Allocator>;
using AllocCalc = typename std::allocator_traits<Allocator>::template rebind_alloc<QuantileCalculator>;
class calculator_deleter;
using QuantileCalculatorPtr = typename std::unique_ptr<QuantileCalculator, calculator_deleter>;
template<bool inclusive>
QuantileCalculatorPtr get_quantile_calculator() const;

// for deserialization
class item_deleter;
class items_deleter;
@@ -359,8 +359,12 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
// load base buffer
const uint32_t bb_items = compute_base_buffer_items(k, items_seen);
uint32_t items_to_read = (levels_needed == 0 || is_compact) ? bb_items : 2 * k;
Level base_buffer = deserialize_array(is, items_to_read, 2 * k, serde, allocator);

Level base_buffer = deserialize_array(is, bb_items, 2 * k, serde, allocator);
if (items_to_read > bb_items) { // either equal or greater, never read fewer items
// read remaining items, but don't store them
deserialize_array(is, items_to_read - bb_items, items_to_read - bb_items, serde, allocator);
}

// populate vector of Levels directly
VectorLevels levels(allocator);
levels.reserve(levels_needed);
@@ -470,9 +474,14 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
// load base buffer
const uint32_t bb_items = compute_base_buffer_items(k, items_seen);
uint32_t items_to_read = (levels_needed == 0 || is_compact) ? bb_items : 2 * k;
auto base_buffer_pair = deserialize_array(ptr, end_ptr - ptr, items_to_read, 2 * k, serde, allocator);
auto base_buffer_pair = deserialize_array(ptr, end_ptr - ptr, bb_items, 2 * k, serde, allocator);
ptr += base_buffer_pair.second;

if (items_to_read > bb_items) { // either equal or greater, never read fewer items
// read remaining items, only use to advance the pointer
auto extras = deserialize_array(ptr, end_ptr - ptr, items_to_read - bb_items, items_to_read - bb_items, serde, allocator);
ptr += extras.second;
}

// populate vector of Levels directly
VectorLevels levels(allocator);
levels.reserve(levels_needed);
@@ -637,65 +646,51 @@ double quantiles_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool is_
: 1.576 / std::pow(k, 0.9726);
}

/**
 * Deleter functor for a QuantileCalculator held through a smart pointer.
 * It keeps its own copy of the rebound allocator and, when invoked on a
 * non-null pointer, runs the calculator's destructor explicitly and then
 * hands the single-object storage back to that allocator.
 */
template<typename T, typename C, typename A>
class quantiles_sketch<T, C, A>::calculator_deleter {
  public:
    // Stores a copy of the allocator that will be used for deallocation.
    explicit calculator_deleter(const AllocCalc& allocator): allocator_(allocator) {}

    // Destroys and releases one calculator; a null pointer is ignored.
    void operator() (QuantileCalculator* ptr) {
      if (ptr == nullptr) {
        return;
      }
      ptr->~QuantileCalculator();
      allocator_.deallocate(ptr, 1);
    }

  private:
    AllocCalc allocator_;
};

template<typename T, typename C, typename A>
template<bool inclusive>
auto quantiles_sketch<T, C, A>::get_quantile_calculator() const -> QuantileCalculatorPtr {
// allow side effect of sorting the base buffer
// can't set the sorted flag since this is a const method
quantile_sketch_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view(bool cumulative) const {
// allow side-effect of sorting the base buffer; can't set the flag since
// this is a const method
if (!is_sorted_) {
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
}
quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);

AllocCalc ac(allocator_);
QuantileCalculatorPtr quantile_calculator_ptr(
new (ac.allocate(1)) quantile_calculator<T, C, A>(n_, ac),
calculator_deleter(ac)
);

uint8_t lg_weight = 0;
quantile_calculator_ptr->add(base_buffer_.data(), base_buffer_.data() + base_buffer_.size(), lg_weight);
uint64_t weight = 1;
view.add(base_buffer_.begin(), base_buffer_.end(), weight);
for (auto& level : levels_) {
++lg_weight;
weight <<= 1;
if (level.empty()) { continue; }
quantile_calculator_ptr->add(level.data(), level.data() + k_, lg_weight);
view.add(level.begin(), level.end(), weight);
}
quantile_calculator_ptr->template convert_to_cummulative<inclusive>();
return quantile_calculator_ptr;

if (cumulative) view.template convert_to_cummulative<inclusive>();
return view;
}

template<typename T, typename C, typename A>
template<bool inclusive>
const T& quantiles_sketch<T, C, A>::get_quantile(double rank) const {
auto quantiles_sketch<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
if (is_empty()) return get_invalid_value();
if (rank == 0.0) return *min_value_;
if (rank == 1.0) return *max_value_;
if ((rank < 0.0) || (rank > 1.0)) {
throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
}
return *(get_quantile_calculator<inclusive>()->get_quantile(rank));
// possible side-effect: sorting base buffer
return get_sorted_view<inclusive>(true).get_quantile(rank);
}

template<typename T, typename C, typename A>
template<bool inclusive>
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size) const {
std::vector<T, A> quantiles(allocator_);
if (is_empty()) return quantiles;
QuantileCalculatorPtr quantile_calculator_ptr(nullptr, calculator_deleter(allocator_));
quantiles.reserve(size);

// possible side-effect: sorting base buffer
auto view = get_sorted_view<inclusive>(true);

for (uint32_t i = 0; i < size; ++i) {
const double rank = ranks[i];
if ((rank < 0.0) || (rank > 1.0)) {
@@ -704,11 +699,7 @@ std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks,
if (rank == 0.0) quantiles.push_back(*min_value_);
else if (rank == 1.0) quantiles.push_back(*max_value_);
else {
if (!quantile_calculator_ptr) {
// has side effect of sorting level zero if needed
quantile_calculator_ptr = const_cast<quantiles_sketch*>(this)->get_quantile_calculator<inclusive>();
}
quantiles.push_back(*(quantile_calculator_ptr->get_quantile(rank)));
quantiles.push_back(view.get_quantile(rank));
}
}
return quantiles;
@@ -62,7 +62,6 @@ static void quantiles_decode_and_check(uint16_t k, uint64_t n, const std::string
(std::istreambuf_iterator<char>(infile)),
(std::istreambuf_iterator<char>()));
infile.close();

auto sketch_bytes = quantiles_double_sketch::deserialize(bytes.data(), bytes.size(), serde<double>(), 0);
REQUIRE(sketch_bytes.get_quantile(median_rank) == expected_median);
}
@@ -22,7 +22,7 @@

#include "req_common.hpp"
#include "req_compactor.hpp"
#include "quantile_calculator.hpp"
#include "quantile_sketch_sorted_view.hpp"

#include <stdexcept>

@@ -180,8 +180,9 @@ class req_sketch {
* @param rank the given normalized rank
* @return approximate quantile given the normalized rank
*/
using quantile_return_type = typename quantile_sketch_sorted_view<T, Comparator, Allocator>::quantile_return_type;
template<bool inclusive = false>
const T& get_quantile(double rank) const;
quantile_return_type get_quantile(double rank) const;

/**
* Returns an array of quantiles that correspond to the given array of normalized ranks.
@@ -314,6 +315,9 @@ class req_sketch {
const_iterator begin() const;
const_iterator end() const;

template<bool inclusive = false>
quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view(bool cumulative) const;

private:
Allocator allocator_;
uint16_t k_;
@@ -345,13 +349,6 @@ class req_sketch {
static double get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra);
static bool is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra);

using QuantileCalculator = quantile_calculator<T, Comparator, Allocator>;
using AllocCalc = typename std::allocator_traits<Allocator>::template rebind_alloc<QuantileCalculator>;
class calculator_deleter;
using QuantileCalculatorPtr = typename std::unique_ptr<QuantileCalculator, calculator_deleter>;
template<bool inclusive>
QuantileCalculatorPtr get_quantile_calculator() const;

// for deserialization
class item_deleter;
req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);

0 comments on commit 5008526

Please sign in to comment.