Skip to content
Permalink
Browse files
Merge pull request #259 from apache/quantile_sketch_sorted_view
Quantile sketch sorted view
  • Loading branch information
AlexanderSaydakov committed Feb 8, 2022
2 parents 7b42551 + 513e49b commit 641def5cc4fb7a26fe03e3bd17b735b23f1eecaf
Showing 12 changed files with 328 additions and 320 deletions.
@@ -43,4 +43,6 @@ install(FILES
include/conditional_forward.hpp
include/ceiling_power_of_2.hpp
include/bounds_binomial_proportions.hpp
include/quantile_sketch_sorted_view.hpp
include/quantile_sketch_sorted_view_impl.hpp
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#ifndef QUANTILE_SKETCH_SORTED_VIEW_HPP_
#define QUANTILE_SKETCH_SORTED_VIEW_HPP_

#include <functional>

namespace datasketches {

/*
 * Sorted view of weighted items, used to answer quantile queries.
 *
 * Each entry pairs an item with a 64-bit weight. Arithmetic items are stored by value;
 * items of any other type are stored as pointers to the caller's objects, so the view
 * does not own them and they have to stay alive for as long as the view is used.
 *
 * Usage implied by the implementation: call add() once per sorted run of items,
 * then convert_to_cummulative() once, then get_quantile() as often as needed.
 */
template<
  typename T,
  typename Comparator, // strict weak ordering function (see C++ named requirements: Compare)
  typename Allocator
>
class quantile_sketch_sorted_view {
public:
  // (item, weight) for arithmetic T, (pointer to item, weight) otherwise
  using Entry = typename std::conditional<std::is_arithmetic<T>::value, std::pair<T, uint64_t>, std::pair<const T*, uint64_t>>::type;
  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
  using Container = std::vector<Entry, AllocEntry>;

  // creates an empty view with zero total weight; the allocator is handed to the entry container
  quantile_sketch_sorted_view(const Allocator& allocator);

  // appends the items in [begin, end), each with the given weight,
  // and merges them with the entries that are already in the view
  template<typename Iterator>
  void add(Iterator begin, Iterator end, uint64_t weight);

  // replaces every weight with the running total of the weights up to that entry;
  // with inclusive == true the running total includes the entry's own weight
  template<bool inclusive>
  void convert_to_cummulative();

  class const_iterator;
  const_iterator begin() const;
  const_iterator end() const;

  // number of entries in the view
  size_t size() const;

  // makes sense only with cumulative weight
  // arithmetic items are returned by value, all other items by const reference
  using quantile_return_type = typename std::conditional<std::is_arithmetic<T>::value, T, const T&>::type;
  quantile_return_type get_quantile(double rank) const;

private:
  // uniform access to the item of an entry regardless of how it is stored
  static inline const T& deref_helper(const T* t) { return *t; }
  static inline T deref_helper(T t) { return t; }

  // orders entries by item using a default-constructed Comparator
  struct compare_pairs_by_first_ptr {
    // const-qualified so that the functor can also be invoked through a const object
    bool operator()(const Entry& a, const Entry& b) const {
      return Comparator()(deref_helper(a.first), deref_helper(b.first));
    }
  };

  // orders entries by weight
  struct compare_pairs_by_second {
    // const-qualified so that the functor can also be invoked through a const object
    bool operator()(const Entry& a, const Entry& b) const {
      return a.second < b.second;
    }
  };

  // converts an item into the representation stored in an entry: a copy for arithmetic T
  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
  static inline T ref_helper(const T& t) { return t; }

  // ... and the address of the item for any other T
  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
  static inline const T* ref_helper(const T& t) { return std::addressof(t); }

  // builds a search key that carries only a weight; the item part is a placeholder (zero)
  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
  static inline Entry make_dummy_entry(uint64_t weight) { return Entry(0, weight); }

  // same as above for pointer entries; the item part is a null pointer and must not be dereferenced
  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
  static inline Entry make_dummy_entry(uint64_t weight) { return Entry(nullptr, weight); }

  uint64_t total_weight_; // zero until convert_to_cummulative() stores the sum of all weights
  Container entries_;
};

/*
 * Read-only iterator over the entries of the view.
 * It extends the iterator of the underlying container, and hides the pointer that is stored
 * for non-arithmetic items: dereferencing yields a pair of (item, weight) for arithmetic T
 * and a pair of (const reference to item, weight) for any other T.
 */
template<typename T, typename C, typename A>
class quantile_sketch_sorted_view<T, C, A>::const_iterator: public quantile_sketch_sorted_view<T, C, A>::Container::const_iterator {
public:
  using Base = typename quantile_sketch_sorted_view<T, C, A>::Container::const_iterator;
  using value_type = typename std::conditional<
    std::is_arithmetic<T>::value,
    typename Base::value_type,
    std::pair<const T&, const uint64_t>
  >::type;

  // wraps an iterator of the underlying container
  const_iterator(const Base& it): Base(it) {}

  // arithmetic items: the stored entry already has the right shape
  template<typename U = T, typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
  value_type operator*() const { return Base::operator*(); }

  // other items: follow the stored pointer and expose the item by reference
  template<typename U = T, typename std::enable_if<!std::is_arithmetic<U>::value, int>::type = 0>
  value_type operator*() const {
    const auto& entry = Base::operator*();
    return value_type(*entry.first, entry.second);
  }

  // keeps the pair produced by operator*() alive so that operator->() has an object to point at
  class return_value_holder {
  public:
    return_value_holder(value_type value): value_(value) {}
    const value_type* operator->() const { return &value_; }
  private:
    value_type value_;
  };

  // arithmetic items: point directly at the stored entry
  template<typename U = T, typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
  const value_type* operator->() const { return Base::operator->(); }

  // other items: the pair is a temporary, so it is returned inside a holder
  template<typename U = T, typename std::enable_if<!std::is_arithmetic<U>::value, int>::type = 0>
  return_value_holder operator->() const { return return_value_holder(**this); }
};

} /* namespace datasketches */

#include "quantile_sketch_sorted_view_impl.hpp"

#endif
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#ifndef QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
#define QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_

#include <algorithm>

namespace datasketches {

// Creates an empty view: no entries and a total weight of zero.
// The allocator is passed on to the container that holds the entries.
template<typename T, typename C, typename A>
quantile_sketch_sorted_view<T, C, A>::quantile_sketch_sorted_view(const A& allocator)
  : total_weight_(0)
  , entries_(allocator)
{
}

/*
 * Appends the items in [first, last), each paired with the given weight, and merges them
 * with the entries that are already in the view.
 *
 * The merge relies on both the existing entries and the new items being ordered according
 * to the comparator, so the caller has to pass sorted input. No sorting is done here.
 * Non-arithmetic items are stored by address, so dereferencing the iterators is expected
 * to yield references to objects that outlive the view.
 */
template<typename T, typename C, typename A>
template<typename Iterator>
void quantile_sketch_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
  const size_t size_before = entries_.size();
  // measure the input once and make the signed-to-unsigned conversion explicit
  const size_t num_added = static_cast<size_t>(std::distance(first, last));
  // reserve() does nothing if the capacity is already sufficient, so no separate check is needed
  entries_.reserve(size_before + num_added);
  for (auto it = first; it != last; ++it) entries_.emplace_back(ref_helper(*it), weight);
  // the very first run is taken as is, later runs are merged with what is already there
  if (size_before > 0) {
    std::inplace_merge(entries_.begin(), entries_.begin() + size_before, entries_.end(), compare_pairs_by_first_ptr());
  }
}

/*
 * Replaces the weight of every entry with a running total, going through the entries in order.
 * inclusive == true: the total includes the weight of the entry itself.
 * inclusive == false: the total covers only the entries before it.
 * The sum of all weights is stored as the total weight of the view.
 * Every call accumulates whatever the weights currently are, so calling this more than once
 * on the same view accumulates already accumulated weights.
 */
template<typename T, typename C, typename A>
template<bool inclusive>
void quantile_sketch_sorted_view<T, C, A>::convert_to_cummulative() {
  uint64_t running_total = 0;
  for (auto& entry: entries_) {
    const uint64_t own_weight = entry.second;
    if (inclusive) {
      entry.second = running_total + own_weight;
    } else {
      entry.second = running_total;
    }
    running_total += own_weight;
  }
  total_weight_ = running_total;
}

/*
 * Returns the item that corresponds to the given normalized rank.
 *
 * The rank is scaled by the total weight and truncated to an integer. The result is the item
 * of the first entry whose cumulative weight is not less than that value, or the item of the
 * last entry if there is no such entry (this includes ranks above 1).
 *
 * Throws std::invalid_argument if the total weight is zero, which is the case until
 * convert_to_cummulative() has run on a non-empty view, or if the rank is negative or NaN.
 */
template<typename T, typename C, typename A>
auto quantile_sketch_sorted_view<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
  if (total_weight_ == 0) throw std::invalid_argument("supported for cumulative weight only");
  // written as a negated comparison so that NaN is rejected along with negative values:
  // converting either of them to an unsigned integer is undefined behavior
  if (!(rank >= 0.0)) throw std::invalid_argument("rank must not be negative or NaN");
  const double scaled_rank = rank * total_weight_;
  // 2^64 and above does not fit into uint64_t; such a value is beyond any cumulative weight
  if (scaled_rank >= 18446744073709551616.0) return deref_helper(entries_.back().first);
  const uint64_t weight = static_cast<uint64_t>(scaled_rank);
  const auto it = std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
  // total weight is not zero here, so there is at least one entry to fall back to
  if (it == entries_.end()) return deref_helper(entries_.back().first);
  return deref_helper(it->first);
}

template<typename T, typename C, typename A>
auto quantile_sketch_sorted_view<T, C, A>::begin() const -> const_iterator {
return entries_.begin();
}

template<typename T, typename C, typename A>
auto quantile_sketch_sorted_view<T, C, A>::end() const -> const_iterator {
return entries_.end();
}

// Number of entries currently held by the view.
template<typename T, typename C, typename A>
auto quantile_sketch_sorted_view<T, C, A>::size() const -> size_t {
  const size_t num_entries = entries_.size();
  return num_entries;
}

} /* namespace datasketches */

#endif
@@ -41,8 +41,6 @@ install(FILES
include/kll_sketch_impl.hpp
include/kll_helper.hpp
include/kll_helper_impl.hpp
include/kll_quantile_calculator.hpp
include/kll_quantile_calculator_impl.hpp
include/kolmogorov_smirnov.hpp
include/kolmogorov_smirnov_impl.hpp
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

This file was deleted.

0 comments on commit 641def5

Please sign in to comment.