Skip to content
Permalink
Browse files
Merge branch 'master' into quantiles
  • Loading branch information
jmalkin committed May 5, 2022
2 parents 5da7792 + f0f085c commit 7e36654bbfcfdd14671666b3363204ca04ed9367
Showing 94 changed files with 2,309 additions and 782 deletions.
@@ -16,21 +16,18 @@ jobs:
name: Source distribution
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
submodules: true
persist-credentials: false
- uses: actions/checkout@v3

- uses: actions/setup-python@v2
- uses: actions/setup-python@v3
name: Install Python
with:
python-version: '3.8'
python-version: '3.x'

- name: Install Python dependencies
run: python -m pip install --upgrade pip setuptools wheel numpy tox pytest build
- name: Install build package
run: python -m pip install build --user

- name: Build sdist
run: python setup.py sdist
run: python -m build --sdist --outdir dist

- uses: actions/upload-artifact@v2
with:
@@ -44,42 +41,36 @@ jobs:
matrix:
config:
- {
name: "MacOS Latest, Clang",
os: macos-latest,
cc: "clang", cxx: "clang++"
name: "MacOS 10.15",
os: macos-latest
}
- {
name: "Ubuntu Latest, GCC",
os: ubuntu-latest,
cc: "gcc", cxx: "g++"
name: "Ubuntu Latest",
os: ubuntu-latest
}
- {
name: "Windows Latest, MSVC",
os: windows-latest,
cc: "cl", cxx: "cl",
environment_script: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars64.bat"
name: "Windows Latest",
os: windows-latest
}

steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
persist-credentials: false
uses: actions/checkout@v3

- name: Set up Python 3.x
uses: actions/setup-python@v2
with:
python-version: '3.8'
python-version: '3.x'

- name: Install Python dependencies
run: python -m pip install --upgrade pip setuptools wheel numpy tox pytest build cibuildwheel==2.1.1 twine
run: python -m pip install cibuildwheel==2.5.0

- name: Build wheels
run: python -m cibuildwheel --output-dir dist
env:
CIBW_SKIP: "*-win32"
CIBW_ARCHS_MACOS: "x86_64 universal2"
CIBW_BEFORE_BUILD: python -m pip install cmake>=3.18
CIBW_SKIP: "*-win32 pp*-macosx*"
CIBW_ARCHS_MACOS: "x86_64 arm64 universal2"

- uses: actions/upload-artifact@v2
with:
@@ -15,9 +15,9 @@
# specific language governing permissions and limitations
# under the License.

cmake_minimum_required(VERSION 3.12.0)
cmake_minimum_required(VERSION 3.16.0)
project(DataSketches
VERSION 3.2.0
VERSION 3.4.0
LANGUAGES CXX)

include(GNUInstallDirs)
@@ -26,7 +26,7 @@ Building and running unit tests using cmake for OSX and Linux:

```
$ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release
$ cmake --build build/Release -t test
$ cmake --build build/Release -t all test
```

Building and running unit tests using cmake for Windows from the command line:
@@ -91,7 +91,7 @@ from GitHub using CMake's `ExternalProject` module. The code would look somethin
GIT_SHALLOW true
GIT_SUBMODULES ""
INSTALL_DIR /tmp/datasketches-prefix
CMAKE_ARGS -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=/tmp/datasketches-prefix
CMAKE_ARGS -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=/tmp/datasketches-prefix
# Override the install command to add DESTDIR
# This is necessary to work around an oddity in the RPM (but not other) package
@@ -45,4 +45,6 @@ install(FILES
include/bounds_binomial_proportions.hpp
include/quantile_calculator.hpp
include/quantile_calculator_impl.hpp
include/quantile_sketch_sorted_view.hpp
include/quantile_sketch_sorted_view_impl.hpp
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -22,6 +22,7 @@

#include <algorithm>
#include <cmath>
#include <stdexcept>

/*
* This class enables the estimation of error bounds given a sample set size, the sampling
@@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#ifndef QUANTILE_SKETCH_SORTED_VIEW_HPP_
#define QUANTILE_SKETCH_SORTED_VIEW_HPP_

#include <cstddef>
#include <cstdint>
#include <functional>
#include <iterator>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>

namespace datasketches {

/**
 * View over items paired with 64-bit weights, kept in a single contiguous container.
 *
 * Items of arithmetic type are stored in the entries by value; items of any other type
 * are stored as pointers to the original objects, so those objects must stay alive for
 * as long as the view is used.
 *
 * Typical usage, as far as this header and its implementation show: construct, call
 * add() for each range of items, call convert_to_cummulative(), then query
 * get_quantile() or iterate.
 */
template<
  typename T,
  typename Comparator, // strict weak ordering function (see C++ named requirements: Compare)
  typename Allocator
>
class quantile_sketch_sorted_view {
public:
  // (item, weight) pair: item by value for arithmetic T, pointer to item otherwise
  using Entry = typename std::conditional<
    std::is_arithmetic<T>::value,
    std::pair<T, uint64_t>,
    std::pair<const T*, uint64_t>
  >::type;
  // user-supplied allocator rebound to allocate entries
  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
  using Container = std::vector<Entry, AllocEntry>;

  /**
   * @param num number of entries to reserve space for
   * @param allocator allocator instance used for the internal container
   */
  quantile_sketch_sorted_view(uint32_t num, const Allocator& allocator);

  /**
   * Adds all items from the range [begin, end), each with the given weight.
   * @param begin iterator to the first item
   * @param end iterator past the last item
   * @param weight weight assigned to every item of the range
   */
  template<typename Iterator>
  void add(Iterator begin, Iterator end, uint64_t weight);

  /**
   * Replaces the weight of every entry with a running total of the weights.
   * @tparam inclusive if true, the running total includes the entry's own weight
   */
  template<bool inclusive>
  void convert_to_cummulative();

  class const_iterator;
  const_iterator begin() const;
  const_iterator end() const;

  /// @return number of entries in the view
  size_t size() const;

  // arithmetic items are returned by value, all others by const reference
  using quantile_return_type = typename std::conditional<
    std::is_arithmetic<T>::value,
    T,
    const T&
  >::type;

  /**
   * Makes sense only with cumulative weight, i.e. after convert_to_cummulative().
   * @param rank normalized rank of the requested item
   * @return item found for the given rank
   */
  quantile_return_type get_quantile(double rank) const;

private:
  // uniform access to the item of an entry, whether it is held by pointer or by value
  static const T& deref_helper(const T* item) { return *item; }
  static T deref_helper(T item) { return item; }

  // orders entries by item using the user-supplied comparator
  struct compare_pairs_by_first {
    bool operator()(const Entry& lhs, const Entry& rhs) const {
      return Comparator()(deref_helper(lhs.first), deref_helper(rhs.first));
    }
  };

  // orders entries by weight
  struct compare_pairs_by_second {
    bool operator()(const Entry& lhs, const Entry& rhs) const {
      return lhs.second < rhs.second;
    }
  };

  // produces what is stored in an entry for a given item: a copy for arithmetic T ...
  template<typename U = T, typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
  static T ref_helper(const T& item) { return item; }

  // ... and the address of the item otherwise
  template<typename U = T, typename std::enable_if<!std::is_arithmetic<U>::value, int>::type = 0>
  static const T* ref_helper(const T& item) { return std::addressof(item); }

  // entry with a placeholder item, used as a search key when only the weight matters
  template<typename U = T, typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
  static Entry make_dummy_entry(uint64_t weight) { return Entry(0, weight); }

  template<typename U = T, typename std::enable_if<!std::is_arithmetic<U>::value, int>::type = 0>
  static Entry make_dummy_entry(uint64_t weight) { return Entry(nullptr, weight); }

  uint64_t total_weight_; // sum of all weights, set by convert_to_cummulative()
  Container entries_;
};

/**
 * Read-only iterator over the entries of the view.
 *
 * Derives from the iterator of the underlying container. For arithmetic T the entries
 * are exposed as they are stored; for any other T the stored pointer is dereferenced,
 * so the caller sees a pair holding a const reference to the item and its weight.
 */
template<typename T, typename C, typename A>
class quantile_sketch_sorted_view<T, C, A>::const_iterator: public quantile_sketch_sorted_view<T, C, A>::Container::const_iterator {
public:
  using Base = typename quantile_sketch_sorted_view<T, C, A>::Container::const_iterator;
  using value_type = typename std::conditional<
    std::is_arithmetic<T>::value,
    typename Base::value_type,
    std::pair<const T&, const uint64_t>
  >::type;

  // deliberately implicit: allows conversion from an iterator of the underlying container
  const_iterator(const Base& it): Base(it) {}

  // arithmetic T: hand out a copy of the stored entry
  template<typename U = T, typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
  value_type operator*() const { return Base::operator*(); }

  // other T: build a pair referring to the pointed-to item
  template<typename U = T, typename std::enable_if<!std::is_arithmetic<U>::value, int>::type = 0>
  value_type operator*() const {
    const auto& entry = Base::operator*();
    return value_type(*(entry.first), entry.second);
  }

  // Owns a temporary value so that operator->() has an object to point to
  // when the value is produced on the fly.
  class return_value_holder {
  public:
    return_value_holder(value_type value): value_(value) {}
    const value_type* operator->() const { return &value_; }
  private:
    value_type value_;
  };

  // arithmetic T: the stored entry can be pointed to directly
  template<typename U = T, typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
  const value_type* operator->() const { return Base::operator->(); }

  // other T: wrap the freshly built pair in a holder
  template<typename U = T, typename std::enable_if<!std::is_arithmetic<U>::value, int>::type = 0>
  return_value_holder operator->() const { return return_value_holder(**this); }
};

} /* namespace datasketches */

#include "quantile_sketch_sorted_view_impl.hpp"

#endif
@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#ifndef QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
#define QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_

#include <algorithm>
#include <stdexcept>

namespace datasketches {

/*
 * Creates an empty view with zero total weight.
 * num is the number of entries to reserve space for up front;
 * allocator is converted to the entry allocator and used for the entry container.
 */
template<typename T, typename C, typename A>
quantile_sketch_sorted_view<T, C, A>::quantile_sketch_sorted_view(uint32_t num, const A& allocator):
total_weight_(0),
entries_(AllocEntry(allocator))
{
  // a single allocation up front instead of repeated growth while adding
  entries_.reserve(num);
}

/*
 * Appends every item of [first, last) with the given weight. If the view already held
 * entries, the previously held entries and the newly appended ones are merged into a
 * new container ordered by item.
 * NOTE(review): std::merge requires both runs to be sorted with respect to the
 * comparator, so the incoming range is presumably expected to be sorted - confirm
 * against callers.
 */
template<typename T, typename C, typename A>
template<typename Iterator>
void quantile_sketch_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
  const size_t num_existing = entries_.size();
  for (auto it = first; it != last; ++it) {
    entries_.push_back(Entry(ref_helper(*it), weight));
  }

  // the first batch has nothing to be merged with
  if (num_existing == 0) return;

  // merge into a scratch container that uses the same allocator, then swap it in
  Container merged(entries_.get_allocator());
  merged.reserve(entries_.capacity());
  const auto boundary = entries_.begin() + num_existing;
  std::merge(
    entries_.begin(), boundary,
    boundary, entries_.end(),
    std::back_inserter(merged), compare_pairs_by_first()
  );
  std::swap(merged, entries_);
}

/*
 * Rewrites the weight of every entry, in container order, as a running total.
 * With inclusive = true an entry gets the sum of all weights up to and including
 * its own; otherwise it gets the sum of the weights of the entries before it.
 * The grand total is stored as the total weight of the view.
 */
template<typename T, typename C, typename A>
template<bool inclusive>
void quantile_sketch_sorted_view<T, C, A>::convert_to_cummulative() {
  uint64_t running_total = 0;
  for (auto it = entries_.begin(); it != entries_.end(); ++it) {
    const uint64_t weight_before = running_total;
    running_total += it->second;
    it->second = inclusive ? running_total : weight_before;
  }
  total_weight_ = running_total;
}

/*
 * Returns the item of the first entry whose cumulative weight is not less than
 * rank * total weight, or the item of the last entry if there is no such entry.
 *
 * rank is a normalized rank and must be within [0, 1].
 * Throws std::invalid_argument if the total weight is zero (weights have not been
 * converted to cumulative, or the view is empty) or if rank is NaN or outside [0, 1].
 */
template<typename T, typename C, typename A>
auto quantile_sketch_sorted_view<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
  if (total_weight_ == 0) throw std::invalid_argument("supported for cumulative weight only");
  // Converting a negative, NaN or too large double to uint64_t is undefined behavior,
  // so reject such ranks up front. The condition is a negated conjunction so that NaN,
  // which fails every comparison, is rejected as well.
  if (!(rank >= 0.0 && rank <= 1.0)) {
    throw std::invalid_argument("rank must be in the range [0, 1]");
  }
  const uint64_t weight = static_cast<uint64_t>(rank * total_weight_);
  // only the weight of the search key is looked at by the comparator
  const auto it = std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
  // a non-zero total weight implies at least one entry, so back() is safe here
  if (it == entries_.end()) return deref_helper(entries_.back().first);
  return deref_helper(it->first);
}

/*
 * Returns an iterator to the first entry of the view.
 */
template<typename T, typename C, typename A>
auto quantile_sketch_sorted_view<T, C, A>::begin() const -> const_iterator {
  return const_iterator(entries_.begin());
}

/*
 * Returns an iterator past the last entry of the view.
 */
template<typename T, typename C, typename A>
auto quantile_sketch_sorted_view<T, C, A>::end() const -> const_iterator {
  return const_iterator(entries_.end());
}

/*
 * Returns the number of entries currently held by the view.
 */
template<typename T, typename C, typename A>
size_t quantile_sketch_sorted_view<T, C, A>::size() const {
  return entries_.size();
}

} /* namespace datasketches */

#endif
@@ -23,6 +23,7 @@
#define CPC_COMPRESSOR_IMPL_HPP_

#include <memory>
#include <stdexcept>

#include "compression_data.hpp"
#include "cpc_util.hpp"
@@ -23,6 +23,7 @@
#define CPC_CONFIDENCE_HPP_

#include <cmath>
#include <stdexcept>

#include "cpc_sketch.hpp"

@@ -22,6 +22,8 @@

#include "count_zeros.hpp"

#include <stdexcept>

namespace datasketches {

template<typename A>
@@ -21,6 +21,7 @@
#include <cstring>
#include <sstream>
#include <fstream>
#include <stdexcept>

#include <catch.hpp>

@@ -21,6 +21,8 @@

#include "cpc_union.hpp"

#include <stdexcept>

namespace datasketches {

static const double RELATIVE_ERROR_FOR_LG_K_11 = 0.02;

0 comments on commit 7e36654

Please sign in to comment.