Skip to content
Permalink
Browse files
Merge pull request #235 from apache/add_sdist_build
Add sdist build
  • Loading branch information
jmalkin committed Sep 7, 2021
2 parents ed37a7a + fb52372 commit 18455ec2bcb96673a3c9ac014865d261740c0bc1
Show file tree
Hide file tree
Showing 22 changed files with 121 additions and 49 deletions.
@@ -12,6 +12,30 @@ env:
BUILD_TYPE: Release

jobs:
build_sdist:
name: Source distribution
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
submodules: true
persist-credentials: false

- uses: actions/setup-python@v2
name: Install Python
with:
python-version: '3.8'

- name: Install Python dependencies
run: python -m pip install --upgrade pip setuptools wheel numpy tox pytest build

- name: Build sdist
run: python setup.py sdist

- uses: actions/upload-artifact@v2
with:
path: dist/*.tar.gz

build_wheels:
name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
@@ -46,16 +70,17 @@ jobs:
- name: Set up Python 3.x
uses: actions/setup-python@v2
with:
python-version: '3.8' # 3.x grabs latest minor version of python3, but not always fully supported (dependencies)
python-version: '3.8'

- name: Install Python dependencies
run: python -m pip install --upgrade pip setuptools wheel numpy tox pytest cibuildwheel==2.1.1 twine
run: python -m pip install --upgrade pip setuptools wheel numpy tox pytest build cibuildwheel==2.1.1 twine

- name: Build wheels
run: python -m cibuildwheel --output-dir wheelhouse
run: python -m cibuildwheel --output-dir dist
env:
CIBW_SKIP: "*-win32"
CIBW_ARCHS_MACOS: "x86_64 universal2"

- uses: actions/upload-artifact@v2
with:
path: ./wheelhouse/*.whl
path: ./dist/*.whl
@@ -1,3 +0,0 @@
[submodule "python/pybind11"]
path = python/pybind11
url = https://github.com/pybind/pybind11
@@ -35,6 +35,8 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
#set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_MACOSX_RPATH ON)

set(CMAKE_CXX_STANDARD 11)

# enable compiler warnings globally
# derived from https://foonathan.net/blog/2018/10/17/cmake-warnings.html
# and https://arne-mertz.de/2018/07/cmake-properties-options/
@@ -29,6 +29,8 @@ namespace datasketches {

static const uint64_t DEFAULT_SEED = 9001;

enum resize_factor { X1 = 0, X2, X4, X8 };

template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;

@@ -26,9 +26,16 @@

namespace datasketches {

static const uint8_t CPC_MIN_LG_K = 4;
static const uint8_t CPC_MAX_LG_K = 26;
static const uint8_t CPC_DEFAULT_LG_K = 11;
// Named constants for CPC sketch configuration. lg_k is the base-2 logarithm
// of the number of bins in the sketch.
namespace cpc_constants {
// NOTE(review): presumably the smallest lg_k accepted; the range check is not visible here.
const uint8_t MIN_LG_K = 4;
// NOTE(review): presumably the largest lg_k accepted; the range check is not visible here.
const uint8_t MAX_LG_K = 26;
// Default lg_k argument of the cpc_sketch_alloc and cpc_union_alloc constructors.
const uint8_t DEFAULT_LG_K = 11;
}

// TODO: Redundant and deprecated. Will be removed in next major version release.
static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;

template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
@@ -67,7 +67,7 @@ class cpc_sketch_alloc {
* @param lg_k base 2 logarithm of the number of bins in the sketch
* @param seed for hash function
*/
explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
explicit cpc_sketch_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());

using allocator_type = A;
A get_allocator() const;
@@ -45,7 +45,7 @@ class cpc_union_alloc {
* @param lg_k base 2 logarithm of the number of bins in the sketch
* @param seed for hash function
*/
explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
explicit cpc_union_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());

cpc_union_alloc(const cpc_union_alloc<A>& other);
cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
@@ -153,18 +153,23 @@ template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
template<typename A> using vector_d = std::vector<double, AllocD<A>>;

// Named constants for KLL sketch configuration, usable without naming a
// particular kll_sketch<T> instantiation.
namespace kll_constants {
// Default value of the k argument of the kll_sketch constructor.
const uint16_t DEFAULT_K = 200;
}

template <typename T, typename C = std::less<T>, typename S = serde<T>, typename A = std::allocator<T>>
class kll_sketch {
public:
using value_type = T;
using comparator = C;

static const uint8_t DEFAULT_M = 8;
static const uint16_t DEFAULT_K = 200;
// TODO: Redundant and deprecated. Will be removed in next major version.
static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
static const uint16_t MIN_K = DEFAULT_M;
static const uint16_t MAX_K = (1 << 16) - 1;

explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const A& allocator = A());
kll_sketch(const kll_sketch& other);
kll_sketch(kll_sketch&& other) noexcept;
~kll_sketch();
@@ -1,8 +1,10 @@
[build-system]
requires = ["wheel",
"setuptools >= 30.3.0",
"setuptools_scm",
"cmake >= 3.12"]
"cmake >= 3.12",
"pip >= 10.0",
"pybind11[global] >= 2.6.0"]
build-backend = "setuptools.build_meta"

[tool.tox]
legacy_tox_ini = """
@@ -12,6 +14,7 @@ envlist = py3
[testenv]
deps = pytest
numpy
pybind11 >= 2.6.0
changedir = python/tests
commands = pytest
"""
@@ -15,16 +15,20 @@
# specific language governing permissions and limitations
# under the License.

# TODO: Can we force python version >= 3.0?
find_package(Python3 COMPONENTS Interpreter Development)

# only Windows+MSVC seems to have trouble locating pybind11
if (MSVC)
set(PYBIND11_CPP_STANDARD /std:c++11)
else()
set(PYBIND11_CPP_STANDARD -std=c++11)
execute_process(COMMAND cmd.exe /c ${CMAKE_CURRENT_SOURCE_DIR}/pybind11Path.cmd "${Python3_EXECUTABLE}"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE EXTRA_PACKAGE_PATH)
set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${EXTRA_PACKAGE_PATH})
endif()

add_subdirectory(pybind11)
find_package(pybind11 CONFIG REQUIRED)

pybind11_add_module(python MODULE EXCLUDE_FROM_ALL SYSTEM THIN_LTO)
pybind11_add_module(python MODULE EXCLUDE_FROM_ALL THIN_LTO)

target_link_libraries(python
PRIVATE
Submodule pybind11 deleted from 59a2ac
@@ -0,0 +1,3 @@
@echo off
:: Takes path to the Python interpreter and returns the path to pybind11
%1 -m pip show pybind11 | %1 -c "import sys,re;[sys.stdout.write(re.sub('^Location:\\s+','',line)) for line in sys.stdin if re.search('^Location:\\s+',line)]"
@@ -53,7 +53,7 @@ void init_cpc(py::module &m) {
using namespace datasketches;

py::class_<cpc_sketch>(m, "cpc_sketch")
.def(py::init<uint8_t, uint64_t>(), py::arg("lg_k")=CPC_DEFAULT_LG_K, py::arg("seed")=DEFAULT_SEED)
.def(py::init<uint8_t, uint64_t>(), py::arg("lg_k")=cpc_constants::DEFAULT_LG_K, py::arg("seed")=DEFAULT_SEED)
.def(py::init<const cpc_sketch&>())
.def("__str__", &cpc_sketch::to_string,
"Produces a string summary of the sketch")
@@ -116,7 +116,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
using namespace datasketches;

py::class_<kll_sketch<T>>(m, name)
.def(py::init<uint16_t>(), py::arg("k")=kll_sketch<T>::DEFAULT_K)
.def(py::init<uint16_t>(), py::arg("k")=kll_constants::DEFAULT_K)
.def(py::init<const kll_sketch<T>&>())
.def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"),
"Updates the sketch with the given value")
@@ -103,7 +103,7 @@ void init_theta(py::module &m) {

py::class_<update_theta_sketch, theta_sketch>(m, "update_theta_sketch")
.def(py::init(&dspy::update_theta_sketch_factory),
py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
.def(py::init<const update_theta_sketch&>())
.def("update", (void (update_theta_sketch::*)(int64_t)) &update_theta_sketch::update, py::arg("datum"),
"Updates the sketch with the given integral value")
@@ -127,7 +127,7 @@ void init_theta(py::module &m) {

py::class_<theta_union>(m, "theta_union")
.def(py::init(&dspy::theta_union_factory),
py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
.def("update", &theta_union::update<const theta_sketch&>, py::arg("sketch"),
"Updates the union with the given sketch")
.def("get_result", &theta_union::get_result, py::arg("ordered")=true,
@@ -29,14 +29,20 @@ namespace py = pybind11;

namespace datasketches {

// Default constructor arguments for vector_of_kll_sketches.
namespace vector_of_kll_constants {
// Default k; takes its value from kll_constants::DEFAULT_K, held here as uint32_t.
static const uint32_t DEFAULT_K = kll_constants::DEFAULT_K;
// Default value of the d argument of the vector_of_kll_sketches constructor.
static const uint32_t DEFAULT_D = 1;
}

// Wrapper class for Numpy compatibility
template <typename T, typename C = std::less<T>, typename S = serde<T>>
class vector_of_kll_sketches {
public:
static const uint32_t DEFAULT_K = kll_sketch<T, C, S>::DEFAULT_K;
static const uint32_t DEFAULT_D = 1;
// TODO: Redundant and deprecated. Will be removed in next major version release.
static const uint32_t DEFAULT_K = vector_of_kll_constants::DEFAULT_K;
static const uint32_t DEFAULT_D = vector_of_kll_constants::DEFAULT_D;

explicit vector_of_kll_sketches(uint32_t k = DEFAULT_K, uint32_t d = DEFAULT_D);
explicit vector_of_kll_sketches(uint32_t k = vector_of_kll_constants::DEFAULT_K, uint32_t d = vector_of_kll_constants::DEFAULT_D);
vector_of_kll_sketches(const vector_of_kll_sketches& other);
vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept;
vector_of_kll_sketches<T,C,S>& operator=(const vector_of_kll_sketches& other);
@@ -432,8 +438,8 @@ void bind_vector_of_kll_sketches(py::module &m, const char* name) {
using namespace datasketches;

py::class_<vector_of_kll_sketches<T>>(m, name)
.def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_sketches<T>::DEFAULT_K,
py::arg("d")=vector_of_kll_sketches<T>::DEFAULT_D)
.def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_constants::DEFAULT_K,
py::arg("d")=vector_of_kll_constants::DEFAULT_D)
.def(py::init<const vector_of_kll_sketches<T>&>())
// allow user to retrieve k or d, in case it's instantiated w/ defaults
.def("get_k", &vector_of_kll_sketches<T>::get_k,
@@ -51,18 +51,23 @@ struct subset_summary {
double total_sketch_weight;
};

enum resize_factor { X1 = 0, X2, X4, X8 };

template <typename T, typename S, typename A> class var_opt_union; // forward declaration

// Named constants for var_opt_sketch configuration.
namespace var_opt_constants {
// Default resize factor (X8) used when the var_opt_sketch constructor is not given one.
const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
// Evaluates to 2^31 - 2.
// NOTE(review): presumably the largest permitted k; the check is not visible here.
const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
}

template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
class var_opt_sketch {

public:
static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
static const uint32_t MAX_K = var_opt_constants::MAX_K;

explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
explicit var_opt_sketch(uint32_t k,
resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
const A& allocator = A());
var_opt_sketch(const var_opt_sketch& other);
var_opt_sketch(var_opt_sketch&& other) noexcept;

@@ -128,7 +128,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
r_(r_count),
n_(n),
total_wt_r_(total_wt_r),
rf_(DEFAULT_RESIZE_FACTOR),
rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
curr_items_alloc_(len),
filled_data_(n > k),
allocator_(allocator),
@@ -49,8 +49,9 @@ def build_extension(self, ext):
os.path.dirname(self.get_ext_fullpath(ext.name)))
cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir]
cmake_args += ['-DWITH_PYTHON=True']
cmake_args += ['-DCMAKE_CXX_STANDARD=11']
# ensure we use a consistent python version
cmake_args += ['-DPYTHON_EXECUTABLE=' + sys.executable]
cmake_args += ['-DPython3_EXECUTABLE=' + sys.executable]
cfg = 'Debug' if self.debug else 'Release'
build_args = ['--config', cfg]

@@ -59,7 +60,8 @@ def build_extension(self, ext):
cfg.upper(),
extdir)]
if sys.maxsize > 2**32:
cmake_args += ['-A', 'x64']
cmake_args += ['-T', 'host=x64']
cmake_args += ['-DCMAKE_GENERATOR_PLATFORM=x64']
build_args += ['--', '/m']
else:
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
@@ -74,7 +76,7 @@ def build_extension(self, ext):
subprocess.check_call(['cmake', ext.sourcedir] + cmake_args,
cwd=self.build_temp, env=env)
subprocess.check_call(['cmake', '--build', '.', '--target', 'python'] + build_args,
cwd=self.build_temp)
cwd=self.build_temp, env=env)
print() # add an empty line to pretty print

setup(
@@ -91,6 +93,6 @@ def build_extension(self, ext):
# may need to add all source paths for sdist packages w/o MANIFEST.in
ext_modules=[CMakeExtension('datasketches')],
cmdclass={'build_ext': CMakeBuild},
setup_requires=['setuptools_scm','tox-setuptools'],
install_requires=['numpy', 'pybind11 >= 2.6.0'],
zip_safe=False
)
@@ -21,14 +21,19 @@
#define THETA_CONSTANTS_HPP_

#include <climits>
#include "common_defs.hpp"

namespace datasketches {

namespace theta_constants {
enum resize_factor { X1, X2, X4, X8 };
static const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
static const uint8_t MIN_LG_K = 5;
static const uint8_t MAX_LG_K = 26;
using resize_factor = datasketches::resize_factor;
//enum resize_factor { X1, X2, X4, X8 };
const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
const uint8_t MIN_LG_K = 5;
const uint8_t MAX_LG_K = 26;

const uint8_t DEFAULT_LG_K = 12;
const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
}

} /* namespace datasketches */
@@ -94,11 +94,14 @@ struct theta_update_sketch_base {
template<typename Derived, typename Allocator>
class theta_base_builder {
public:
// TODO: Redundant and deprecated. Will be removed in next major version release.
using resize_factor = theta_constants::resize_factor;
static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
static const uint8_t DEFAULT_LG_K = 12;
static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
// TODO: The following defaults are redundant and deprecated. Will be removed in the
// next major version release
static const uint8_t DEFAULT_LG_K = theta_constants::DEFAULT_LG_K;
static const resize_factor DEFAULT_RESIZE_FACTOR = theta_constants::DEFAULT_RESIZE_FACTOR;

/**
* Creates an instance of the builder with default parameters.
@@ -271,7 +271,11 @@ void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, siz

template<typename Derived, typename Allocator>
theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
allocator_(allocator), lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
allocator_(allocator),
lg_k_(theta_constants::DEFAULT_LG_K),
rf_(theta_constants::DEFAULT_RESIZE_FACTOR),
p_(1),
seed_(DEFAULT_SEED) {}

template<typename Derived, typename Allocator>
Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {

0 comments on commit 18455ec

Please sign in to comment.