Skip to content
Permalink
Browse files
Merge pull request #269 from apache/serde_instance
Serde instance
  • Loading branch information
jmalkin committed May 2, 2022
2 parents 5c6aa2b + f7693eb commit 1c745cb2f829a856b498872cc6402389b0c80f61
Showing 15 changed files with 394 additions and 131 deletions.
@@ -46,7 +46,7 @@ template<
typename W = uint64_t,
typename H = std::hash<T>,
typename E = std::equal_to<T>,
typename S = serde<T>,
typename S = serde<T>, // deprecated, to be removed in the next major version
typename A = std::allocator<T>
>
class frequent_items_sketch {
@@ -225,46 +225,78 @@ class frequent_items_sketch {
/**
* Computes size needed to serialize the current state of the sketch.
* This can be expensive since every item needs to be looked at.
* @param sd instance of a SerDe (defaults to a default-constructed SerDe)
* @return size in bytes needed to serialize this sketch
*/
size_t get_serialized_size_bytes() const;
template<typename SerDe = S>
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;

/**
* This method serializes the sketch into a given stream in a binary form
* @param os output stream
* @param sd instance of a SerDe (defaults to a default-constructed SerDe)
*/
void serialize(std::ostream& os) const;
template<typename SerDe = S>
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;

// This is a convenience alias for users:
// the type returned by the following serialize method.
// It is a vector of uint8_t that uses the sketch allocator A rebound to uint8_t.
using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;


/**
* This method serializes the sketch as a vector of bytes.
* An optional header can be reserved in front of the sketch.
* It is a blank space of a given size.
* This header is used in Datasketches PostgreSQL extension.
* @param header_size_bytes space to reserve in front of the sketch
* @param sd instance of a SerDe (defaults to a default-constructed SerDe)
* @return serialized sketch as a vector of bytes
*/
vector_bytes serialize(unsigned header_size_bytes = 0) const;
template<typename SerDe = S>
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;

/**
* This method deserializes a sketch from a given stream.
* @param is input stream
* @param allocator instance of an Allocator
* @return an instance of the sketch
*
* @deprecated to be removed in the next major version
*/
static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());

/**
* This method deserializes a sketch from a given stream.
* @param is input stream
* @param sd instance of a SerDe (defaults to a default-constructed SerDe)
* @param allocator instance of an Allocator
* @return an instance of the sketch
*/
template<typename SerDe = S>
static frequent_items_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());

/**
* This method deserializes a sketch from a given array of bytes.
* @param bytes pointer to the array of bytes
* @param size the size of the array
* @param allocator instance of an Allocator
* @return an instance of the sketch
*
* @deprecated to be removed in the next major version
*/
static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());

/**
* This method deserializes a sketch from a given array of bytes.
* @param bytes pointer to the array of bytes
* @param size the size of the array
* @param sd instance of a SerDe (defaults to a default-constructed SerDe)
* @param allocator instance of an Allocator
* @return an instance of the sketch
*/
template<typename SerDe = S>
static frequent_items_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());

/**
* Returns a human readable summary of this sketch
* @param print_items if true include the list of items retained by the sketch
@@ -161,7 +161,8 @@ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error
}

template<typename T, typename W, typename H, typename E, typename S, typename A>
void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const {
template<typename SerDe>
void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
write(os, preamble_longs);
const uint8_t serial_version = SERIAL_VERSION;
@@ -199,23 +200,25 @@ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const
}
write(os, weights, sizeof(W) * num_items);
aw.deallocate(weights, num_items);
S().serialize(os, items, num_items);
sd.serialize(os, items, num_items);
for (i = 0; i < num_items; i++) items[i].~T();
alloc.deallocate(items, num_items);
}
}

// Computes the number of bytes needed to serialize the sketch:
// the preamble, one weight of type W per active map entry,
// plus the size of each item in the map as reported by the SerDe.
template<typename T, typename W, typename H, typename E, typename S, typename A>
size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() const {
template<typename SerDe>
size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
// an empty sketch needs only the empty preamble
if (is_empty()) return PREAMBLE_LONGS_EMPTY * sizeof(uint64_t);
// non-empty preamble plus the weights
size_t size = PREAMBLE_LONGS_NONEMPTY * sizeof(uint64_t) + map.get_num_active() * sizeof(W);
// item sizes are not assumed to be fixed, so each item is asked for its size
for (auto it: map) size += S().size_of_item(it.first);
for (auto it: map) size += sd.size_of_item(it.first);
return size;
}

template<typename T, typename W, typename H, typename E, typename S, typename A>
auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
const size_t size = header_size_bytes + get_serialized_size_bytes();
template<typename SerDe>
auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
vector_bytes bytes(size, 0, map.get_allocator());
uint8_t* ptr = bytes.data() + header_size_bytes;
uint8_t* end_ptr = ptr + size;
@@ -256,7 +259,7 @@ auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_byt
ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
aw.deallocate(weights, num_items);
const size_t bytes_remaining = end_ptr - ptr;
ptr += S().serialize(ptr, bytes_remaining, items, num_items);
ptr += sd.serialize(ptr, bytes_remaining, items, num_items);
for (i = 0; i < num_items; i++) items[i].~T();
alloc.deallocate(items, num_items);
}
@@ -285,6 +288,12 @@ class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {

// Stream deserialization without an explicit SerDe:
// delegates to the overload taking a SerDe, passing a default-constructed S.
template<typename T, typename W, typename H, typename E, typename S, typename A>
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) {
return deserialize(is, S(), allocator);
}

template<typename T, typename W, typename H, typename E, typename S, typename A>
template<typename SerDe>
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
const auto preamble_longs = read<uint8_t>(is);
const auto serial_version = read<uint8_t>(is);
const auto family_id = read<uint8_t>(is);
@@ -313,7 +322,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
read(is, weights.data(), sizeof(W) * num_items);
A alloc(allocator);
std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
S().deserialize(is, items.get(), num_items);
sd.deserialize(is, items.get(), num_items);
items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
for (uint32_t i = 0; i < num_items; i++) {
sketch.update(std::move(items.get()[i]), weights[i]);
@@ -328,6 +337,12 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:

// Deserialization from bytes without an explicit SerDe:
// delegates to the overload taking a SerDe, passing a default-constructed S.
template<typename T, typename W, typename H, typename E, typename S, typename A>
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
return deserialize(bytes, size, S(), allocator);
}

template<typename T, typename W, typename H, typename E, typename S, typename A>
template<typename SerDe>
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
ensure_minimum_memory(size, 8);
const char* ptr = static_cast<const char*>(bytes);
const char* base = static_cast<const char*>(bytes);
@@ -371,7 +386,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
A alloc(allocator);
std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
const size_t bytes_remaining = size - (ptr - base);
ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items);
ptr += sd.deserialize(ptr, bytes_remaining, items.get(), num_items);
items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
for (uint32_t i = 0; i < num_items; i++) {
sketch.update(std::move(items.get()[i]), weights[i]);
@@ -60,7 +60,7 @@ TEST_CASE("frequent items: custom type", "[frequent_items_sketch]") {
REQUIRE(sketch.get_maximum_error() == sketch2.get_maximum_error());

auto bytes = sketch.serialize();
auto sketch3 = frequent_test_type_sketch::deserialize(bytes.data(), bytes.size(), 0);
auto sketch3 = frequent_test_type_sketch::deserialize(bytes.data(), bytes.size(), alloc(0));
REQUIRE_FALSE(sketch3.is_empty());
REQUIRE(sketch3.get_total_weight() == 17);
REQUIRE(sketch3.get_estimate(1) == 10);
@@ -161,7 +161,7 @@ namespace kll_constants {
template <
typename T,
typename C = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
typename S = serde<T>,
typename S = serde<T>, // deprecated, to be removed in the next major version
typename A = std::allocator<T>
>
class kll_sketch {
@@ -386,16 +386,17 @@ class kll_sketch {
* This version is for fixed-size arithmetic types (integral and floating point).
* @return size in bytes needed to serialize this sketch
*/
template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
size_t get_serialized_size_bytes() const;
template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;

/**
* Computes size needed to serialize the current state of the sketch.
* This version is for non-arithmetic types and can be expensive since every item needs to be looked at.
* @param sd instance of a SerDe (defaults to a default-constructed SerDe)
* @return size in bytes needed to serialize this sketch
*/
template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
size_t get_serialized_size_bytes() const;
template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;

/**
* Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
@@ -427,8 +428,11 @@ class kll_sketch {
/**
* This method serializes the sketch into a given stream in a binary form
* @param os output stream
* @param sd instance of a SerDe (defaults to a default-constructed SerDe)
*/
void serialize(std::ostream& os) const;
template<typename SerDe = S>
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;

// This is a convenience alias for users
// The type returned by the following serialize method
@@ -440,24 +444,53 @@ class kll_sketch {
* It is a blank space of a given size.
* This header is used in Datasketches PostgreSQL extension.
* @param header_size_bytes space to reserve in front of the sketch
* @param sd instance of a SerDe
*/
vector_bytes serialize(unsigned header_size_bytes = 0) const;
template<typename SerDe = S>
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;

/**
* This method deserializes a sketch from a given stream.
* @param is input stream
* @param allocator instance of an Allocator
* @return an instance of a sketch
*
* @deprecated to be removed in the next major version
*/
static kll_sketch<T, C, S, A> deserialize(std::istream& is, const A& allocator = A());

/**
* This method deserializes a sketch from a given stream.
* @param is input stream
* @param sd instance of a SerDe (defaults to a default-constructed SerDe)
* @param allocator instance of an Allocator
* @return an instance of a sketch
*/
template<typename SerDe = S>
static kll_sketch<T, C, S, A> deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());

/**
* This method deserializes a sketch from a given array of bytes.
* @param bytes pointer to the array of bytes
* @param size the size of the array
* @param allocator instance of an Allocator
* @return an instance of a sketch
*
* @deprecated to be removed in the next major version
*/
static kll_sketch<T, C, S, A> deserialize(const void* bytes, size_t size, const A& allocator = A());

/**
* This method deserializes a sketch from a given array of bytes.
* @param bytes pointer to the array of bytes
* @param size the size of the array
* @param sd instance of a SerDe (defaults to a default-constructed SerDe)
* @param allocator instance of an Allocator
* @return an instance of a sketch
*/
template<typename SerDe = S>
static kll_sketch<T, C, S, A> deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());

/*
* Gets the normalized rank error given k and pmf.
* k - the configuration parameter

0 comments on commit 1c745cb

Please sign in to comment.