Skip to content

Commit

Permalink
Merge pull request #391 from apache/java_serde_compat_testing
Browse files Browse the repository at this point in the history
removed static tests replaced by dynamic ones
  • Loading branch information
AlexanderSaydakov committed Aug 24, 2023
2 parents 2c2b89e + 4f57d2e commit 52588ac
Show file tree
Hide file tree
Showing 43 changed files with 183 additions and 637 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/serde_compat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ jobs:
repository: apache/datasketches-java
path: java
- name: Run Java
run: cd java && mvn test -Dtestng.excludedgroups=
run: cd java && mvn test -P generate-java-files
- name: Copy files
run: cp java/target/java_generated_files/*.sk java
- name: Run cmake
run: cd build && cmake .. -DSERDE_COMPAT=true
- name: Build C++ unit tests
Expand Down
8 changes: 4 additions & 4 deletions cpc/test/cpc_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ namespace datasketches {
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

TEST_CASE("cpc sketch", "[serde_compat]") {
unsigned n_arr[] = {0, 100, 200, 2000, 20000};
for (unsigned n: n_arr) {
const unsigned n_arr[] = {0, 100, 200, 2000, 20000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "cpc_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = cpc_sketch::deserialize(is);
is.open(testBinaryInputPath + "cpc_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = cpc_sketch::deserialize(is);
REQUIRE(sketch.is_empty() == (n == 0));
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02));
}
Expand Down
20 changes: 10 additions & 10 deletions fi/test/frequent_items_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ namespace datasketches {
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

TEST_CASE("frequent longs", "[serde_compat]") {
unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "frequent_long_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = frequent_items_sketch<int64_t>::deserialize(is);
is.open(testBinaryInputPath + "frequent_long_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = frequent_items_sketch<int64_t>::deserialize(is);
REQUIRE(sketch.is_empty() == (n == 0));
if (n > 10) {
REQUIRE(sketch.get_maximum_error() > 0);
Expand All @@ -45,12 +45,12 @@ TEST_CASE("frequent longs", "[serde_compat]") {
}

TEST_CASE("frequent strings", "[serde_compat]") {
unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "frequent_string_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = frequent_items_sketch<std::string>::deserialize(is);
is.open(testBinaryInputPath + "frequent_string_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = frequent_items_sketch<std::string>::deserialize(is);
REQUIRE(sketch.is_empty() == (n == 0));
if (n > 10) {
REQUIRE(sketch.get_maximum_error() > 0);
Expand All @@ -64,8 +64,8 @@ TEST_CASE("frequent strings", "[serde_compat]") {
TEST_CASE("frequent strings ascii", "[serde_compat]") {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "frequent_string_ascii.sk", std::ios::binary);
auto sketch = frequent_items_sketch<std::string>::deserialize(is);
is.open(testBinaryInputPath + "frequent_string_ascii_java.sk", std::ios::binary);
const auto sketch = frequent_items_sketch<std::string>::deserialize(is);
REQUIRE_FALSE(sketch.is_empty());
REQUIRE(sketch.get_maximum_error() == 0);
REQUIRE(sketch.get_total_weight() == 10);
Expand All @@ -78,8 +78,8 @@ TEST_CASE("frequent strings ascii", "[serde_compat]") {
TEST_CASE("frequent strings utf8", "[serde_compat]") {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "frequent_string_utf8.sk", std::ios::binary);
auto sketch = frequent_items_sketch<std::string>::deserialize(is);
is.open(testBinaryInputPath + "frequent_string_utf8_java.sk", std::ios::binary);
const auto sketch = frequent_items_sketch<std::string>::deserialize(is);
REQUIRE_FALSE(sketch.is_empty());
REQUIRE(sketch.get_maximum_error() == 0);
REQUIRE(sketch.get_total_weight() == 28);
Expand Down
3 changes: 3 additions & 0 deletions fi/test/frequent_items_sketch_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ TEST_CASE("frequent items: several items, no resize, no purge", "[frequent_items
REQUIRE(sketch.get_estimate("b") == 3);
REQUIRE(sketch.get_estimate("c") == 2);
REQUIRE(sketch.get_estimate("d") == 1);
REQUIRE(sketch.get_maximum_error() == 0);
}

TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_items_sketch]") {
Expand All @@ -96,6 +97,7 @@ TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_ite
REQUIRE(sketch.get_estimate("b") == 3);
REQUIRE(sketch.get_estimate("c") == 2);
REQUIRE(sketch.get_estimate("d") == 1);
REQUIRE(sketch.get_maximum_error() == 0);
}

TEST_CASE("frequent items: estimation mode", "[frequent_items_sketch]") {
Expand Down Expand Up @@ -149,6 +151,7 @@ TEST_CASE("frequent items: merge exact mode", "[frequent_items_sketch]") {
REQUIRE(sketch1.get_estimate(2) == 3);
REQUIRE(sketch1.get_estimate(3) == 2);
REQUIRE(sketch1.get_estimate(4) == 1);
REQUIRE(sketch1.get_maximum_error() == 0);
}

TEST_CASE("frequent items: merge estimation mode", "[frequent_items_sketch]") {
Expand Down
68 changes: 0 additions & 68 deletions hll/test/ToFromByteArrayTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,74 +53,6 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
}
}

// Deserializes six pre-built binary sketch files ("*_from_java.sk") one after
// another through the same std::ifstream and checks, for each of them,
// emptiness, lg_config_k, and the lower bound / estimate / upper bound at
// 1 standard deviation against hard-coded expected values.
// NOTE(review): the file names suggest list, set and HLL-array storage modes
// in compact and updatable forms -- confirm against the producer of the files.
TEST_CASE("hll to/from byte array: deserialize from java", "[hll_byte_array]") {
std::string inputPath;
// directory holding the .sk files: the build-provided macro if defined,
// otherwise a "test/" directory relative to the working directory
#ifdef TEST_BINARY_INPUT_PATH
inputPath = TEST_BINARY_INPUT_PATH;
#else
inputPath = "test/";
#endif

// NOTE(review): no exception mask is set on the stream and the result of
// open() is never checked, so a missing file is not reported at open time
std::ifstream ifs;
ifs.open(inputPath + "list_from_java.sk", std::ios::binary);
hll_sketch sk = hll_sketch::deserialize(ifs);
REQUIRE(sk.is_empty() == false);
REQUIRE(sk.get_lg_config_k() == 8);
// lower bound is compared exactly, estimate and upper bound with a margin
REQUIRE(sk.get_lower_bound(1) == 7.0);
REQUIRE(sk.get_estimate() == Approx(7.0).margin(1e-6));
REQUIRE(sk.get_upper_bound(1) == Approx(7.000350).margin(1e-5));
ifs.close();

// the two "set" files below are expected to yield identical numbers
ifs.open(inputPath + "compact_set_from_java.sk", std::ios::binary);
sk = hll_sketch::deserialize(ifs);
REQUIRE(sk.is_empty() == false);
REQUIRE(sk.get_lg_config_k() == 8);
REQUIRE(sk.get_lower_bound(1) == 24.0);
REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
ifs.close();

ifs.open(inputPath + "updatable_set_from_java.sk", std::ios::binary);
sk = hll_sketch::deserialize(ifs);
REQUIRE(sk.is_empty() == false);
REQUIRE(sk.get_lg_config_k() == 8);
REQUIRE(sk.get_lower_bound(1) == 24.0);
REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
ifs.close();


// the three "array" files below are expected to yield identical numbers,
// all compared with a margin of 1e-5
ifs.open(inputPath + "array6_from_java.sk", std::ios::binary);
sk = hll_sketch::deserialize(ifs);
REQUIRE(sk.is_empty() == false);
REQUIRE(sk.get_lg_config_k() == 8);
REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
ifs.close();


ifs.open(inputPath + "compact_array4_from_java.sk", std::ios::binary);
sk = hll_sketch::deserialize(ifs);
REQUIRE(sk.is_empty() == false);
REQUIRE(sk.get_lg_config_k() == 8);
REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));

ifs.close();


ifs.open(inputPath + "updatable_array4_from_java.sk", std::ios::binary);
sk = hll_sketch::deserialize(ifs);
REQUIRE(sk.is_empty() == false);
REQUIRE(sk.get_lg_config_k() == 8);
REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
ifs.close();
}

static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
REQUIRE(sk1.get_lg_config_k() == sk2.get_lg_config_k());
REQUIRE(sk1.get_lower_bound(1) == sk2.get_lower_bound(1));
Expand Down
Binary file removed hll/test/array6_from_java.sk
Binary file not shown.
Binary file removed hll/test/compact_array4_from_java.sk
Binary file not shown.
Binary file removed hll/test/compact_set_from_java.sk
Binary file not shown.
24 changes: 12 additions & 12 deletions hll/test/hll_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,38 +28,38 @@ namespace datasketches {
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

TEST_CASE("hll4 sketch", "[serde_compat]") {
unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000};
for (unsigned n: n_arr) {
const unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "hll4_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = hll_sketch::deserialize(is);
is.open(testBinaryInputPath + "hll4_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = hll_sketch::deserialize(is);
REQUIRE(sketch.get_lg_config_k() == 12);
REQUIRE(sketch.is_empty() == (n == 0));
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02));
}
}

TEST_CASE("hll6 sketch", "[serde_compat]") {
unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000};
for (unsigned n: n_arr) {
const unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "hll6_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = hll_sketch::deserialize(is);
is.open(testBinaryInputPath + "hll6_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = hll_sketch::deserialize(is);
REQUIRE(sketch.get_lg_config_k() == 12);
REQUIRE(sketch.is_empty() == (n == 0));
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02));
}
}

TEST_CASE("hll8 sketch", "[serde_compat]") {
unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000};
for (unsigned n: n_arr) {
const unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "hll8_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = hll_sketch::deserialize(is);
is.open(testBinaryInputPath + "hll8_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = hll_sketch::deserialize(is);
REQUIRE(sketch.get_lg_config_k() == 12);
REQUIRE(sketch.is_empty() == (n == 0));
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02));
Expand Down
Binary file removed hll/test/list_from_java.sk
Binary file not shown.
Binary file removed hll/test/updatable_array4_from_java.sk
Binary file not shown.
Binary file removed hll/test/updatable_set_from_java.sk
Binary file not shown.
37 changes: 31 additions & 6 deletions kll/test/kll_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ namespace datasketches {
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

TEST_CASE("kll float", "[serde_compat]") {
unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "kll_float_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = kll_sketch<float>::deserialize(is);
is.open(testBinaryInputPath + "kll_float_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = kll_sketch<float>::deserialize(is);
REQUIRE(sketch.is_empty() == (n == 0));
REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K));
REQUIRE(sketch.get_n() == n);
Expand All @@ -52,12 +52,12 @@ TEST_CASE("kll float", "[serde_compat]") {
}

TEST_CASE("kll double", "[serde_compat]") {
unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "kll_double_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = kll_sketch<double>::deserialize(is);
is.open(testBinaryInputPath + "kll_double_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = kll_sketch<double>::deserialize(is);
REQUIRE(sketch.is_empty() == (n == 0));
REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K));
REQUIRE(sketch.get_n() == n);
Expand All @@ -75,4 +75,29 @@ TEST_CASE("kll double", "[serde_compat]") {
}
}

// Serialization-compatibility check for KLL sketches of strings: for every n
// in n_arr, opens "kll_string_n<n>_java.sk" under testBinaryInputPath and
// deserializes it as kll_sketch<std::string>.
// The items are numbers padded with leading spaces so that natural order
// works: the items themselves are compared with <= and >= below, while
// std::stoul (which skips leading whitespace) recovers the numeric value.
TEST_CASE("kll string", "[serde_compat]") {
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
// set the exception mask before open() so that a missing or unreadable file
// throws std::ios_base::failure instead of leaving the stream in a failed state
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "kll_string_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = kll_sketch<std::string>::deserialize(is);
REQUIRE(sketch.is_empty() == (n == 0));
// estimation mode is expected exactly when n exceeds the default k
REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K));
REQUIRE(sketch.get_n() == n);
// min/max items and the retained-item walk are only checked for non-empty sketches
if (n > 0) {
// the smallest item is expected to parse to 1 and the largest to n
REQUIRE(std::stoul(sketch.get_min_item()) == 1);
REQUIRE(std::stoul(sketch.get_max_item()) == n);
// every retained item must lie within [min, max], and the retained
// weights must add up to the total number of items in the stream
uint64_t weight = 0;
for (const auto pair: sketch) {
REQUIRE(pair.first >= sketch.get_min_item());
REQUIRE(pair.first <= sketch.get_max_item());
weight += pair.second;
}
REQUIRE(weight == sketch.get_n());
}
}
}

} /* namespace datasketches */
12 changes: 6 additions & 6 deletions quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ namespace datasketches {
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

TEST_CASE("quantiles double", "[serde_compat]") {
unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "quantiles_double_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = quantiles_sketch<double>::deserialize(is);
is.open(testBinaryInputPath + "quantiles_double_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = quantiles_sketch<double>::deserialize(is);
REQUIRE(sketch.is_empty() == (n == 0));
REQUIRE(sketch.is_estimation_mode() == (n > quantiles_constants::DEFAULT_K));
REQUIRE(sketch.get_n() == n);
Expand All @@ -58,12 +58,12 @@ struct string_as_number_less {
};

TEST_CASE("quantiles string", "[serde_compat]") {
unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "quantiles_string_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = quantiles_sketch<std::string, string_as_number_less>::deserialize(is);
is.open(testBinaryInputPath + "quantiles_string_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = quantiles_sketch<std::string, string_as_number_less>::deserialize(is);
REQUIRE(sketch.is_empty() == (n == 0));
REQUIRE(sketch.is_estimation_mode() == (n > quantiles_constants::DEFAULT_K));
REQUIRE(sketch.get_n() == n);
Expand Down
Binary file removed req/test/req_float_empty_from_java.sk
Binary file not shown.
Binary file removed req/test/req_float_estimation_from_java.sk
Binary file not shown.
Binary file removed req/test/req_float_exact_from_java.sk
Binary file not shown.
Binary file removed req/test/req_float_raw_items_from_java.sk
Binary file not shown.
Binary file removed req/test/req_float_single_item_from_java.sk
Binary file not shown.
6 changes: 3 additions & 3 deletions req/test/req_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ namespace datasketches {
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

TEST_CASE("req float", "[serde_compat]") {
unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
for (const unsigned n: n_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "req_float_n" + std::to_string(n) + ".sk", std::ios::binary);
auto sketch = req_sketch<float>::deserialize(is);
is.open(testBinaryInputPath + "req_float_n" + std::to_string(n) + "_java.sk", std::ios::binary);
const auto sketch = req_sketch<float>::deserialize(is);
REQUIRE(sketch.is_HRA());
REQUIRE(sketch.is_empty() == (n == 0));
REQUIRE(sketch.is_estimation_mode() == (n > 10));
Expand Down

0 comments on commit 52588ac

Please sign in to comment.