Skip to content

Commit

Permalink
Merge pull request #389 from apache/java_serde_compat_testing
Browse files Browse the repository at this point in the history
SerDe compatibility tests
  • Loading branch information
AlexanderSaydakov committed Aug 4, 2023
2 parents 9b9f871 + 9df75a6 commit 7f3c659
Show file tree
Hide file tree
Showing 8 changed files with 284 additions and 0 deletions.
7 changes: 7 additions & 0 deletions hll/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,10 @@ target_sources(hll_test
ToFromByteArrayTest.cpp
IsomorphicTest.cpp
)

# The Java deserialization test is opt-in: it is added to hll_test only when SERDE_COMPAT is set.
if(SERDE_COMPAT)
  target_sources(hll_test PRIVATE hll_sketch_deserialize_from_java_test.cpp)
endif()
69 changes: 69 additions & 0 deletions hll/test/hll_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <catch2/catch.hpp>
#include <fstream>
#include <hll.hpp>

namespace datasketches {

// Directory holding the binary sketches read by the tests in this file.
// The files are assumed to have been generated by datasketches-java code and placed
// in the subdirectory called "java" in the root directory of this project
// (reached here as "../../java/" appended to TEST_BINARY_INPUT_PATH).
// Declared const because the path is only ever read.
static const std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

// Deserializes each hll4_n<N>.sk file and checks the configuration,
// emptiness and estimate of the resulting sketch against the stream size N.
TEST_CASE("hll4 sketch", "[serde_compat]") {
  const unsigned stream_sizes[] = {0, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned num_items: stream_sizes) {
    const auto file_path = testBinaryInputPath + "hll4_n" + std::to_string(num_items) + ".sk";
    std::ifstream input;
    // enable exceptions before opening so that an unreadable file throws instead of failing silently
    input.exceptions(std::ios::failbit | std::ios::badbit);
    input.open(file_path, std::ios::binary);
    auto restored = hll_sketch::deserialize(input);
    REQUIRE(restored.get_lg_config_k() == 12);
    // only the file for zero items is expected to hold an empty sketch
    REQUIRE(restored.is_empty() == (num_items == 0));
    // absolute margin of 2% of the stream size
    REQUIRE(restored.get_estimate() == Approx(num_items).margin(num_items * 0.02));
  }
}

// Deserializes each hll6_n<N>.sk file and checks the configuration,
// emptiness and estimate of the resulting sketch against the stream size N.
TEST_CASE("hll6 sketch", "[serde_compat]") {
  const unsigned stream_sizes[] = {0, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned num_items: stream_sizes) {
    const auto file_path = testBinaryInputPath + "hll6_n" + std::to_string(num_items) + ".sk";
    std::ifstream input;
    // enable exceptions before opening so that an unreadable file throws instead of failing silently
    input.exceptions(std::ios::failbit | std::ios::badbit);
    input.open(file_path, std::ios::binary);
    auto restored = hll_sketch::deserialize(input);
    REQUIRE(restored.get_lg_config_k() == 12);
    // only the file for zero items is expected to hold an empty sketch
    REQUIRE(restored.is_empty() == (num_items == 0));
    // absolute margin of 2% of the stream size
    REQUIRE(restored.get_estimate() == Approx(num_items).margin(num_items * 0.02));
  }
}

// Deserializes each hll8_n<N>.sk file and checks the configuration,
// emptiness and estimate of the resulting sketch against the stream size N.
TEST_CASE("hll8 sketch", "[serde_compat]") {
  const unsigned stream_sizes[] = {0, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned num_items: stream_sizes) {
    const auto file_path = testBinaryInputPath + "hll8_n" + std::to_string(num_items) + ".sk";
    std::ifstream input;
    // enable exceptions before opening so that an unreadable file throws instead of failing silently
    input.exceptions(std::ios::failbit | std::ios::badbit);
    input.open(file_path, std::ios::binary);
    auto restored = hll_sketch::deserialize(input);
    REQUIRE(restored.get_lg_config_k() == 12);
    // only the file for zero items is expected to hold an empty sketch
    REQUIRE(restored.is_empty() == (num_items == 0));
    // absolute margin of 2% of the stream size
    REQUIRE(restored.get_estimate() == Approx(num_items).margin(num_items * 0.02));
  }
}

} /* namespace datasketches */
7 changes: 7 additions & 0 deletions req/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,10 @@ target_sources(req_test
req_sketch_test.cpp
req_sketch_custom_type_test.cpp
)

# The Java deserialization test is opt-in: it is added to req_test only when SERDE_COMPAT is set.
if(SERDE_COMPAT)
  target_sources(req_test PRIVATE req_sketch_deserialize_from_java_test.cpp)
endif()
55 changes: 55 additions & 0 deletions req/test/req_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <catch2/catch.hpp>
#include <fstream>
#include <req_sketch.hpp>

namespace datasketches {

// Directory holding the binary sketches read by the tests in this file.
// The files are assumed to have been generated by datasketches-java code and placed
// in the subdirectory called "java" in the root directory of this project
// (reached here as "../../java/" appended to TEST_BINARY_INPUT_PATH).
// Declared const because the path is only ever read.
static const std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

// Deserializes each req_float_n<N>.sk file and checks the mode flags, item count,
// min/max items and total retained weight of the resulting sketch.
TEST_CASE("req float", "[serde_compat]") {
  const unsigned stream_sizes[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned num_items: stream_sizes) {
    const auto file_path = testBinaryInputPath + "req_float_n" + std::to_string(num_items) + ".sk";
    std::ifstream input;
    // enable exceptions before opening so that an unreadable file throws instead of failing silently
    input.exceptions(std::ios::failbit | std::ios::badbit);
    input.open(file_path, std::ios::binary);
    auto restored = req_sketch<float>::deserialize(input);
    REQUIRE(restored.is_HRA());
    REQUIRE(restored.is_empty() == (num_items == 0));
    // files with more than 10 items are expected to be in estimation mode
    REQUIRE(restored.is_estimation_mode() == (num_items > 10));
    REQUIRE(restored.get_n() == num_items);

    // the remaining checks apply to non-empty sketches only
    if (num_items == 0) continue;

    // the expected items range from 0 to N-1
    REQUIRE(restored.get_min_item() == 0.0);
    REQUIRE(restored.get_max_item() == static_cast<float>(num_items - 1));

    // every retained item must lie within [min, max], and the weights must add up to n
    uint64_t total_weight = 0;
    for (auto entry: restored) {
      REQUIRE(entry.first >= restored.get_min_item());
      REQUIRE(entry.first <= restored.get_max_item());
      total_weight += entry.second;
    }
    REQUIRE(total_weight == restored.get_n());
  }
}

} /* namespace datasketches */
7 changes: 7 additions & 0 deletions theta/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,10 @@ target_sources(theta_test
theta_setop_test.cpp
bit_packing_test.cpp
)

# The Java deserialization test is opt-in: it is added to theta_test only when SERDE_COMPAT is set.
if(SERDE_COMPAT)
  target_sources(theta_test PRIVATE theta_sketch_deserialize_from_java_test.cpp)
endif()
57 changes: 57 additions & 0 deletions theta/test/theta_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <catch2/catch.hpp>
#include <algorithm>
#include <fstream>
#include <theta_sketch.hpp>

namespace datasketches {

// Directory holding the binary sketches read by the tests in this file.
// The files are assumed to have been generated by datasketches-java code and placed
// in the subdirectory called "java" in the root directory of this project
// (reached here as "../../java/" appended to TEST_BINARY_INPUT_PATH).
// Declared const because the path is only ever read.
static const std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

// Deserializes each theta_n<N>.sk file and checks emptiness, estimation mode,
// the estimate, and that the retained hashes are below theta and sorted.
TEST_CASE("theta sketch", "[serde_compat]") {
  const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned n: n_arr) {
    std::ifstream is;
    // enable exceptions before opening so that an unreadable file throws instead of failing silently
    is.exceptions(std::ios::failbit | std::ios::badbit);
    is.open(testBinaryInputPath + "theta_n" + std::to_string(n) + ".sk", std::ios::binary);
    auto sketch = compact_theta_sketch::deserialize(is);
    REQUIRE(sketch.is_empty() == (n == 0));
    // files with more than 1000 items are expected to be in estimation mode
    REQUIRE(sketch.is_estimation_mode() == (n > 1000));
    // absolute margin of 3% of the stream size
    REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
    // theta does not change while iterating, so read it once instead of once per hash
    const auto theta = sketch.get_theta64();
    for (const auto hash: sketch) {
      REQUIRE(hash < theta);
    }
    // the ordered flag must agree with the actual order of the retained hashes
    REQUIRE(sketch.is_ordered());
    REQUIRE(std::is_sorted(sketch.begin(), sketch.end()));
  }
}

// Deserializes theta_non_empty_no_entries.sk and checks that the sketch
// reports itself as non-empty while retaining zero entries.
TEST_CASE("theta sketch non-empty no entries", "[serde_compat]") {
  const auto file_path = testBinaryInputPath + "theta_non_empty_no_entries.sk";
  std::ifstream input;
  // enable exceptions before opening so that an unreadable file throws instead of failing silently
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(file_path, std::ios::binary);
  auto restored = compact_theta_sketch::deserialize(input);
  REQUIRE_FALSE(restored.is_empty());
  REQUIRE(restored.get_num_retained() == 0);
}

} /* namespace datasketches */
7 changes: 7 additions & 0 deletions tuple/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,10 @@ target_sources(tuple_test
array_of_doubles_sketch_test.cpp
engagement_test.cpp
)

# The Java deserialization test is opt-in: it is added to tuple_test only when SERDE_COMPAT is set.
if(SERDE_COMPAT)
  target_sources(tuple_test PRIVATE aod_sketch_deserialize_from_java_test.cpp)
endif()
75 changes: 75 additions & 0 deletions tuple/test/aod_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <catch2/catch.hpp>
#include <fstream>
#include <array_of_doubles_sketch.hpp>

namespace datasketches {

// Directory holding the binary sketches read by the tests in this file.
// The files are assumed to have been generated by datasketches-java code and placed
// in the subdirectory called "java" in the root directory of this project
// (reached here as "../../java/" appended to TEST_BINARY_INPUT_PATH).
// Declared const because the path is only ever read.
static const std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

// Deserializes each aod_1_n<N>.sk file and checks emptiness, estimation mode,
// the estimate, the number of values per entry, and that every key is below theta.
TEST_CASE("aod sketch one value", "[serde_compat]") {
  const unsigned stream_sizes[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned num_items: stream_sizes) {
    const auto file_path = testBinaryInputPath + "aod_1_n" + std::to_string(num_items) + ".sk";
    std::ifstream input;
    // enable exceptions before opening so that an unreadable file throws instead of failing silently
    input.exceptions(std::ios::failbit | std::ios::badbit);
    input.open(file_path, std::ios::binary);
    auto restored = compact_array_of_doubles_sketch::deserialize(input);
    REQUIRE(restored.is_empty() == (num_items == 0));
    // files with more than 1000 items are expected to be in estimation mode
    REQUIRE(restored.is_estimation_mode() == (num_items > 1000));
    // absolute margin of 3% of the stream size
    REQUIRE(restored.get_estimate() == Approx(num_items).margin(num_items * 0.03));
    REQUIRE(restored.get_num_values() == 1);
    for (const auto& item: restored) {
      REQUIRE(item.first < restored.get_theta64());
    }
  }
}

// Deserializes each aod_3_n<N>.sk file and checks emptiness, estimation mode,
// the estimate, the number of values per entry, that every key is below theta,
// and that the three values of each entry are equal to one another.
TEST_CASE("aod sketch three values", "[serde_compat]") {
  const unsigned stream_sizes[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned num_items: stream_sizes) {
    const auto file_path = testBinaryInputPath + "aod_3_n" + std::to_string(num_items) + ".sk";
    std::ifstream input;
    // enable exceptions before opening so that an unreadable file throws instead of failing silently
    input.exceptions(std::ios::failbit | std::ios::badbit);
    input.open(file_path, std::ios::binary);
    auto restored = compact_array_of_doubles_sketch::deserialize(input);
    REQUIRE(restored.is_empty() == (num_items == 0));
    // files with more than 1000 items are expected to be in estimation mode
    REQUIRE(restored.is_estimation_mode() == (num_items > 1000));
    // absolute margin of 3% of the stream size
    REQUIRE(restored.get_estimate() == Approx(num_items).margin(num_items * 0.03));
    REQUIRE(restored.get_num_values() == 3);
    for (const auto& item: restored) {
      REQUIRE(item.first < restored.get_theta64());
      // all three values of an entry are expected to match the first one
      REQUIRE(item.second[0] == item.second[1]);
      REQUIRE(item.second[0] == item.second[2]);
    }
  }
}

// Deserializes aod_1_non_empty_no_entries.sk and checks that the sketch
// reports itself as non-empty while retaining zero entries.
TEST_CASE("aod sketch non-empty no entries", "[serde_compat]") {
  const auto file_path = testBinaryInputPath + "aod_1_non_empty_no_entries.sk";
  std::ifstream input;
  // enable exceptions before opening so that an unreadable file throws instead of failing silently
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(file_path, std::ios::binary);
  auto restored = compact_array_of_doubles_sketch::deserialize(input);
  REQUIRE_FALSE(restored.is_empty());
  REQUIRE(restored.get_num_retained() == 0);
}

} /* namespace datasketches */

0 comments on commit 7f3c659

Please sign in to comment.