Skip to content

Commit

Permalink
Add tests for join and aggregation related to NaN handling (facebooki…
Browse files Browse the repository at this point in the history
…ncubator#10125)

Summary:
Pull Request resolved: facebookincubator#10125

Hash join and hash aggregation employ data structures like
VectorHasher, RowContainer and ContainerRowSerde apart from the usual
BaseVector to implement a custom hash table and store intermediate
state. All these have been updated in facebookincubator#9963 to handle NaN values
where all binary representations of the same are considered equal
and hash to the same value. Therefore, this change only adds unit
tests to verify the expected NaN behavior.

Differential Revision: D58365384
  • Loading branch information
Bikramjeet Vig authored and facebook-github-bot committed Jun 14, 2024
1 parent 3ac08d5 commit 257d901
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
36 changes: 36 additions & 0 deletions velox/exec/tests/AggregationTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3671,4 +3671,40 @@ TEST_F(AggregationTest, destroyAfterPartialInitialization) {
ASSERT_TRUE(agg.destroyCalled);
}

TEST_F(AggregationTest, nanKeys) {
// Some keys are NaNs.
auto kNaN = std::numeric_limits<double>::quiet_NaN();
auto kSNaN = std::numeric_limits<double>::signaling_NaN();
// Columns reused across test cases.
auto c0 = makeFlatVector<double>({kNaN, 1, kNaN, 2, kSNaN, 1, 2});
auto c1 = makeFlatVector<int32_t>({1, 1, 1, 1, 1, 1, 1});
// Expected result columns reused across test cases. A deduplicated version of
// c0 and c1.
auto e0 = makeFlatVector<double>({1, 2, kNaN});
auto e1 = makeFlatVector<int32_t>({1, 1, 1});

auto testDistinctAgg = [&](std::vector<std::string> aggKeys,
std::vector<VectorPtr> inputCols,
std::vector<VectorPtr> expectedCols) {
auto plan = PlanBuilder()
.values({makeRowVector(inputCols)})
.singleAggregation(aggKeys, {}, {})
.planNode();
AssertQueryBuilder(plan).assertResults(makeRowVector(expectedCols));
};

// Test with a primitive type key.
testDistinctAgg({"c0"}, {c0}, {e0});
// Multiple key columns.
testDistinctAgg({"c0", "c1"}, {c0, c1}, {e0, e1});

// Test with a complex type key.
testDistinctAgg({"c0"}, {makeRowVector({c0, c1})}, {makeRowVector({e0, e1})});
// Multiple key columns.
testDistinctAgg(
{"c0", "c1"},
{makeRowVector({c0, c1}), c1},
{makeRowVector({e0, e1}), e1});
}

} // namespace facebook::velox::exec::test
34 changes: 34 additions & 0 deletions velox/exec/tests/HashJoinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7797,4 +7797,38 @@ DEBUG_ONLY_TEST_F(HashJoinTest, spillCheckOnLeftSemiFilterWithDynamicFilters) {
})
.run();
}

TEST_F(HashJoinTest, nanKeys) {
// Verify the NaN values with different binary representations are considered
// equal.
static const double kNan = std::numeric_limits<double>::quiet_NaN();
static const double kSNaN = std::numeric_limits<double>::signaling_NaN();
auto probeInput = makeRowVector(
{makeFlatVector<double>({kNan, kSNaN}), makeFlatVector<int64_t>({1, 2})});
auto buildInput = makeRowVector({makeFlatVector<double>({kNan, 1})});

auto planNodeIdGenerator = std::make_shared<core::PlanNodeIdGenerator>();
auto plan = PlanBuilder(planNodeIdGenerator)
.values({probeInput})
.project({"c0 AS t0", "c1 AS t1"})
.hashJoin(
{"t0"},
{"u0"},
PlanBuilder(planNodeIdGenerator)
.values({buildInput})
.project({"c0 AS u0"})
.planNode(),
"",
{"t0", "u0", "t1"},
core::JoinType::kLeft)
.planNode();
auto queryCtx = core::QueryCtx::create(executor_.get());
auto result =
AssertQueryBuilder(plan).queryCtx(queryCtx).copyResults(pool_.get());
auto expected = makeRowVector(
{makeFlatVector<double>({kNan, kNan}),
makeFlatVector<double>({kNan, kNan}),
makeFlatVector<int64_t>({1, 2})});
facebook::velox::test::assertEqualVectors(expected, result);
}
} // namespace

0 comments on commit 257d901

Please sign in to comment.