Skip to content

Commit

Permalink
Fix NaN handling in map_subset (#9893)
Browse files Browse the repository at this point in the history
Summary:

Ensure NaNs values of any binary representations are treated as equal and
can be identified as keys in a map.

Differential Revision: D57681657
  • Loading branch information
Bikramjeet Vig authored and facebook-github-bot committed May 22, 2024
1 parent 941302f commit 4d2b732
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 5 deletions.
3 changes: 2 additions & 1 deletion velox/docs/functions/presto/map.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ Map Functions

.. function:: map_subset(map(K,V), array(k)) -> map(K,V)

Constructs a map from those entries of ``map`` for which the key is in the array given::
Constructs a map from those entries of ``map`` for which the key is in the array given
For keys containing REAL and DOUBLE, NANs (Not-a-Number) are considered equal. ::

SELECT map_subset(MAP(ARRAY[1,2], ARRAY['a','b']), ARRAY[10]); -- {}
SELECT map_subset(MAP(ARRAY[1,2], ARRAY['a','b']), ARRAY[1]); -- {1->'a'}
Expand Down
3 changes: 2 additions & 1 deletion velox/functions/prestosql/MapSubset.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "velox/expression/ComplexViewTypes.h"
#include "velox/functions/Udf.h"
#include "velox/type/FloatingPointUtil.h"

namespace facebook::velox::functions {

Expand Down Expand Up @@ -84,7 +85,7 @@ struct MapSubsetPrimitiveFunction {
}

bool constantSearchKeys_{false};
folly::F14FastSet<arg_type<Key>> searchKeys_;
util::floating_point::HashSetNaNAware<arg_type<Key>> searchKeys_;
};

/// Fast path for constant string keys: map_subset(m, array['a', 'b', 'c']).
Expand Down
67 changes: 66 additions & 1 deletion velox/functions/prestosql/tests/MapSubsetTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,67 @@ using namespace facebook::velox::test;
namespace facebook::velox::functions {
namespace {

class MapSubsetTest : public test::FunctionBaseTest {};
class MapSubsetTest : public test::FunctionBaseTest {
public:
template <typename T>
void testFloatNaNs() {
static const auto kNaN = std::numeric_limits<T>::quiet_NaN();
static const auto kSNaN = std::numeric_limits<T>::signaling_NaN();

// Case 1: Non-constant search keys.
auto data = makeRowVector(
{makeMapVectorFromJson<T, int32_t>({
"{1:10, NaN:20, 3:null, 4:40, 5:50, 6:60}",
"{NaN:20}",
}),
makeArrayVector<T>({{1, kNaN, 5}, {kSNaN, 3}})});

auto expected = makeMapVectorFromJson<T, int32_t>({
"{1:10, NaN:20, 5:50}",
"{NaN:20}",
});
auto result = evaluate("map_subset(c0, c1)", data);
assertEqualVectors(expected, result);

// Case 2: Constant search keys.
data = makeRowVector(
{makeMapVectorFromJson<T, int32_t>({
"{1:10, NaN:20, 3:null, 4:40, 5:50, 6:60}",
"{NaN:20}",
}),
BaseVector::wrapInConstant(2, 0, makeArrayVector<T>({{1, kNaN, 5}}))});
expected = makeMapVectorFromJson<T, int32_t>({
"{1:10, NaN:20, 5:50}",
"{NaN:20}",
});
result = evaluate("map_subset(c0, c1)", data);
assertEqualVectors(expected, result);

// Case 3: Map with Complex type as key.
// Map: { [{1, NaN,3}: 1, {4, 5}: 2], [{NaN, 3}: 3, {1, 2}: 4] }
data = makeRowVector({
makeMapVector(
{0, 2},
makeArrayVector<T>({{1, kNaN, 3}, {4, 5}, {kSNaN, 3}, {1, 2}}),
makeFlatVector<int32_t>({1, 2, 3, 4})),
makeNestedArrayVectorFromJson<T>({
"[[1, NaN, 3], [4, 5]]",
"[[1, 2, 3], [NaN, 3]]",
}),
});
expected = makeMapVector(
{0, 2},
makeArrayVectorFromJson<T>({
"[1, NaN, 3]",
"[4, 5]",
"[NaN, 3]",
}),
makeFlatVector<int32_t>({1, 2, 3}));

result = evaluate("map_subset(c0, c1)", data);
assertEqualVectors(expected, result);
}
};

TEST_F(MapSubsetTest, bigintKey) {
auto data = makeRowVector({
Expand Down Expand Up @@ -133,5 +193,10 @@ TEST_F(MapSubsetTest, arrayKey) {
assertEqualVectors(expected, result);
}

TEST_F(MapSubsetTest, floatNaNs) {
testFloatNaNs<float>();
testFloatNaNs<double>();
}

} // namespace
} // namespace facebook::velox::functions
11 changes: 9 additions & 2 deletions velox/vector/SimpleVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "velox/functions/lib/string/StringCore.h"
#include "velox/type/DecimalUtil.h"
#include "velox/type/FloatingPointUtil.h"
#include "velox/type/Type.h"
#include "velox/vector/BaseVector.h"
#include "velox/vector/TypeAliases.h"
Expand Down Expand Up @@ -171,8 +172,14 @@ class SimpleVector : public BaseVector {
* @return the hash of the value at the given index in this vector
*/
uint64_t hashValueAt(vector_size_t index) const override {
return isNullAt(index) ? BaseVector::kNullHash
: folly::hasher<T>{}(valueAt(index));
if (isNullAt(index)) {
return BaseVector::kNullHash;
}
if constexpr (std::is_floating_point_v<T>) {
return util::floating_point::NaNAwareHash<T>{}(valueAt(index));
} else {
return folly::hasher<T>{}(valueAt(index));
}
}

std::optional<bool> isSorted() const {
Expand Down
1 change: 1 addition & 0 deletions velox/vector/tests/utils/VectorMaker.h
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,7 @@ class VectorMaker {
folly::json::serialization_opts options;
options.convert_int_keys = true;
options.allow_non_string_keys = true;
options.allow_nan_inf = true;
folly::dynamic mapObject = folly::parseJson(jsonMap, options);
if (mapObject.isNull()) {
// Null map.
Expand Down

0 comments on commit 4d2b732

Please sign in to comment.