From ad79a14c9e5ec4a369eed4adf567c22cc029863f Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Thu, 20 Jun 2024 09:58:12 +0800 Subject: [PATCH] [ADT] Update hash function of uint64_t for DenseMap (#95734) (Background: See the comment: https://github.com/llvm/llvm-project/pull/92083#issuecomment-2168121729) It looks like the hash function for 64bits integers are not very good: ``` static unsigned getHashValue(const unsigned long long& Val) { return (unsigned)(Val * 37ULL); } ``` Since the result is truncated to 32 bits. It looks like the higher 32 bits won't contribute to the result. So that `0x1'00000001` will have the the same results to `0x2'00000001`, `0x3'00000001`, ... Then we may meet a lot collisions in such cases. I feel it should generally good to include higher 32 bits for hashing functions. Not sure who's the appropriate reviewer, adding some people by impressions. --- llvm/include/llvm/ADT/DenseMapInfo.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h index 7e1525ac41c3a..4f0aaa1ad48bb 100644 --- a/llvm/include/llvm/ADT/DenseMapInfo.h +++ b/llvm/include/llvm/ADT/DenseMapInfo.h @@ -139,7 +139,10 @@ template<> struct DenseMapInfo { static inline unsigned long getTombstoneKey() { return ~0UL - 1L; } static unsigned getHashValue(const unsigned long& Val) { - return (unsigned)(Val * 37UL); + if constexpr (sizeof(Val) == 4) + return DenseMapInfo::getHashValue(Val); + else + return detail::combineHashValue(Val >> 32, Val); } static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) { @@ -153,7 +156,7 @@ template<> struct DenseMapInfo { static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; } static unsigned getHashValue(const unsigned long long& Val) { - return (unsigned)(Val * 37ULL); + return detail::combineHashValue(Val >> 32, Val); } static bool isEqual(const unsigned long long& LHS,