Skip to content

Commit

Permalink
final wrong salt check?
Browse files Browse the repository at this point in the history
  • Loading branch information
lnkuiper committed Nov 20, 2024
1 parent 8f00f4c commit 0e12a0f
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 14 deletions.
17 changes: 13 additions & 4 deletions src/execution/aggregate_hashtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,10 @@ void GroupedAggregateHashTable::Verify() {
if (!entry.IsOccupied()) {
continue;
}
#ifndef DISABLE_POINTER_SALT
auto hash = Load<hash_t>(entry.GetPointer() + hash_offset);
D_ASSERT(entry.GetSalt() == ht_entry_t::ExtractSalt(hash));
#endif
total_count++;
}
D_ASSERT(total_count == Count());
Expand Down Expand Up @@ -207,7 +209,9 @@ void GroupedAggregateHashTable::Resize(idx_t size) {
}
auto &entry = entries[entry_idx];
D_ASSERT(!entry.IsOccupied());
#ifndef DISABLE_POINTER_SALT
entry.SetSalt(ht_entry_t::ExtractSalt(hash));
#endif
entry.SetPointer(row_location);
D_ASSERT(entry.IsOccupied());
}
Expand Down Expand Up @@ -339,7 +343,9 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
const auto &hash = hashes[r];
ht_offsets[r] = ApplyBitMask(hash);
D_ASSERT(ht_offsets[r] == hash % capacity);
#ifndef DISABLE_POINTER_SALT
hash_salts[r] = ht_entry_t::ExtractSalt(hash);
#endif
}

// we start out with all entries [0, 1, 2, ..., groups.size()]
Expand Down Expand Up @@ -382,11 +388,10 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
for (inner_iteration_count = 0; inner_iteration_count < capacity; inner_iteration_count++) {
auto &entry = entries[ht_offset];
if (entry.IsOccupied()) { // Cell is occupied: Compare salts
const bool salt_equal =
#ifdef DISABLE_POINTER_SALT
true;
static constexpr bool salt_equal = true;
#else
entry.GetSalt() == salt;
const auto salt_equal = entry.GetSalt() == salt;
#endif
if (salt_equal) {
// Same salt, compare group keys
Expand All @@ -396,8 +401,12 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
// Different salts, move to next entry (linear probing)
IncrementAndWrap(ht_offset, bitmask);
} else { // Cell is unoccupied, let's claim it
// Set salt (also marks as occupied)
// Set salt (also marks as occupied)
#ifdef DISABLE_POINTER_SALT
entry.SetOccupied();
#else
entry.SetSalt(salt);
#endif
// Update selection lists for outer loops
state.empty_vector.set_index(new_entry_count++, index);
new_groups_out.set_index(new_group_count++, index);
Expand Down
20 changes: 18 additions & 2 deletions src/execution/join_hashtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,17 +136,25 @@ static void ApplyBitmaskAndGetSaltBuild(Vector &hashes_v, const idx_t &count, co
if (hashes_v.GetVectorType() == VectorType::CONSTANT_VECTOR) {
D_ASSERT(!ConstantVector::IsNull(hashes_v));
auto indices = ConstantVector::GetData<hash_t>(hashes_v);
hash_t salt = ht_entry_t::ExtractSaltWithNulls(*indices);
idx_t offset = *indices & bitmask;
#ifdef DISABLE_POINTER_SALT
*indices = offset;
#else
hash_t salt = ht_entry_t::ExtractSaltWithNulls(*indices);
*indices = offset | salt;
#endif
hashes_v.Flatten(count);
} else {
hashes_v.Flatten(count);
auto hashes = FlatVector::GetData<hash_t>(hashes_v);
for (idx_t i = 0; i < count; i++) {
idx_t salt = ht_entry_t::ExtractSaltWithNulls(hashes[i]);
idx_t offset = hashes[i] & bitmask;
#ifdef DISABLE_POINTER_SALT
hashes[i] = offset;
#else
idx_t salt = ht_entry_t::ExtractSaltWithNulls(hashes[i]);
hashes[i] = offset | salt;
#endif
}
}
}
Expand Down Expand Up @@ -605,7 +613,11 @@ static void InsertHashesLoop(atomic<ht_entry_t> entries[], Vector &row_locations
for (idx_t i = 0; i < remaining_count; i++) {
const idx_t row_index = remaining_sel->get_index(i);
idx_t &ht_offset_and_salt = ht_offsets_and_salts[row_index];
#ifdef DISABLE_POINTER_SALT
static constexpr hash_t salt = 0;
#else
const hash_t salt = ht_entry_t::ExtractSalt(ht_offset_and_salt);
#endif

// increment the ht_offset_and_salt of the entry as long as next entry is occupied and salt does not match
idx_t ht_offset;
Expand All @@ -621,9 +633,13 @@ static void InsertHashesLoop(atomic<ht_entry_t> entries[], Vector &row_locations
if (!occupied) {
break;
}
#ifndef DISABLE_POINTER_SALT
if (entry.GetSalt() == salt) {
#endif
break;
#ifndef DISABLE_POINTER_SALT
}
#endif

IncrementAndWrap(ht_offset_and_salt, capacity_mask);
}
Expand Down
42 changes: 34 additions & 8 deletions src/include/duckdb/execution/ht_entry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@

namespace duckdb {

#if !defined(DISABLE_POINTER_SALT) && defined(__ANDROID__)
// #if !defined(DISABLE_POINTER_SALT) && defined(__ANDROID__)
// Google, why does Android need 18446744 TB of address space?
#define DISABLE_POINTER_SALT
#endif
// #endif

//! The ht_entry_t struct represents an individual entry within a hash table.
/*!
Expand All @@ -26,15 +26,10 @@ namespace duckdb {
*/
struct ht_entry_t { // NOLINT
public:
#ifdef DISABLE_POINTER_SALT
static constexpr const hash_t SALT_MASK = 0x0000000000000000;
static constexpr const hash_t POINTER_MASK = 0xFFFFFFFFFFFFFFFF;
#else
//! Upper 16 bits are salt
static constexpr const hash_t SALT_MASK = 0xFFFF000000000000;
//! Lower 48 bits are the pointer
static constexpr const hash_t POINTER_MASK = 0x0000FFFFFFFFFFFF;
#endif

explicit inline ht_entry_t(hash_t value_p) noexcept : value(value_p) {
}
Expand All @@ -47,34 +42,52 @@ struct ht_entry_t { // NOLINT
return value != 0;
}

inline void SetOccupied() {
#ifndef DISABLE_POINTER_SALT
throw InternalException("SetOccupied should only be used when salting is disabled!");
#else
value = 1;
#endif
}

// Returns a pointer based on the stored value without checking cell occupancy.
// This can return a nullptr if the cell is not occupied.
inline data_ptr_t GetPointerOrNull() const {
#ifdef DISABLE_POINTER_SALT
return cast_uint64_to_pointer(value);
#else
return cast_uint64_to_pointer(value & POINTER_MASK);
#endif
}

// Returns a pointer based on the stored value if the cell is occupied
inline data_ptr_t GetPointer() const {
D_ASSERT(IsOccupied());
return cast_uint64_to_pointer(value & POINTER_MASK);
return GetPointerOrNull();
}

inline void SetPointer(const data_ptr_t &pointer) {
#ifdef DISABLE_POINTER_SALT
value = cast_pointer_to_uint64(pointer);
#else
// Pointer shouldn't use upper bits
D_ASSERT((cast_pointer_to_uint64(pointer) & SALT_MASK) == 0);
// Value should have all 1's in the pointer area
D_ASSERT((value & POINTER_MASK) == POINTER_MASK);
// Set upper bits to 1 in pointer so the salt stays intact
value &= cast_pointer_to_uint64(pointer) | SALT_MASK;
#endif
}

// Returns the salt, leaves upper salt bits intact, sets lower bits to all 1's
static inline hash_t ExtractSalt(hash_t hash) {
CheckSaltUsage();
return hash | POINTER_MASK;
}

// Returns the salt, leaves upper salt bits intact, sets lower bits to all 0's
static inline hash_t ExtractSaltWithNulls(hash_t hash) {
CheckSaltUsage();
return hash & SALT_MASK;
}

Expand All @@ -83,23 +96,36 @@ struct ht_entry_t { // NOLINT
}

inline void SetSalt(const hash_t &salt) {
CheckSaltUsage();
// Shouldn't be occupied when we set this
D_ASSERT(!IsOccupied());
value = POINTER_MASK;
// Salt should have all 1's in the pointer field
D_ASSERT((salt & POINTER_MASK) == POINTER_MASK);
// No need to mask, just put the whole thing there
value = salt;
}

static inline ht_entry_t GetDesiredEntry(const data_ptr_t &pointer, const hash_t &salt) {
#ifdef DISABLE_POINTER_SALT
return ht_entry_t(cast_pointer_to_uint64(pointer));
#else
auto desired = cast_pointer_to_uint64(pointer) | (salt & SALT_MASK);
return ht_entry_t(desired);
#endif
}

static inline ht_entry_t GetEmptyEntry() {
return ht_entry_t(0);
}

private:
static inline void CheckSaltUsage() {
#ifdef DISABLE_POINTER_SALT
throw InternalException("Salt should not be used!");
#endif
}

private:
hash_t value;
};
Expand Down

0 comments on commit 0e12a0f

Please sign in to comment.