Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
97b8931
refactor(query): refactor code struct
zhang2014 Oct 1, 2025
48875d4
refactor(query): refactor code struct
zhang2014 Oct 1, 2025
11a2f72
refactor(query): refactor left outer join to new join
zhang2014 Oct 2, 2025
788f8b9
refactor(query): refactor left outer join to new join
zhang2014 Oct 2, 2025
3743194
refactor(query): enable experimental new hash join setting
zhang2014 Oct 2, 2025
ecea08f
refactor(query): refactor left outer join to new join
zhang2014 Oct 2, 2025
7f53d75
refactor(query): refactor left outer join to new join
zhang2014 Oct 2, 2025
8c7c396
refactor(query): refactor left outer join to new join
zhang2014 Oct 2, 2025
fa5c7b1
refactor(query): refactor left outer join to new join
zhang2014 Oct 2, 2025
0ebfe9e
refactor(query): refactor left outer join to new join
zhang2014 Oct 3, 2025
8363548
refactor(query): refactor left outer join to new join
zhang2014 Oct 5, 2025
3e896d7
refactor(query): refactor left outer join to new join
zhang2014 Oct 5, 2025
9fcadf4
refactor(query): refactor left outer join to new join
zhang2014 Oct 5, 2025
ad6a21c
refactor(query): refactor left outer join to new join
zhang2014 Oct 5, 2025
56a40c2
refactor(query): refactor left outer join to new join
zhang2014 Oct 5, 2025
21a6e1a
refactor(query): refactor left outer join to new join
zhang2014 Oct 5, 2025
197f5a7
refactor(query): refactor left outer join to new join
zhang2014 Oct 5, 2025
221c2dd
refactor(query): refactor left outer join to new join
zhang2014 Oct 6, 2025
4813367
refactor(query): refactor left outer join to new join
zhang2014 Oct 6, 2025
e2a98cd
refactor(query): refactor left outer join to new join
zhang2014 Oct 6, 2025
32dd07a
refactor(query): refactor left outer join to new join
zhang2014 Oct 6, 2025
9f8e9be
refactor(query): refactor left outer join to new join
zhang2014 Oct 6, 2025
279ada0
Merge branch 'main' into refactor/left_join
zhang2014 Oct 6, 2025
4c6fa0c
refactor(query): refactor left outer join to new join
zhang2014 Oct 6, 2025
554a792
refactor(query): refactor left outer join to new join
zhang2014 Oct 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions src/common/base/src/hints/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,17 @@

#[inline]
pub fn assume(condition: bool) {
if !condition {
unsafe { std::hint::unreachable_unchecked() }
#[cfg(debug_assertions)]
{
if !condition {
panic!("assume condition must be true");
}
}

#[cfg(not(debug_assertions))]
{
if !condition {
unsafe { std::hint::unreachable_unchecked() }
}
}
}
100 changes: 38 additions & 62 deletions src/common/hashtable/src/hashjoin_hashtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use std::marker::PhantomData;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;

use databend_common_base::hints::assume;
use databend_common_base::mem_allocator::DefaultAllocator;
use databend_common_column::bitmap::Bitmap;

Expand Down Expand Up @@ -215,25 +216,19 @@ where
&self,
hashes: &mut [u64],
bitmap: Option<Bitmap>,
matched_selection: &mut [u32],
unmatched_selection: &mut [u32],
matched_selection: &mut Vec<u32>,
unmatched_selection: &mut Vec<u32>,
) -> (usize, usize) {
let mut valids = None;
if let Some(bitmap) = bitmap {
if bitmap.null_count() == bitmap.len() {
unmatched_selection
.iter_mut()
.enumerate()
.for_each(|(idx, val)| {
*val = idx as u32;
});
unmatched_selection.extend(0..bitmap.null_count() as u32);
return (0, hashes.len());
} else if bitmap.null_count() > 0 {
valids = Some(bitmap);
}
}
let mut matched_idx = 0;
let mut unmatched_idx = 0;

match valids {
Some(valids) => {
valids.iter().zip(hashes.iter_mut().enumerate()).for_each(
Expand All @@ -242,22 +237,15 @@ where
let header = self.pointers[(*hash >> self.hash_shift) as usize];
if header != 0 && early_filtering(header, *hash) {
*hash = remove_header_tag(header);
unsafe {
*matched_selection.get_unchecked_mut(matched_idx) = idx as u32
};
matched_idx += 1;
assume(matched_selection.len() < matched_selection.capacity());
matched_selection.push(idx as u32);
} else {
unsafe {
*unmatched_selection.get_unchecked_mut(unmatched_idx) =
idx as u32
};
unmatched_idx += 1;
assume(unmatched_selection.len() < unmatched_selection.capacity());
unmatched_selection.push(idx as u32);
}
} else {
unsafe {
*unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32
};
unmatched_idx += 1;
assume(unmatched_selection.len() < unmatched_selection.capacity());
unmatched_selection.push(idx as u32);
}
},
);
Expand All @@ -267,72 +255,60 @@ where
let header = self.pointers[(*hash >> self.hash_shift) as usize];
if header != 0 && early_filtering(header, *hash) {
*hash = remove_header_tag(header);
unsafe { *matched_selection.get_unchecked_mut(matched_idx) = idx as u32 };
matched_idx += 1;
assume(matched_selection.len() < matched_selection.capacity());
matched_selection.push(idx as u32);
} else {
unsafe {
*unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32
};
unmatched_idx += 1;
assume(unmatched_selection.len() < unmatched_selection.capacity());
unmatched_selection.push(idx as u32);
}
});
}
}
(matched_idx, unmatched_idx)
(matched_selection.len(), unmatched_selection.len())
}

// Perform early filtering probe and store matched indexes in `selection`, return the number of matched indexes.
fn early_filtering_matched_probe(
&self,
hashes: &mut [u64],
bitmap: Option<Bitmap>,
selection: &mut [u32],
selection: &mut Vec<u32>,
) -> usize {
let mut valids = None;

if let Some(bitmap) = bitmap {
if bitmap.null_count() == bitmap.len() {
hashes.iter_mut().for_each(|hash| {
*hash = 0;
});
return 0;
} else if bitmap.null_count() > 0 {
valids = Some(bitmap);
}
}
let mut count = 0;
match valids {
Some(valids) => {
valids.iter().zip(hashes.iter_mut().enumerate()).for_each(
|(valid, (idx, hash))| {
if valid {
let header = self.pointers[(*hash >> self.hash_shift) as usize];
if header != 0 && early_filtering(header, *hash) {
*hash = remove_header_tag(header);
unsafe { *selection.get_unchecked_mut(count) = idx as u32 };
count += 1;
} else {
*hash = 0;
}
} else {
*hash = 0;
}
},
);
}
None => {
hashes.iter_mut().enumerate().for_each(|(idx, hash)| {

if let Some(valids) = valids {
for (valid, (idx, hash)) in valids.iter().zip(hashes.iter_mut().enumerate()) {
if valid {
let header = self.pointers[(*hash >> self.hash_shift) as usize];
if header != 0 && early_filtering(header, *hash) {
*hash = remove_header_tag(header);
unsafe { *selection.get_unchecked_mut(count) = idx as u32 };
count += 1;
} else {
*hash = 0;
assume(selection.len() < selection.capacity());
selection.push(idx as u32);
}
});
}
}

return selection.len();
}
count

for (idx, hash) in hashes.iter_mut().enumerate() {
let header = self.pointers[(*hash >> self.hash_shift) as usize];
if header != 0 && early_filtering(header, *hash) {
*hash = remove_header_tag(header);
assume(selection.len() < selection.capacity());
selection.push(idx as u32);
}
}

selection.len()
}

fn next_contains(&self, key: &Self::Key, mut ptr: u64) -> bool {
Expand Down
68 changes: 23 additions & 45 deletions src/common/hashtable/src/hashjoin_string_hashtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use std::alloc::Allocator;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;

use databend_common_base::hints::assume;
use databend_common_base::mem_allocator::DefaultAllocator;
use databend_common_column::bitmap::Bitmap;

Expand Down Expand Up @@ -144,47 +145,35 @@ where A: Allocator + Clone + 'static
&self,
hashes: &mut [u64],
bitmap: Option<Bitmap>,
matched_selection: &mut [u32],
unmatched_selection: &mut [u32],
matched_selection: &mut Vec<u32>,
unmatched_selection: &mut Vec<u32>,
) -> (usize, usize) {
let mut valids = None;
if let Some(bitmap) = bitmap {
if bitmap.null_count() == bitmap.len() {
unmatched_selection
.iter_mut()
.enumerate()
.for_each(|(idx, val)| {
*val = idx as u32;
});
unmatched_selection.extend(0..bitmap.null_count() as u32);
return (0, hashes.len());
} else if bitmap.null_count() > 0 {
valids = Some(bitmap);
}
}
let mut matched_idx = 0;
let mut unmatched_idx = 0;

match valids {
Some(valids) => {
hashes.iter_mut().enumerate().for_each(|(idx, hash)| {
if unsafe { valids.get_bit_unchecked(idx) } {
let header = self.pointers[(*hash >> self.hash_shift) as usize];
if header != 0 && early_filtering(header, *hash) {
*hash = remove_header_tag(header);
unsafe {
*matched_selection.get_unchecked_mut(matched_idx) = idx as u32
};
matched_idx += 1;
assume(matched_selection.len() < matched_selection.capacity());
matched_selection.push(idx as u32);
} else {
unsafe {
*unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32
};
unmatched_idx += 1;
assume(unmatched_selection.len() < unmatched_selection.capacity());
unmatched_selection.push(idx as u32);
}
} else {
unsafe {
*unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32
};
unmatched_idx += 1;
assume(unmatched_selection.len() < unmatched_selection.capacity());
unmatched_selection.push(idx as u32);
}
});
}
Expand All @@ -193,53 +182,44 @@ where A: Allocator + Clone + 'static
let header = self.pointers[(*hash >> self.hash_shift) as usize];
if header != 0 && early_filtering(header, *hash) {
*hash = remove_header_tag(header);
unsafe { *matched_selection.get_unchecked_mut(matched_idx) = idx as u32 };
matched_idx += 1;
assume(matched_selection.len() < matched_selection.capacity());
matched_selection.push(idx as u32);
} else {
unsafe {
*unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32
};
unmatched_idx += 1;
assume(unmatched_selection.len() < unmatched_selection.capacity());
unmatched_selection.push(idx as u32);
}
});
}
}
(matched_idx, unmatched_idx)
(matched_selection.len(), unmatched_selection.len())
}

// Perform early filtering probe and store matched indexes in `selection`, return the number of matched indexes.
fn early_filtering_matched_probe(
&self,
hashes: &mut [u64],
bitmap: Option<Bitmap>,
selection: &mut [u32],
selection: &mut Vec<u32>,
) -> usize {
let mut valids = None;
if let Some(bitmap) = bitmap {
if bitmap.null_count() == bitmap.len() {
hashes.iter_mut().for_each(|hash| {
*hash = 0;
});
return 0;
} else if bitmap.null_count() > 0 {
valids = Some(bitmap);
}
}
let mut count = 0;

match valids {
Some(valids) => {
hashes.iter_mut().enumerate().for_each(|(idx, hash)| {
if unsafe { valids.get_bit_unchecked(idx) } {
let header = self.pointers[(*hash >> self.hash_shift) as usize];
if header != 0 && early_filtering(header, *hash) {
*hash = remove_header_tag(header);
unsafe { *selection.get_unchecked_mut(count) = idx as u32 };
count += 1;
} else {
*hash = 0;
assume(selection.len() < selection.capacity());
selection.push(idx as u32);
}
} else {
*hash = 0;
}
});
}
Expand All @@ -248,15 +228,13 @@ where A: Allocator + Clone + 'static
let header = self.pointers[(*hash >> self.hash_shift) as usize];
if header != 0 && early_filtering(header, *hash) {
*hash = remove_header_tag(header);
unsafe { *selection.get_unchecked_mut(count) = idx as u32 };
count += 1;
} else {
*hash = 0;
assume(selection.len() < selection.capacity());
selection.push(idx as u32);
}
});
}
}
count
selection.len()
}

fn next_contains(&self, key: &Self::Key, mut ptr: u64) -> bool {
Expand Down
6 changes: 3 additions & 3 deletions src/common/hashtable/src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,16 +533,16 @@ pub trait HashJoinHashtableLike {
&self,
hashes: &mut [u64],
valids: Option<Bitmap>,
matched_selection: &mut [u32],
unmatched_selection: &mut [u32],
matched_selection: &mut Vec<u32>,
unmatched_selection: &mut Vec<u32>,
) -> (usize, usize);

// Perform early filtering probe and store matched indexes in `selection`, return the number of matched indexes.
fn early_filtering_matched_probe(
&self,
hashes: &mut [u64],
valids: Option<Bitmap>,
selection: &mut [u32],
selection: &mut Vec<u32>,
) -> usize;

// we use `next_contains` to see whether we can find a matched row in the link.
Expand Down
Loading
Loading