-
Notifications
You must be signed in to change notification settings - Fork 1.8k
TESTING: Optimize byte view comparison in multi groupby #19344
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
dee4cb8
91ac482
9a83e18
d4eaebb
ec6b499
7c999f7
3031658
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -117,15 +117,38 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> { | |
| self.do_append_val_inner(arr, row); | ||
| } | ||
|
|
||
| fn vectorized_equal_to_inner( | ||
| fn vectorized_equal_to_inner_no_nulls( | ||
| &self, | ||
| lhs_rows: &[usize], | ||
| array: &ArrayRef, | ||
| array: &GenericByteViewArray<B>, | ||
| rhs_rows: &[usize], | ||
| equal_to_results: &mut [bool], | ||
| ) { | ||
| let array = array.as_byte_view::<B>(); | ||
| let iter = izip!( | ||
| lhs_rows.iter(), | ||
| rhs_rows.iter(), | ||
| equal_to_results.iter_mut(), | ||
| ); | ||
|
|
||
| // if the input array has no nulls, can skip null check | ||
| for (&lhs_row, &rhs_row, equal_to_result) in iter { | ||
| // Has found not equal to, don't need to check | ||
| if !*equal_to_result { | ||
| continue; | ||
| } | ||
|
|
||
| *equal_to_result = | ||
| self.do_equal_to_inner_values_only(lhs_row, array, rhs_row); | ||
| } | ||
| } | ||
|
|
||
| fn vectorized_equal_to_inner_nulls( | ||
| &self, | ||
| lhs_rows: &[usize], | ||
| array: &GenericByteViewArray<B>, | ||
| rhs_rows: &[usize], | ||
| equal_to_results: &mut [bool], | ||
| ) { | ||
| let iter = izip!( | ||
| lhs_rows.iter(), | ||
| rhs_rows.iter(), | ||
|
|
@@ -226,14 +249,26 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> { | |
| let exist_null = self.nulls.is_null(lhs_row); | ||
| let input_null = array.is_null(rhs_row); | ||
| if let Some(result) = nulls_equal_to(exist_null, input_null) { | ||
| return result; | ||
| result | ||
| } else { | ||
| self.do_equal_to_inner_values_only(lhs_row, array, rhs_row) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. perhaps this could be optimized as well (in the outer loop) for empty data buffers
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will give it a try
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's move this conversation to #19364 |
||
| } | ||
| } | ||
|
|
||
| // Otherwise, we need to check their values | ||
| let exist_view = self.views[lhs_row]; | ||
| /// Same as equal_to_inner, but without null checks | ||
| /// | ||
| /// Checks only the values for equality | ||
| #[inline(always)] | ||
| fn do_equal_to_inner_values_only( | ||
| &self, | ||
| lhs_row: usize, | ||
| array: &GenericByteViewArray<B>, | ||
| rhs_row: usize, | ||
| ) -> bool { | ||
| let exist_view = unsafe { *self.views.get_unchecked(lhs_row) }; | ||
| let exist_view_len = exist_view as u32; | ||
|
|
||
| let input_view = array.views()[rhs_row]; | ||
| let input_view = unsafe { *array.views().get_unchecked(rhs_row) }; | ||
| let input_view_len = input_view as u32; | ||
|
|
||
| // The check logic | ||
|
|
@@ -246,19 +281,9 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> { | |
| } | ||
|
|
||
| if exist_view_len <= 12 { | ||
| let exist_inline = unsafe { | ||
| GenericByteViewArray::<B>::inline_value( | ||
| &exist_view, | ||
| exist_view_len as usize, | ||
| ) | ||
| }; | ||
| let input_inline = unsafe { | ||
| GenericByteViewArray::<B>::inline_value( | ||
| &input_view, | ||
| input_view_len as usize, | ||
| ) | ||
| }; | ||
| exist_inline == input_inline | ||
| // the views are inlined and the lengths are equal, so just | ||
| // compare the views directly | ||
| exist_view == input_view | ||
| } else { | ||
| let exist_prefix = | ||
| unsafe { GenericByteViewArray::<B>::inline_value(&exist_view, 4) }; | ||
|
|
@@ -507,7 +532,22 @@ impl<B: ByteViewType> GroupColumn for ByteViewGroupValueBuilder<B> { | |
| rows: &[usize], | ||
| equal_to_results: &mut [bool], | ||
| ) { | ||
| self.vectorized_equal_to_inner(group_indices, array, rows, equal_to_results); | ||
| let array = array.as_byte_view::<B>(); | ||
| if array.null_count() == 0 { | ||
| self.vectorized_equal_to_inner_no_nulls( | ||
| group_indices, | ||
| array, | ||
| rows, | ||
| equal_to_results, | ||
| ); | ||
| } else { | ||
| self.vectorized_equal_to_inner_nulls( | ||
| group_indices, | ||
| array, | ||
| rows, | ||
| equal_to_results, | ||
| ); | ||
| } | ||
| } | ||
|
|
||
| fn vectorized_append(&mut self, array: &ArrayRef, rows: &[usize]) -> Result<()> { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if we can get this to compile to a well-vectorized loop.