Skip to content

Commit

Permalink
ARROW-6666: [Rust] Datafusion parquet string literal support
Browse files Browse the repository at this point in the history
This change also required adding comparison predicates for UTF8 strings to Arrow's comparison kernels. Support for the LIKE and NOT LIKE operators is also included.

Closes #6469 from maxburke/datafusion_parquet_string_literal_support and squashes the following commits:

2995774 <Max Burke> Attend to issues raised in PR review
3c172f1 <Max Burke> Re-introduce a compare op function that doesn't use simd for the benchmark suite
7de67a2 <Max Burke> Add support for string comparisons to DataFusion.
1e91323 <Max Burke> Ignore vim swap files
5aad806 <Max Burke> cargo fmt
138717c <Max Burke> Add support to datafusion-cli for parquet files.

Authored-by: Max Burke <max@urbanlogiq.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
  • Loading branch information
maxburke authored and kszucs committed Feb 24, 2020
1 parent dc01a36 commit 520d47d
Show file tree
Hide file tree
Showing 6 changed files with 320 additions and 32 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ docker_cache
.gdb_history
.DS_Store
*.orig
.*.swp
.*.swo

site/

Expand Down
12 changes: 6 additions & 6 deletions rust/arrow/benches/comparison_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,37 +39,37 @@ fn create_array(size: usize) -> Float32Array {
/// Benchmark routine for element-wise equality (`a == b`) over two arrays
/// built by `create_array(size)`. The unwrapped comparison result is handed to
/// `criterion::black_box` so the work is not optimized away.
pub fn eq_no_simd(size: usize) {
let arr_a = create_array(size);
let arr_b = create_array(size);
// NOTE(review): the `compare_op` call and the `no_simd_compare_op` call below look like the
// removed and added sides of a diff rendered without +/- markers — confirm only one is live.
criterion::black_box(compare_op(&arr_a, &arr_b, |a, b| a == b).unwrap());
criterion::black_box(no_simd_compare_op(&arr_a, &arr_b, |a, b| a == b).unwrap());
}

/// Benchmark routine for element-wise inequality (`a != b`) over two arrays
/// built by `create_array(size)`. The unwrapped comparison result is handed to
/// `criterion::black_box` so the work is not optimized away.
pub fn neq_no_simd(size: usize) {
let arr_a = create_array(size);
let arr_b = create_array(size);
// NOTE(review): the `compare_op` call and the `no_simd_compare_op` call below look like the
// removed and added sides of a diff rendered without +/- markers — confirm only one is live.
criterion::black_box(compare_op(&arr_a, &arr_b, |a, b| a != b).unwrap());
criterion::black_box(no_simd_compare_op(&arr_a, &arr_b, |a, b| a != b).unwrap());
}

/// Benchmark routine for element-wise less-than (`a < b`) over two arrays
/// built by `create_array(size)`. The unwrapped comparison result is handed to
/// `criterion::black_box` so the work is not optimized away.
pub fn lt_no_simd(size: usize) {
let arr_a = create_array(size);
let arr_b = create_array(size);
// NOTE(review): the `compare_op` call and the `no_simd_compare_op` call below look like the
// removed and added sides of a diff rendered without +/- markers — confirm only one is live.
criterion::black_box(compare_op(&arr_a, &arr_b, |a, b| a < b).unwrap());
criterion::black_box(no_simd_compare_op(&arr_a, &arr_b, |a, b| a < b).unwrap());
}

/// Benchmark routine for element-wise less-than-or-equal (`a <= b`) over two
/// arrays built by `create_array(size)`. The unwrapped comparison result is
/// handed to `criterion::black_box` so the work is not optimized away.
fn lt_eq_no_simd(size: usize) {
let arr_a = create_array(size);
let arr_b = create_array(size);
// NOTE(review): the `compare_op` call and the `no_simd_compare_op` call below look like the
// removed and added sides of a diff rendered without +/- markers — confirm only one is live.
criterion::black_box(compare_op(&arr_a, &arr_b, |a, b| a <= b).unwrap());
criterion::black_box(no_simd_compare_op(&arr_a, &arr_b, |a, b| a <= b).unwrap());
}

/// Benchmark routine for element-wise greater-than (`a > b`) over two arrays
/// built by `create_array(size)`. The unwrapped comparison result is handed to
/// `criterion::black_box` so the work is not optimized away.
pub fn gt_no_simd(size: usize) {
let arr_a = create_array(size);
let arr_b = create_array(size);
// NOTE(review): the `compare_op` call and the `no_simd_compare_op` call below look like the
// removed and added sides of a diff rendered without +/- markers — confirm only one is live.
criterion::black_box(compare_op(&arr_a, &arr_b, |a, b| a > b).unwrap());
criterion::black_box(no_simd_compare_op(&arr_a, &arr_b, |a, b| a > b).unwrap());
}

/// Benchmark routine for element-wise greater-than-or-equal (`a >= b`) over two
/// arrays built by `create_array(size)`. The unwrapped comparison result is
/// handed to `criterion::black_box` so the work is not optimized away.
fn gt_eq_no_simd(size: usize) {
let arr_a = create_array(size);
let arr_b = create_array(size);
// NOTE(review): the `compare_op` call and the `no_simd_compare_op` call below look like the
// removed and added sides of a diff rendered without +/- markers — confirm only one is live.
criterion::black_box(compare_op(&arr_a, &arr_b, |a, b| a >= b).unwrap());
criterion::black_box(no_simd_compare_op(&arr_a, &arr_b, |a, b| a >= b).unwrap());
}

fn eq_simd(size: usize) {
Expand Down
1 change: 1 addition & 0 deletions rust/arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,7 @@ impl StringArray {
self.value_data.get().offset(pos as isize),
(self.value_offset_at(offset + 1) - pos) as usize,
);

std::str::from_utf8_unchecked(slice)
}
}
Expand Down
Loading

0 comments on commit 520d47d

Please sign in to comment.