Skip to content

Commit

Permalink
Add substring support for FixedSizeBinaryArray (#1633)
Browse files Browse the repository at this point in the history
* add function, no tests yet

Signed-off-by: remzi <13716567376yh@gmail.com>

* adjust the fn structure

Signed-off-by: remzi <13716567376yh@gmail.com>

* cargo fmt

Signed-off-by: remzi <13716567376yh@gmail.com>

* add tests

Signed-off-by: remzi <13716567376yh@gmail.com>

* fix clippy

Signed-off-by: remzi <13716567376yh@gmail.com>

* add identical test cases for utf8 and binary

Signed-off-by: remzi <13716567376yh@gmail.com>

* add benchmark

Signed-off-by: remzi <13716567376yh@gmail.com>

* fix offset bug

Signed-off-by: remzi <13716567376yh@gmail.com>

* fix a nit

Signed-off-by: remzi <13716567376yh@gmail.com>
  • Loading branch information
HaoYang670 committed May 5, 2022
1 parent 22e9f95 commit e51de5e
Show file tree
Hide file tree
Showing 2 changed files with 362 additions and 6 deletions.
17 changes: 11 additions & 6 deletions arrow/benches/string_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,27 @@ use arrow::array::*;
use arrow::compute::kernels::substring::substring;
use arrow::util::bench_util::*;

// Benchmark helper: runs the `substring` kernel once on `arr` with the given
// `start` and `length`, passing the array through `criterion::black_box`, and
// unwraps the returned `Result` (so a kernel error panics the benchmark).
//
// NOTE(review): two signature lines appear below because this is a rendered
// diff without +/- markers. Presumably the `&StringArray` line is the removed
// one and the `&dyn Array` line is its replacement, which lets the same helper
// accept the fixed-size binary array used in `add_benchmark` — confirm against
// the commit.
fn bench_substring(arr: &StringArray, start: i64, length: Option<u64>) {
fn bench_substring(arr: &dyn Array, start: i64, length: Option<u64>) {
substring(criterion::black_box(arr), start, length).unwrap();
}

// Registers the `substring` benchmarks on the Criterion instance `c`.
//
// NOTE(review): this is a rendered diff without +/- markers, so removed and
// added lines are interleaved. Presumably the `str_len` binding, the first
// `arr_string` binding, and the benchmark labels without "utf8" are the
// removed lines — confirm against the commit.
fn add_benchmark(c: &mut Criterion) {
// First argument to both array constructors below; presumably the number of
// elements in each generated array — TODO confirm against `bench_util`.
let size = 65536;
let str_len = 1000;
// Third argument to both array constructors; presumably the length of each
// generated value — TODO confirm against `bench_util`.
let val_len = 1000;

// Input arrays are built once, outside the timed closures. The `0.0` argument
// is presumably a null density (i.e. no nulls) — TODO confirm.
let arr_string = create_string_array_with_len::<i32>(size, 0.0, str_len);
let arr_string = create_string_array_with_len::<i32>(size, 0.0, val_len);
let arr_fsb = create_fsb_array(size, 0.0, val_len);

// String array, start = 0 and no length limit.
c.bench_function("substring (start = 0, length = None)", |b| {
c.bench_function("substring utf8 (start = 0, length = None)", |b| {
b.iter(|| bench_substring(&arr_string, 0, None))
});

// String array, start = 1 and length = val_len - 1.
// NOTE(review): the "utf8" label still says `str_len - 1` although the
// argument on the following line is computed from `val_len`; the label looks
// stale after the rename.
c.bench_function("substring (start = 1, length = str_len - 1)", |b| {
b.iter(|| bench_substring(&arr_string, 1, Some((str_len - 1) as u64)))
c.bench_function("substring utf8 (start = 1, length = str_len - 1)", |b| {
b.iter(|| bench_substring(&arr_string, 1, Some((val_len - 1) as u64)))
});

// Fixed-size binary array, same start/length arguments as the benchmark above.
c.bench_function("substring fixed size binary array", |b| {
b.iter(|| bench_substring(&arr_fsb, 1, Some((val_len - 1) as u64)))
});
}

Expand Down

0 comments on commit e51de5e

Please sign in to comment.