Skip to content

Commit

Permalink
ARROW-10609: [Rust] Optimize min/max of non null strings
Browse files Browse the repository at this point in the history
Applies the same optimization as in ARROW-10595. Difference is smaller, but still there:

```
min string 512          time:   [3.4096 us 3.4378 us 3.4683 us]
                        change: [-13.563% -13.111% -12.628%] (p = 0.00 < 0.05)
                        Performance has improved.
```

Closes #8673 from Dandandan/min_max_string

Authored-by: Heres, Daniel <danielheres@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
Dandandan authored and alamb committed Nov 16, 2020
1 parent ca9783b commit e5fce7f
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 21 deletions.
44 changes: 36 additions & 8 deletions rust/arrow/benches/aggregate_kernels.rs
Expand Up @@ -19,16 +19,15 @@
extern crate criterion;
use criterion::Criterion;

use rand::Rng;
use std::sync::Arc;
use rand::{distributions::Alphanumeric, Rng};

extern crate arrow;

use arrow::array::*;
use arrow::compute::kernels::aggregate::*;
use arrow::util::test_util::seedable_rng;

fn create_array(size: usize, with_nulls: bool) -> ArrayRef {
fn create_array(size: usize, with_nulls: bool) -> Float32Array {
// use random numbers to avoid spurious compiler optimizations wrt to branching
let mut rng = seedable_rng();
let mut builder = Float32Builder::new(size);
Expand All @@ -40,19 +39,40 @@ fn create_array(size: usize, with_nulls: bool) -> ArrayRef {
builder.append_value(rng.gen()).unwrap();
}
}
Arc::new(builder.finish())
builder.finish()
}

fn bench_sum(arr_a: &ArrayRef) {
let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
fn create_string_array(size: usize, with_nulls: bool) -> StringArray {
// use random numbers to avoid spurious compiler optimizations wrt to branching
let mut rng = seedable_rng();
let mut builder = StringBuilder::new(size);

for _ in 0..size {
if with_nulls && rng.gen::<f32>() > 0.5 {
builder.append_null().unwrap();
} else {
let string = seedable_rng()
.sample_iter(&Alphanumeric)
.take(10)
.collect::<String>();
builder.append_value(&string).unwrap();
}
}
builder.finish()
}

fn bench_sum(arr_a: &Float32Array) {
criterion::black_box(sum(&arr_a).unwrap());
}

fn bench_min(arr_a: &ArrayRef) {
let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
fn bench_min(arr_a: &Float32Array) {
criterion::black_box(min(&arr_a).unwrap());
}

fn bench_min_string(arr_a: &StringArray) {
criterion::black_box(min_string(&arr_a).unwrap());
}

fn add_benchmark(c: &mut Criterion) {
let arr_a = create_array(512, false);

Expand All @@ -63,6 +83,14 @@ fn add_benchmark(c: &mut Criterion) {

c.bench_function("sum nulls 512", |b| b.iter(|| bench_sum(&arr_a)));
c.bench_function("min nulls 512", |b| b.iter(|| bench_min(&arr_a)));

let arr_b = create_string_array(512, false);
c.bench_function("min string 512", |b| b.iter(|| bench_min_string(&arr_b)));

let arr_b = create_string_array(512, true);
c.bench_function("min nulls string 512", |b| {
b.iter(|| bench_min_string(&arr_b))
});
}

criterion_group!(benches, add_benchmark);
Expand Down
23 changes: 10 additions & 13 deletions rust/arrow/src/compute/kernels/aggregate.rs
Expand Up @@ -32,19 +32,20 @@ fn min_max_string<T: StringOffsetSizeTrait, F: Fn(&str, &str) -> bool>(
if null_count == array.len() {
return None;
}
let mut n = "";
let mut has_value = false;
let data = array.data();

let mut n;
if null_count == 0 {
for i in 0..data.len() {
n = array.value(0);
for i in 1..data.len() {
let item = array.value(i);
if !has_value || cmp(&n, item) {
has_value = true;
if cmp(&n, item) {
n = item;
}
}
} else {
n = "";
let mut has_value = false;

for i in 0..data.len() {
let item = array.value(i);
if data.is_valid(i) && (!has_value || cmp(&n, item)) {
Expand Down Expand Up @@ -105,13 +106,9 @@ where

if null_count == 0 {
// optimized path for arrays without null values
n = m[0];

for item in &m[1..] {
if cmp(&n, item) {
n = *item
}
}
n = m[1..]
.iter()
.fold(m[0], |max, item| if cmp(&max, item) { *item } else { max });
} else {
n = T::default_value();
let mut has_value = false;
Expand Down

0 comments on commit e5fce7f

Please sign in to comment.