Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend aggregation benchmarks #5096

Merged
merged 1 commit into from
Nov 18, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
67 changes: 36 additions & 31 deletions arrow/benches/aggregate_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,50 +17,55 @@

#[macro_use]
extern crate criterion;
use criterion::Criterion;
use criterion::{Criterion, Throughput};
use rand::distributions::{Distribution, Standard};

extern crate arrow;

use arrow::compute::kernels::aggregate::*;
use arrow::util::bench_util::*;
use arrow::{array::*, datatypes::Float32Type};
use arrow_array::types::{Float64Type, Int16Type, Int32Type, Int64Type, Int8Type};

/// Runs the `sum` aggregate kernel over `arr_a` once.
///
/// The unwrapped result is handed to `criterion::black_box` so the
/// computation cannot be discarded as dead code.
///
/// # Panics
///
/// Panics if unwrapping the value returned by `sum` fails.
fn bench_sum(arr_a: &Float32Array) {
    let total = sum(arr_a).unwrap();
    criterion::black_box(total);
}

/// Runs the `min` aggregate kernel over `arr_a` once.
///
/// The unwrapped result is handed to `criterion::black_box` so the
/// computation cannot be discarded as dead code.
///
/// # Panics
///
/// Panics if unwrapping the value returned by `min` fails.
fn bench_min(arr_a: &Float32Array) {
    let smallest = min(arr_a).unwrap();
    criterion::black_box(smallest);
}

/// Runs the `max` aggregate kernel over `arr_a` once.
///
/// The unwrapped result is handed to `criterion::black_box` so the
/// computation cannot be discarded as dead code.
///
/// # Panics
///
/// Panics if unwrapping the value returned by `max` fails.
fn bench_max(arr_a: &Float32Array) {
    let largest = max(arr_a).unwrap();
    criterion::black_box(largest);
}
const BATCH_SIZE: usize = 64 * 1024;

fn bench_min_string(arr_a: &StringArray) {
criterion::black_box(min_string(arr_a).unwrap());
fn primitive_benchmark<T: ArrowNumericType>(c: &mut Criterion, name: &str)
where
Standard: Distribution<T::Native>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I doubt it matters for this benchmark, but it is worth noting that the `Standard` distribution for floats only samples values in the range [0, 1). I don't think this will make a difference to the timings, but FYI.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to know, and I agree it shouldn't affect the timings. The bound is required by `bench_util::create_primitive_array`.

{
let nonnull_array = create_primitive_array::<T>(BATCH_SIZE, 0.0);
let nullable_array = create_primitive_array::<T>(BATCH_SIZE, 0.5);
c.benchmark_group(name)
.throughput(Throughput::Bytes(
(std::mem::size_of::<T::Native>() * BATCH_SIZE) as u64,
))
.bench_function("sum nonnull", |b| b.iter(|| sum(&nonnull_array)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised this isn't overflowing, unless sum always wraps?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is indeed always wrapping: the scalar version goes through `ArrowNativeTypeOp::add_wrapping`, and I guess the SIMD version wraps by default. There seems to be a separate `sum_checked` kernel; I'm not sure yet whether that could be vectorized.

.bench_function("min nonnull", |b| b.iter(|| min(&nonnull_array)))
.bench_function("max nonnull", |b| b.iter(|| max(&nonnull_array)))
.bench_function("sum nullable", |b| b.iter(|| sum(&nullable_array)))
.bench_function("min nullable", |b| b.iter(|| min(&nullable_array)))
.bench_function("max nullable", |b| b.iter(|| max(&nullable_array)));
}

fn add_benchmark(c: &mut Criterion) {
let arr_a = create_primitive_array::<Float32Type>(512, 0.0);

c.bench_function("sum 512", |b| b.iter(|| bench_sum(&arr_a)));
c.bench_function("min 512", |b| b.iter(|| bench_min(&arr_a)));
c.bench_function("max 512", |b| b.iter(|| bench_max(&arr_a)));

let arr_a = create_primitive_array::<Float32Type>(512, 0.5);

c.bench_function("sum nulls 512", |b| b.iter(|| bench_sum(&arr_a)));
c.bench_function("min nulls 512", |b| b.iter(|| bench_min(&arr_a)));
c.bench_function("max nulls 512", |b| b.iter(|| bench_max(&arr_a)));
primitive_benchmark::<Float32Type>(c, "float32");
primitive_benchmark::<Float64Type>(c, "float64");

let arr_b = create_string_array::<i32>(512, 0.0);
c.bench_function("min string 512", |b| b.iter(|| bench_min_string(&arr_b)));
primitive_benchmark::<Int8Type>(c, "int8");
primitive_benchmark::<Int16Type>(c, "int16");
primitive_benchmark::<Int32Type>(c, "int32");
primitive_benchmark::<Int64Type>(c, "int64");

let arr_b = create_string_array::<i32>(512, 0.5);
c.bench_function("min nulls string 512", |b| {
b.iter(|| bench_min_string(&arr_b))
});
{
let nonnull_strings = create_string_array::<i32>(BATCH_SIZE, 0.0);
let nullable_strings = create_string_array::<i32>(BATCH_SIZE, 0.5);
c.benchmark_group("string")
.throughput(Throughput::Elements(BATCH_SIZE as u64))
.bench_function("min nonnull", |b| b.iter(|| min_string(&nonnull_strings)))
.bench_function("max nonnull", |b| b.iter(|| max_string(&nonnull_strings)))
.bench_function("min nullable", |b| b.iter(|| min_string(&nullable_strings)))
.bench_function("max nullable", |b| b.iter(|| max_string(&nullable_strings)));
}
}

criterion_group!(benches, add_benchmark);
Expand Down