diff --git a/datafusion/core/benches/aggregate_query_sql.rs b/datafusion/core/benches/aggregate_query_sql.rs index 057a0e1d1b54..9da341ce2e92 100644 --- a/datafusion/core/benches/aggregate_query_sql.rs +++ b/datafusion/core/benches/aggregate_query_sql.rs @@ -153,6 +153,38 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); + c.bench_function( + "aggregate_query_group_by_wide_u64_and_string_without_aggregate_expressions", + |b| { + b.iter(|| { + query( + ctx.clone(), + &rt, + // Due to the large number of distinct values in u64_wide, + // this query tests the actual grouping performance for more than 1 column + "SELECT u64_wide, utf8 \ + FROM t GROUP BY u64_wide, utf8", + ) + }) + }, + ); + + c.bench_function( + "aggregate_query_group_by_wide_u64_and_f32_without_aggregate_expressions", + |b| { + b.iter(|| { + query( + ctx.clone(), + &rt, + // Due to the large number of distinct values in u64_wide, + // this query tests the actual grouping performance for more than 1 column + "SELECT u64_wide, f32 \ + FROM t GROUP BY u64_wide, f32", + ) + }) + }, + ); + c.bench_function("aggregate_query_approx_percentile_cont_on_u64", |b| { b.iter(|| { query( diff --git a/datafusion/core/benches/data_utils/mod.rs b/datafusion/core/benches/data_utils/mod.rs index c0477b1306f7..fffe2e2d1752 100644 --- a/datafusion/core/benches/data_utils/mod.rs +++ b/datafusion/core/benches/data_utils/mod.rs @@ -81,10 +81,11 @@ fn create_data(size: usize, null_density: f64) -> Vec> { .collect() } -fn create_integer_data(size: usize, value_density: f64) -> Vec> { - // use random numbers to avoid spurious compiler optimizations wrt to branching - let mut rng = StdRng::seed_from_u64(42); - +fn create_integer_data( + rng: &mut StdRng, + size: usize, + value_density: f64, +) -> Vec> { (0..size) .map(|_| { if rng.random::() > value_density { @@ -116,7 +117,7 @@ fn create_record_batch( let values = create_data(batch_size, 0.5); // Integer values between [0, u64::MAX]. 
- let integer_values_wide = create_integer_data(batch_size, 9.0); + let integer_values_wide = create_integer_data(rng, batch_size, 9.0); // Integer values between [0, 9]. let integer_values_narrow = (0..batch_size)