Add the `alloca` dependency and run every benchmark sample inside a
variable-size stack allocation (`i % 4096` bytes) on unix/windows targets.
Other targets keep the plain per-sample loop. Warm-up iteration counts are
additionally capped at 64 per call.

diff --git a/Cargo.toml b/Cargo.toml
index 1b9306ce..b7431fcc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,6 +43,7 @@ tokio = { version = "1.0", default-features = false, features = [
     "rt",
 ], optional = true }
 async-std = { version = "1.9", optional = true }
+alloca = "0.3.4"
 
 [dependencies.plotters]
 version = "^0.3.1"
@@ -61,14 +62,7 @@ futures = { version = "0.3", default_features = false, features = ["executor"
 maintenance = { status = "passively-maintained" }
 
 [features]
-stable = [
-    "csv_output",
-    "html_reports",
-    "async_futures",
-    "async_smol",
-    "async_tokio",
-    "async_std",
-]
+stable = ["csv_output", "html_reports", "async_futures", "async_smol", "async_tokio", "async_std"]
 default = ["rayon", "plotters", "cargo_bench_support"]
 
 # Enable use of the nightly-only test::black_box function to discourage compiler optimizations.
diff --git a/src/routine.rs b/src/routine.rs
index b03b171d..2cf2d53c 100644
--- a/src/routine.rs
+++ b/src/routine.rs
@@ -242,15 +242,31 @@ where
             elapsed_time: Duration::from_millis(0),
         };
 
-        iters
-            .iter()
-            .map(|iters| {
+        let mut results = Vec::with_capacity(iters.len());
+        results.resize(iters.len(), 0.0);
+        for (i, iters) in iters.iter().enumerate() {
+            let stack_alloc = i % 4096; // default page size
+            #[cfg(any(target_family = "unix", target_family = "windows"))]
+            {
+                alloca::with_alloca(
+                    stack_alloc, /* how many bytes we want to allocate */
+                    |_memory: &mut [core::mem::MaybeUninit<u8>] /* dynamically stack allocated slice itself */| {
+                        b.iters = *iters;
+                        (*f)(&mut b, black_box(parameter));
+                        b.assert_iterated();
+                        results[i] = m.to_f64(&b.value);
+                    },
+                );
+            }
+            #[cfg(not(any(target_family = "unix", target_family = "windows")))]
+            {
                 b.iters = *iters;
                 (*f)(&mut b, black_box(parameter));
                 b.assert_iterated();
-                m.to_f64(&b.value)
-            })
-            .collect()
+                results[i] = m.to_f64(&b.value);
+            }
+        }
+        results
     }
 
     fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64) {
@@ -277,6 +293,8 @@ where
             }
 
             b.iters = b.iters.wrapping_mul(2);
+            b.iters = b.iters.min(64); // To make sure we offset the test at least with 0-64 bytes
+                                       // with alloca
         }
     }
 }