From 94469dbd30b66f95a23d377aa462ae01eadfa9b2 Mon Sep 17 00:00:00 2001 From: Maarten Arnst Date: Thu, 25 Jan 2024 15:49:17 +0100 Subject: [PATCH 1/6] Add min rel accuracy stopping criterion --- include/benchmark/benchmark.h | 4 ++++ src/benchmark_api_internal.cc | 5 +++++ src/benchmark_api_internal.h | 2 ++ src/benchmark_register.cc | 8 ++++++++ src/benchmark_runner.cc | 24 ++++++++++-------------- src/benchmark_runner.h | 4 ++++ src/thread_manager.h | 1 + src/thread_timer.h | 12 +++++++++++- 8 files changed, 45 insertions(+), 15 deletions(-) diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 08cfe29da3..644c4912c4 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -1198,6 +1198,8 @@ class BENCHMARK_EXPORT Benchmark { // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. Benchmark* MinTime(double t); + Benchmark* MinRelAccuracy(double r); + // Set the minimum amount of time to run the benchmark before taking runtimes // of this benchmark into account. This // option overrides the `benchmark_min_warmup_time` flag. @@ -1320,6 +1322,7 @@ class BENCHMARK_EXPORT Benchmark { int range_multiplier_; double min_time_; + double min_rel_accuracy_; double min_warmup_time_; IterationCount iterations_; int repetitions_; @@ -1751,6 +1754,7 @@ struct BENCHMARK_EXPORT BenchmarkName { std::string function_name; std::string args; std::string min_time; + std::string min_rel_accuracy; std::string min_warmup_time; std::string iterations; std::string repetitions; diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 286f986530..6087fff187 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -25,6 +25,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, statistics_(benchmark_.statistics_), repetitions_(benchmark_.repetitions_), min_time_(benchmark_.min_time_), + min_rel_accuracy_(benchmark_.min_rel_accuracy_), min_warmup_time_(benchmark_.min_warmup_time_), iterations_(benchmark_.iterations_), threads_(thread_count) { @@ -51,6 +52,10 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); } + if (!IsZero(benchmark->min_rel_accuracy_)) { + name_.min_rel_accuracy = StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_); + } + if (!IsZero(benchmark->min_warmup_time_)) { name_.min_warmup_time = StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_); diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 94f516531b..38e5e40d95 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -36,6 +36,7 @@ class BenchmarkInstance { const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } double min_time() const { return min_time_; } + double min_rel_accuracy() const { return min_rel_accuracy_; } double min_warmup_time() const { return min_warmup_time_; } IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } @@ -63,6 +64,7 @@ class BenchmarkInstance { const std::vector& statistics_; int repetitions_; double min_time_; + double min_rel_accuracy_; double min_warmup_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to us diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 8ade048225..ef9ce40d5a 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -211,6 +211,7 @@ Benchmark::Benchmark(const std::string& name) use_default_time_unit_(true), range_multiplier_(kRangeMultiplier), min_time_(0), + min_rel_accuracy_(0), min_warmup_time_(0), iterations_(0), repetitions_(0), @@ -356,6 +357,13 @@ Benchmark* Benchmark::MinTime(double t) { return this; } +Benchmark* Benchmark::MinRelAccuracy(double r) { + BM_CHECK(r > 0.0); + BM_CHECK(iterations_ == 0); + min_rel_accuracy_ = r; + return this; +} + Benchmark* Benchmark::MinWarmUpTime(double t) { BM_CHECK(t >= 0.0); BM_CHECK(iterations_ == 0); diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index a74bdadd3e..15e9b0e748 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -140,6 +140,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, results.cpu_time_used += timer.cpu_time_used(); results.real_time_used += timer.real_time_used(); results.manual_time_used += timer.manual_time_used(); + results.manual_time_used2 += timer.manual_time_used2(); results.complexity_n += st.complexity_length_n(); internal::Increment(&results.counters, st.counters); } @@ -226,6 +227,7 @@ BenchmarkRunner::BenchmarkRunner( reports_for_family(reports_for_family_), parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)), min_time(ComputeMinTime(b_, parsed_benchtime_flag)), + min_rel_accuracy(b_min_rel_accuracy()), min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) ? b.min_warmup_time() : FLAGS_benchmark_min_warmup_time), @@ -302,8 +304,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { // Base decisions off of real time if requested by this benchmark. i.seconds = i.results.cpu_time_used; + i.seconds2 = 0; if (b.use_manual_time()) { i.seconds = i.results.manual_time_used; + i.seconds2 = i.results.manual_time_used2; } else if (b.use_real_time()) { i.seconds = i.results.real_time_used; } @@ -323,17 +327,9 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( // expansion should be 14x. const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1; multiplier = is_significant ? multiplier : 10.0; - - // So what seems to be the sufficiently-large iteration count? Round up. - const IterationCount max_next_iters = static_cast( - std::llround(std::max(multiplier * static_cast(i.iters), - static_cast(i.iters) + 1.0))); - // But we do have *some* limits though.. - const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); - - BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; - return next_iters; // round up before conversion to integer. -} + if (!IsZero(GetMinRelAccuracy())) { + multiplier = std::max(multiplier, std::sqrt(i.seconds2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters) * 1.4 / GetMinRelAccuracy()); + } bool BenchmarkRunner::ShouldReportIterationResults( const IterationResults& i) const { @@ -342,13 +338,13 @@ bool BenchmarkRunner::ShouldReportIterationResults( // or because an error was reported. return i.results.skipped_ || i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= - GetMinTimeToApply() || // The elapsed time is large enough. + (((i.seconds >= GetMinTimeToApply()) && + (b.use_manual_time() && !IsZero(GetMinRelAccuracy()) && (std::sqrt(i.seconds2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters) <= GetMinRelAccuracy()))) || // The relative accuracy is enough. // CPU time is specified but the elapsed real time greatly exceeds // the minimum time. // Note that user provided timers are except from this test. ((i.results.real_time_used >= 5 * GetMinTimeToApply()) && - !b.use_manual_time()); + !b.use_manual_time())); } double BenchmarkRunner::GetMinTimeToApply() const { diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index db2fa04396..8b487b291d 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -77,6 +77,8 @@ class BenchmarkRunner { double GetMinTime() const { return min_time; } + double GetMinRelAccuracy() const { return min_rel_accuracy; } + bool HasExplicitIters() const { return has_explicit_iteration_count; } IterationCount GetIters() const { return iters; } @@ -89,6 +91,7 @@ class BenchmarkRunner { BenchTimeType parsed_benchtime_flag; const double min_time; + const double min_rel_accuracy; const double min_warmup_time; bool warmup_done; const int repeats; @@ -110,6 +113,7 @@ class BenchmarkRunner { internal::ThreadManager::Result results; IterationCount iters; double seconds; + double seconds2; }; IterationResults DoNIterations(); diff --git a/src/thread_manager.h b/src/thread_manager.h index 819b3c44db..28d061e0f9 100644 --- a/src/thread_manager.h +++ b/src/thread_manager.h @@ -41,6 +41,7 @@ class ThreadManager { double real_time_used = 0; double cpu_time_used = 0; double manual_time_used = 0; + double manual_time_used2 = 0; int64_t complexity_n = 0; std::string report_label_; std::string skip_message_; diff --git a/src/thread_timer.h b/src/thread_timer.h index eb23f59561..26a8d47e9e 100644 --- a/src/thread_timer.h +++ b/src/thread_timer.h @@ -38,7 +38,10 @@ class ThreadTimer { } // Called by each thread - void SetIterationTime(double seconds) { manual_time_used_ += seconds; } + void SetIterationTime(double seconds) { + manual_time_used_ += seconds; + manual_time_used2_ += std::pow(seconds, 2.); + } bool running() const { return running_; } @@ -60,6 +63,11 @@ class ThreadTimer { return manual_time_used_; } + double manual_time_used2() const { + BM_CHECK(!running_); + return manual_time_used2_; + } + private: double ReadCpuTimerOfChoice() const { if (measure_process_cpu_time) return ProcessCPUUsage(); @@ -75,9 +83,11 @@ class ThreadTimer { // Accumulated time so far (does not contain current slice if running_) double real_time_used_ = 0; + double cpu_time_used_ = 0; // Manually set iteration time. User sets this with SetIterationTime(seconds). double manual_time_used_ = 0; + double manual_time_used2_ = 0; }; } // namespace internal From bbcf4dcbb0a885ebbe484cc9da70cf3ee841d90c Mon Sep 17 00:00:00 2001 From: Maarten Arnst Date: Thu, 25 Jan 2024 17:45:24 +0100 Subject: [PATCH 2/6] Clean up the initial commit --- src/benchmark_runner.cc | 43 ++++++++++++++++++++++++++++++++--------- src/benchmark_runner.h | 6 ++++++ 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 15e9b0e748..4a1180f403 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -327,24 +327,33 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( // expansion should be 14x. const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1; multiplier = is_significant ? multiplier : 10.0; + if (!IsZero(GetMinRelAccuracy())) { - multiplier = std::max(multiplier, std::sqrt(i.seconds2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters) * 1.4 / GetMinRelAccuracy()); + multiplier = std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy()); } + // So what seems to be the sufficiently-large iteration count? Round up. + const IterationCount max_next_iters = static_cast( + std::lround(std::max(multiplier * static_cast(i.iters), + static_cast(i.iters) + 1.0))); + // But we do have *some* limits though.. + const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); + + BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; + return next_iters; // round up before conversion to integer. +} + bool BenchmarkRunner::ShouldReportIterationResults( const IterationResults& i) const { // Determine if this run should be reported; // Either it has run for a sufficient amount of time // or because an error was reported. return i.results.skipped_ || - i.iters >= kMaxIterations || // Too many iterations already. - (((i.seconds >= GetMinTimeToApply()) && - (b.use_manual_time() && !IsZero(GetMinRelAccuracy()) && (std::sqrt(i.seconds2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters) <= GetMinRelAccuracy()))) || // The relative accuracy is enough. - // CPU time is specified but the elapsed real time greatly exceeds - // the minimum time. - // Note that user provided timers are except from this test. - ((i.results.real_time_used >= 5 * GetMinTimeToApply()) && - !b.use_manual_time())); + // Too many iterations already. + i.iters >= kMaxIterations || + // We have applied for enough time and the relative accuracy is good enough. + // Relative accuracy is checked only for user provided timers. + (HasSufficientTimeToApply(i) && (!b.use_manual_time() || HasSufficientRelAccuracy(i))); } double BenchmarkRunner::GetMinTimeToApply() const { @@ -356,6 +365,22 @@ double BenchmarkRunner::GetMinTimeToApply() const { return warmup_done ? min_time : min_warmup_time; } +double GetRelAccuracy(const IterationResults& i) const { + return std::sqrt(i.seconds2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters); +} + +bool BenchmarkRunner::HasSufficientTimeToApply(const IterationResults& i) const { + return i.seconds >= GetMinTimeToApply() || + // CPU time is specified but the elapsed real time greatly exceeds + // the minimum time. + // Note that user provided timers are except from this test. + (!b.use_manual_time() && i.results.real_time_used >= 5 * GetMinTimeToApply()); +} + +bool BenchmarkRunner::HasSufficientRelAccuracy(const IterationResults& i) const { + return (!IsZero(GetMinRelAccuracy()) && (GetRelAccuracy(i) <= GetMinRelAccuracy())); +} + void BenchmarkRunner::FinishWarmUp(const IterationCount& i) { warmup_done = true; iters = i; diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index 8b487b291d..d20a84ff82 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -123,6 +123,12 @@ class BenchmarkRunner { double GetMinTimeToApply() const; + double GetRelAccuracy(const IterationResults& i) const; + + bool HasSufficientTimeToApply(const IterationResults& i) const; + + bool HasSufficientRelAccuracy(const IterationResults& i) const; + void FinishWarmUp(const IterationCount& i); void RunWarmUp(); From da8d23f342ea7119c04482f21bdfc6b522848d44 Mon Sep 17 00:00:00 2001 From: Maarten Arnst Date: Fri, 26 Jan 2024 12:14:28 +0100 Subject: [PATCH 3/6] Further cleaning of initial commit. Add test. --- include/benchmark/benchmark.h | 22 ++++- src/benchmark.cc | 25 ++++-- src/benchmark_register.cc | 1 + src/benchmark_runner.cc | 23 ++--- src/benchmark_runner.h | 3 +- src/thread_manager.h | 2 +- src/thread_timer.h | 9 +- test/CMakeLists.txt | 3 + test/benchmark_min_rel_accuracy_flag_test.cc | 88 ++++++++++++++++++++ 9 files changed, 150 insertions(+), 26 deletions(-) create mode 100644 test/benchmark_min_rel_accuracy_flag_test.cc diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 644c4912c4..526714c3b3 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -126,8 +126,12 @@ template int BM_Sequential(benchmark::State& state) { } BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); -Use `Benchmark::MinTime(double t)` to set the minimum time used to run the -benchmark. This option overrides the `benchmark_min_time` flag. +Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how long +to run the benchmark. This option overrides the `benchmark_min_time` flag. + +If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)` to set +the required minimum relative accuracy used to determine how long to run the benchmark. +This option overrides the `benchmark_min_rel_accuracy` flag. void BM_test(benchmark::State& state) { ... body ... @@ -1193,11 +1197,19 @@ class BENCHMARK_EXPORT Benchmark { // multiplier kRangeMultiplier will be used. Benchmark* RangeMultiplier(int multiplier); - // Set the minimum amount of time to use when running this benchmark. This - // option overrides the `benchmark_min_time` flag. + // Set the minimum amount of time to use to determine the required number + // of iterations when running this benchmark. This option overrides + // the `benchmark_min_time` flag. // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. Benchmark* MinTime(double t); + // Set the minimum relative accuracy to use to determine the required number + // of iterations when running this benchmark. This option overrides + // the `benchmark_min_rel_accuracy` flag. + // REQUIRES: `t > 0`, `Iterations` has not been called on this benchmark, and + // time is measured manually, i.e., `UseManualTime` has been called on this + // benchmark and each benchmark iteration should call SetIterationTime(seconds) + // to report the measured time. Benchmark* MinRelAccuracy(double r); // Set the minimum amount of time to run the benchmark before taking runtimes @@ -1794,6 +1806,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { threads(1), time_unit(GetDefaultTimeUnit()), real_accumulated_time(0), + manual_accumulated_time_pow2(0), cpu_accumulated_time(0), max_heapbytes_used(0), use_real_time_for_initial_big_o(false), @@ -1822,6 +1835,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { int64_t repetitions; TimeUnit time_unit; double real_accumulated_time; + double manual_accumulated_time_pow2; double cpu_accumulated_time; // Return a value representing the real time per iteration in the unit diff --git a/src/benchmark.cc b/src/benchmark.cc index 337bb3faa7..2c31299ed5 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -65,11 +65,11 @@ BM_DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. BM_DEFINE_string(benchmark_filter, ""); -// Specification of how long to run the benchmark. +// Specification of either an exact number of iterations (specified as `x`) +// or a minimum number of seconds (specified as `s`) used to determine how +// long to run the benchmark. // -// It can be either an exact number of iterations (specified as `x`), -// or a minimum number of seconds (specified as `s`). If the latter -// format (ie., min seconds) is used, the system may run the benchmark longer +// If the latter format (ie., min seconds) is used, the system may run the benchmark longer // until the results are considered significant. // // For backward compatibility, the `s` suffix may be omitted, in which case, @@ -81,6 +81,18 @@ BM_DEFINE_string(benchmark_filter, ""); // benchmark execution, regardless of number of threads. BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr); +// Specification of required relative accuracy used to determine how +// long to run the benchmark. +// +// REQUIRES: time is measured manually. +// +// Manual timers provide per-iteration times. The relative accuracy is +// measured as the standard deviation of these per-iteration times divided by +// the mean and the square root of the number of iterations. The benchmark is +// run until the specified minimum time or number of iterations is reached +// and the measured relative accuracy meets the specified requirement. +BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0); + // Minimum number of seconds a benchmark should be run before results should be // taken into account. This e.g can be necessary for benchmarks of code which // needs to fill some form of cache before performance is of interest. @@ -703,6 +715,8 @@ void ParseCommandLineFlags(int* argc, char** argv) { ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || ParseStringFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || + ParseDoubleFlag(argv[i], "benchmark_min_rel_accuracy", + &FLAGS_benchmark_min_rel_accuracy) || ParseDoubleFlag(argv[i], "benchmark_min_warmup_time", &FLAGS_benchmark_min_warmup_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", @@ -770,7 +784,8 @@ void PrintDefaultHelp() { "benchmark" " [--benchmark_list_tests={true|false}]\n" " [--benchmark_filter=]\n" - " [--benchmark_min_time=`x` OR `s` ]\n" + " [--benchmark_min_time=`x` OR `s`]\n" + " [--benchmark_min_rel_accuracy=]\n" " [--benchmark_min_warmup_time=]\n" " [--benchmark_repetitions=]\n" " [--benchmark_enable_random_interleaving={true|false}]\n" diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index ef9ce40d5a..13ba9e554e 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -360,6 +360,7 @@ Benchmark* Benchmark::MinTime(double t) { Benchmark* Benchmark::MinRelAccuracy(double r) { BM_CHECK(r > 0.0); BM_CHECK(iterations_ == 0); + BM_CHECK(use_manual_time_); min_rel_accuracy_ = r; return this; } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 4a1180f403..4c582c9bd1 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -93,6 +93,7 @@ BenchmarkReporter::Run CreateRunReport( if (!report.skipped) { if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; + report.manual_accumulated_time_pow2 = results.manual_time_used_pow2; } else { report.real_accumulated_time = results.real_time_used; } @@ -140,7 +141,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, results.cpu_time_used += timer.cpu_time_used(); results.real_time_used += timer.real_time_used(); results.manual_time_used += timer.manual_time_used(); - results.manual_time_used2 += timer.manual_time_used2(); + results.manual_time_used_pow2 += timer.manual_time_used_pow2(); results.complexity_n += st.complexity_length_n(); internal::Increment(&results.counters, st.counters); } @@ -226,8 +227,10 @@ BenchmarkRunner::BenchmarkRunner( : b(b_), reports_for_family(reports_for_family_), parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)), - min_time(ComputeMinTime(b_, parsed_benchtime_flag)), - min_rel_accuracy(b_min_rel_accuracy()), + min_time(ComputeMinTime(b, parsed_benchtime_flag)), + min_rel_accuracy(!IsZero(b.min_rel_accuracy()) + ? b.min_rel_accuracy() + : FLAGS_benchmark_min_rel_accuracy), min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) ? b.min_warmup_time() : FLAGS_benchmark_min_warmup_time), @@ -304,10 +307,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { // Base decisions off of real time if requested by this benchmark. i.seconds = i.results.cpu_time_used; - i.seconds2 = 0; + i.seconds_pow2 = 0; if (b.use_manual_time()) { i.seconds = i.results.manual_time_used; - i.seconds2 = i.results.manual_time_used2; + i.seconds_pow2 = i.results.manual_time_used_pow2; } else if (b.use_real_time()) { i.seconds = i.results.real_time_used; } @@ -334,8 +337,8 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( // So what seems to be the sufficiently-large iteration count? Round up. const IterationCount max_next_iters = static_cast( - std::lround(std::max(multiplier * static_cast(i.iters), - static_cast(i.iters) + 1.0))); + std::llround(std::max(multiplier * static_cast(i.iters), + static_cast(i.iters) + 1.0))); // But we do have *some* limits though.. const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); @@ -365,8 +368,8 @@ double BenchmarkRunner::GetMinTimeToApply() const { return warmup_done ? min_time : min_warmup_time; } -double GetRelAccuracy(const IterationResults& i) const { - return std::sqrt(i.seconds2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters); +double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const { + return std::sqrt(i.seconds_pow2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters); } bool BenchmarkRunner::HasSufficientTimeToApply(const IterationResults& i) const { @@ -378,7 +381,7 @@ bool BenchmarkRunner::HasSufficientTimeToApply(const IterationResults& i) const } bool BenchmarkRunner::HasSufficientRelAccuracy(const IterationResults& i) const { - return (!IsZero(GetMinRelAccuracy()) && (GetRelAccuracy(i) <= GetMinRelAccuracy())); + return (IsZero(GetMinRelAccuracy()) || (GetRelAccuracy(i) <= GetMinRelAccuracy())); } void BenchmarkRunner::FinishWarmUp(const IterationCount& i) { diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index d20a84ff82..fe9df08e4e 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -26,6 +26,7 @@ namespace benchmark { BM_DECLARE_string(benchmark_min_time); +BM_DECLARE_double(benchmark_min_rel_accuracy); BM_DECLARE_double(benchmark_min_warmup_time); BM_DECLARE_int32(benchmark_repetitions); BM_DECLARE_bool(benchmark_report_aggregates_only); @@ -113,7 +114,7 @@ class BenchmarkRunner { internal::ThreadManager::Result results; IterationCount iters; double seconds; - double seconds2; + double seconds_pow2; }; IterationResults DoNIterations(); diff --git a/src/thread_manager.h b/src/thread_manager.h index 28d061e0f9..e3e6c5a2b8 100644 --- a/src/thread_manager.h +++ b/src/thread_manager.h @@ -41,7 +41,7 @@ class ThreadManager { double real_time_used = 0; double cpu_time_used = 0; double manual_time_used = 0; - double manual_time_used2 = 0; + double manual_time_used_pow2 = 0; int64_t complexity_n = 0; std::string report_label_; std::string skip_message_; diff --git a/src/thread_timer.h b/src/thread_timer.h index 26a8d47e9e..464974f73f 100644 --- a/src/thread_timer.h +++ b/src/thread_timer.h @@ -40,7 +40,7 @@ class ThreadTimer { // Called by each thread void SetIterationTime(double seconds) { manual_time_used_ += seconds; - manual_time_used2_ += std::pow(seconds, 2.); + manual_time_used_pow2_ += std::pow(seconds, 2.); } bool running() const { return running_; } @@ -63,9 +63,9 @@ class ThreadTimer { return manual_time_used_; } - double manual_time_used2() const { + double manual_time_used_pow2() const { BM_CHECK(!running_); - return manual_time_used2_; + return manual_time_used_pow2_; } private: @@ -83,11 +83,10 @@ class ThreadTimer { // Accumulated time so far (does not contain current slice if running_) double real_time_used_ = 0; - double cpu_time_used_ = 0; // Manually set iteration time. User sets this with SetIterationTime(seconds). double manual_time_used_ = 0; - double manual_time_used2_ = 0; + double manual_time_used_pow2_ = 0; }; } // namespace internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1de175f98d..7a907cb054 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -97,6 +97,9 @@ benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_ compile_benchmark_test(benchmark_min_time_flag_iters_test) benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test) +compile_benchmark_test(benchmark_min_rel_accuracy_flag_test) +benchmark_add_test(NAME min_rel_accuracy_flag_test COMMAND benchmark_min_rel_accuracy_flag_test) + add_filter_test(filter_simple "Foo" 3) add_filter_test(filter_simple_negative "-Foo" 2) add_filter_test(filter_suffix "BM_.*" 4) diff --git a/test/benchmark_min_rel_accuracy_flag_test.cc b/test/benchmark_min_rel_accuracy_flag_test.cc new file mode 100644 index 0000000000..0221e3ba3d --- /dev/null +++ b/test/benchmark_min_rel_accuracy_flag_test.cc @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +// Tests that if a benchmark measures time manually, we can specify the required relative accuracy with +// --benchmark_min_rel_accuracy=. +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + assert(report.size() == 1); + iters_.push_back(report[0].iterations); + real_accumulated_times_.push_back(report[0].real_accumulated_time); + manual_accumulated_time_pow2s_.push_back(report[0].manual_accumulated_time_pow2); + ConsoleReporter::ReportRuns(report); + }; + + TestReporter() {} + + virtual ~TestReporter() {} + + const std::vector& GetIters() const { + return iters_; + } + + const std::vector& GetRealAccumulatedTimes() const { + return real_accumulated_times_; + } + + const std::vector& GetManualAccumulatedTimePow2s() const { + return manual_accumulated_time_pow2s_; + } + + private: + std::vector iters_; + std::vector real_accumulated_times_; + std::vector manual_accumulated_time_pow2s_; +}; + +} // end namespace + +static void BM_MyBench(benchmark::State& state) { + static std::mt19937 rd{std::random_device{}()}; + static std::uniform_real_distribution mrand(0, 1); + + for (auto s : state) { + state.SetIterationTime(mrand(rd)); + } +} +BENCHMARK(BM_MyBench)->UseManualTime(); + +int main(int argc, char** argv) { + // Make a fake argv and append the new --benchmark_min_rel_accuracy= to it. + int fake_argc = argc + 2; + const char** fake_argv = new const char*[static_cast(fake_argc)]; + for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i]; + fake_argv[argc] = "--benchmark_min_time=10s"; + fake_argv[argc + 1] = "--benchmark_min_rel_accuracy=0.01"; + + benchmark::Initialize(&fake_argc, const_cast(fake_argv)); + + TestReporter test_reporter; + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench"); + assert(returned_count == 1); + + // Check the executed iters. + const benchmark::IterationCount iters = test_reporter.GetIters()[0]; + const double real_accumulated_time = test_reporter.GetRealAccumulatedTimes()[0]; + const double manual_accumulated_time_pow2 = test_reporter.GetManualAccumulatedTimePow2s()[0]; + + const double rel_accuracy = std::sqrt(manual_accumulated_time_pow2 / iters - std::pow(real_accumulated_time / iters, 2.)) / (real_accumulated_time / iters) / sqrt(iters); + assert(rel_accuracy <= 0.01); + + delete[] fake_argv; + return 0; +} From 32afbca68cbee65732ad8a37615488582ff51230 Mon Sep 17 00:00:00 2001 From: Maarten Arnst Date: Fri, 26 Jan 2024 14:41:26 +0100 Subject: [PATCH 4/6] Improvements to comments thanks to review --- include/benchmark/benchmark.h | 4 ++-- src/benchmark.cc | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 526714c3b3..5670dbc5d2 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -1206,9 +1206,9 @@ class BENCHMARK_EXPORT Benchmark { // Set the minimum relative accuracy to use to determine the required number // of iterations when running this benchmark. This option overrides // the `benchmark_min_rel_accuracy` flag. - // REQUIRES: `t > 0`, `Iterations` has not been called on this benchmark, and + // REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and // time is measured manually, i.e., `UseManualTime` has been called on this - // benchmark and each benchmark iteration should call SetIterationTime(seconds) + // benchmark and each benchmark iteration should call `SetIterationTime(seconds)` // to report the measured time. Benchmark* MinRelAccuracy(double r); diff --git a/src/benchmark.cc b/src/benchmark.cc index 2c31299ed5..85769bebfb 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -89,8 +89,9 @@ BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr); // Manual timers provide per-iteration times. The relative accuracy is // measured as the standard deviation of these per-iteration times divided by // the mean and the square root of the number of iterations. The benchmark is -// run until the specified minimum time or number of iterations is reached -// and the measured relative accuracy meets the specified requirement. +// run until both of the following conditions are fulfilled: +// 1. the specified minimum time or number of iterations is reached +// 2. the measured relative accuracy meets the specified requirement BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0); // Minimum number of seconds a benchmark should be run before results should be From b086a638ae9bdcaa60989bf780f6eb8cda18e183 Mon Sep 17 00:00:00 2001 From: Maarten Arnst Date: Fri, 26 Jan 2024 14:58:27 +0100 Subject: [PATCH 5/6] Reformat thanks to clang format. --- include/benchmark/benchmark.h | 14 ++++----- src/benchmark.cc | 10 +++--- src/benchmark_api_internal.cc | 3 +- src/benchmark_runner.cc | 32 ++++++++++++-------- src/thread_timer.h | 4 +-- test/benchmark_min_rel_accuracy_flag_test.cc | 23 +++++++++----- 6 files changed, 51 insertions(+), 35 deletions(-) diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 5670dbc5d2..34cd651d0e 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -126,12 +126,12 @@ template int BM_Sequential(benchmark::State& state) { } BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); -Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how long -to run the benchmark. This option overrides the `benchmark_min_time` flag. +Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how +long to run the benchmark. This option overrides the `benchmark_min_time` flag. -If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)` to set -the required minimum relative accuracy used to determine how long to run the benchmark. -This option overrides the `benchmark_min_rel_accuracy` flag. +If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)` +to set the required minimum relative accuracy used to determine how long to run +the benchmark. This option overrides the `benchmark_min_rel_accuracy` flag. void BM_test(benchmark::State& state) { ... body ... @@ -1208,8 +1208,8 @@ class BENCHMARK_EXPORT Benchmark { // the `benchmark_min_rel_accuracy` flag. // REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and // time is measured manually, i.e., `UseManualTime` has been called on this - // benchmark and each benchmark iteration should call `SetIterationTime(seconds)` - // to report the measured time. + // benchmark and each benchmark iteration should call + // `SetIterationTime(seconds)` to report the measured time. Benchmark* MinRelAccuracy(double r); // Set the minimum amount of time to run the benchmark before taking runtimes diff --git a/src/benchmark.cc b/src/benchmark.cc index 85769bebfb..f379faf92e 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -65,12 +65,12 @@ BM_DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. BM_DEFINE_string(benchmark_filter, ""); -// Specification of either an exact number of iterations (specified as `x`) -// or a minimum number of seconds (specified as `s`) used to determine how -// long to run the benchmark. +// Specification of either an exact number of iterations (specified as +// `x`) or a minimum number of seconds (specified as `s`) used +// to determine how long to run the benchmark. // -// If the latter format (ie., min seconds) is used, the system may run the benchmark longer -// until the results are considered significant. +// If the latter format (ie., min seconds) is used, the system may run +// the benchmark longer until the results are considered significant. // // For backward compatibility, the `s` suffix may be omitted, in which case, // the specified number is interpreted as the number of seconds. diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 6087fff187..33e707a162 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -53,7 +53,8 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, } if (!IsZero(benchmark->min_rel_accuracy_)) { - name_.min_rel_accuracy = StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_); + name_.min_rel_accuracy = + StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_); } if (!IsZero(benchmark->min_warmup_time_)) { diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 4c582c9bd1..cb8dbc8e3e 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -228,9 +228,9 @@ BenchmarkRunner::BenchmarkRunner( reports_for_family(reports_for_family_), parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)), min_time(ComputeMinTime(b, parsed_benchtime_flag)), - min_rel_accuracy(!IsZero(b.min_rel_accuracy()) - ? b.min_rel_accuracy() - : FLAGS_benchmark_min_rel_accuracy), + min_rel_accuracy(!IsZero(b.min_rel_accuracy()) + ? b.min_rel_accuracy() + : FLAGS_benchmark_min_rel_accuracy), min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) ? b.min_warmup_time() : FLAGS_benchmark_min_warmup_time), @@ -332,7 +332,8 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( multiplier = is_significant ? multiplier : 10.0; if (!IsZero(GetMinRelAccuracy())) { - multiplier = std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy()); + multiplier = + std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy()); } // So what seems to be the sufficiently-large iteration count? Round up. @@ -354,9 +355,10 @@ bool BenchmarkRunner::ShouldReportIterationResults( return i.results.skipped_ || // Too many iterations already. i.iters >= kMaxIterations || - // We have applied for enough time and the relative accuracy is good enough. - // Relative accuracy is checked only for user provided timers. - (HasSufficientTimeToApply(i) && (!b.use_manual_time() || HasSufficientRelAccuracy(i))); + // We have applied for enough time and the relative accuracy is good + // enough. Relative accuracy is checked only for user provided timers. + (HasSufficientTimeToApply(i) && + (!b.use_manual_time() || HasSufficientRelAccuracy(i))); } double BenchmarkRunner::GetMinTimeToApply() const { @@ -369,19 +371,25 @@ double BenchmarkRunner::GetMinTimeToApply() const { } double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const { - return std::sqrt(i.seconds_pow2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters); + return std::sqrt(i.seconds_pow2 / i.iters - + std::pow(i.seconds / i.iters, 2.)) / + (i.seconds / i.iters) / sqrt(i.iters); } -bool BenchmarkRunner::HasSufficientTimeToApply(const IterationResults& i) const { +bool BenchmarkRunner::HasSufficientTimeToApply( + const IterationResults& i) const { return i.seconds >= GetMinTimeToApply() || // CPU time is specified but the elapsed real time greatly exceeds // the minimum time. // Note that user provided timers are except from this test. - (!b.use_manual_time() && i.results.real_time_used >= 5 * GetMinTimeToApply()); + (!b.use_manual_time() && + i.results.real_time_used >= 5 * GetMinTimeToApply()); } -bool BenchmarkRunner::HasSufficientRelAccuracy(const IterationResults& i) const { - return (IsZero(GetMinRelAccuracy()) || (GetRelAccuracy(i) <= GetMinRelAccuracy())); +bool BenchmarkRunner::HasSufficientRelAccuracy( + const IterationResults& i) const { + return (IsZero(GetMinRelAccuracy()) || + (GetRelAccuracy(i) <= GetMinRelAccuracy())); } void BenchmarkRunner::FinishWarmUp(const IterationCount& i) { diff --git a/src/thread_timer.h b/src/thread_timer.h index 464974f73f..ffe3c9f3ba 100644 --- a/src/thread_timer.h +++ b/src/thread_timer.h @@ -39,8 +39,8 @@ class ThreadTimer { // Called by each thread void SetIterationTime(double seconds) { - manual_time_used_ += seconds; - manual_time_used_pow2_ += std::pow(seconds, 2.); + manual_time_used_ += seconds; + manual_time_used_pow2_ += std::pow(seconds, 2.); } bool running() const { return running_; } diff --git a/test/benchmark_min_rel_accuracy_flag_test.cc b/test/benchmark_min_rel_accuracy_flag_test.cc index 0221e3ba3d..c4c85f7eea 100644 --- a/test/benchmark_min_rel_accuracy_flag_test.cc +++ b/test/benchmark_min_rel_accuracy_flag_test.cc @@ -8,8 +8,8 @@ #include "benchmark/benchmark.h" -// Tests that if a benchmark measures time manually, we can specify the required relative accuracy with -// --benchmark_min_rel_accuracy=. +// Tests that if a benchmark measures time manually, we can specify the required +// relative accuracy with --benchmark_min_rel_accuracy=. namespace { class TestReporter : public benchmark::ConsoleReporter { @@ -22,7 +22,8 @@ class TestReporter : public benchmark::ConsoleReporter { assert(report.size() == 1); iters_.push_back(report[0].iterations); real_accumulated_times_.push_back(report[0].real_accumulated_time); - manual_accumulated_time_pow2s_.push_back(report[0].manual_accumulated_time_pow2); + manual_accumulated_time_pow2s_.push_back( + report[0].manual_accumulated_time_pow2); ConsoleReporter::ReportRuns(report); }; @@ -61,7 +62,8 @@ static void BM_MyBench(benchmark::State& state) { BENCHMARK(BM_MyBench)->UseManualTime(); int main(int argc, char** argv) { - // Make a fake argv and append the new --benchmark_min_rel_accuracy= to it. + // Make a fake argv and append the new + // --benchmark_min_rel_accuracy= to it. int fake_argc = argc + 2; const char** fake_argv = new const char*[static_cast(fake_argc)]; for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i]; @@ -77,10 +79,15 @@ int main(int argc, char** argv) { // Check the executed iters. const benchmark::IterationCount iters = test_reporter.GetIters()[0]; - const double real_accumulated_time = test_reporter.GetRealAccumulatedTimes()[0]; - const double manual_accumulated_time_pow2 = test_reporter.GetManualAccumulatedTimePow2s()[0]; - - const double rel_accuracy = std::sqrt(manual_accumulated_time_pow2 / iters - std::pow(real_accumulated_time / iters, 2.)) / (real_accumulated_time / iters) / sqrt(iters); + const double real_accumulated_time = + test_reporter.GetRealAccumulatedTimes()[0]; + const double manual_accumulated_time_pow2 = + test_reporter.GetManualAccumulatedTimePow2s()[0]; + + const double rel_accuracy = + std::sqrt(manual_accumulated_time_pow2 / iters - + std::pow(real_accumulated_time / iters, 2.)) / + (real_accumulated_time / iters) / sqrt(iters); assert(rel_accuracy <= 0.01); delete[] fake_argv; From d3990ce956be30c5be2f8e42fa5b63ea3f91e3f8 Mon Sep 17 00:00:00 2001 From: Maarten Arnst Date: Fri, 24 May 2024 13:09:30 +0200 Subject: [PATCH 6/6] Static cast to avoid conversion warning --- src/benchmark_runner.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index cb8dbc8e3e..751dd972e9 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -371,9 +371,7 @@ double BenchmarkRunner::GetMinTimeToApply() const { } double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const { - return std::sqrt(i.seconds_pow2 / i.iters - - std::pow(i.seconds / i.iters, 2.)) / - (i.seconds / i.iters) / sqrt(i.iters); + return std::sqrt(i.seconds_pow2 - std::pow(i.seconds, 2.) / static_cast(i.iters)) / i.seconds; } bool BenchmarkRunner::HasSufficientTimeToApply(