diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 08cfe29da..34cd651d0 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -126,8 +126,12 @@ template int BM_Sequential(benchmark::State& state) { } BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); -Use `Benchmark::MinTime(double t)` to set the minimum time used to run the -benchmark. This option overrides the `benchmark_min_time` flag. +Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how +long to run the benchmark. This option overrides the `benchmark_min_time` flag. + +If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)` +to set the required minimum relative accuracy used to determine how long to run +the benchmark. This option overrides the `benchmark_min_rel_accuracy` flag. void BM_test(benchmark::State& state) { ... body ... @@ -1193,11 +1197,21 @@ class BENCHMARK_EXPORT Benchmark { // multiplier kRangeMultiplier will be used. Benchmark* RangeMultiplier(int multiplier); - // Set the minimum amount of time to use when running this benchmark. This - // option overrides the `benchmark_min_time` flag. + // Set the minimum amount of time to use to determine the required number + // of iterations when running this benchmark. This option overrides + // the `benchmark_min_time` flag. // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. Benchmark* MinTime(double t); + // Set the minimum relative accuracy to use to determine the required number + // of iterations when running this benchmark. This option overrides + // the `benchmark_min_rel_accuracy` flag. + // REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and + // time is measured manually, i.e., `UseManualTime` has been called on this + // benchmark and each benchmark iteration should call + // `SetIterationTime(seconds)` to report the measured time. 
+ Benchmark* MinRelAccuracy(double r); + // Set the minimum amount of time to run the benchmark before taking runtimes // of this benchmark into account. This // option overrides the `benchmark_min_warmup_time` flag. @@ -1320,6 +1334,7 @@ class BENCHMARK_EXPORT Benchmark { int range_multiplier_; double min_time_; + double min_rel_accuracy_; double min_warmup_time_; IterationCount iterations_; int repetitions_; @@ -1751,6 +1766,7 @@ struct BENCHMARK_EXPORT BenchmarkName { std::string function_name; std::string args; std::string min_time; + std::string min_rel_accuracy; std::string min_warmup_time; std::string iterations; std::string repetitions; @@ -1790,6 +1806,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { threads(1), time_unit(GetDefaultTimeUnit()), real_accumulated_time(0), + manual_accumulated_time_pow2(0), cpu_accumulated_time(0), max_heapbytes_used(0), use_real_time_for_initial_big_o(false), @@ -1818,6 +1835,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { int64_t repetitions; TimeUnit time_unit; double real_accumulated_time; + double manual_accumulated_time_pow2; double cpu_accumulated_time; // Return a value representing the real time per iteration in the unit diff --git a/src/benchmark.cc b/src/benchmark.cc index 337bb3faa..f379faf92 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -65,12 +65,12 @@ BM_DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. BM_DEFINE_string(benchmark_filter, ""); -// Specification of how long to run the benchmark. +// Specification of either an exact number of iterations (specified as +// `x`) or a minimum number of seconds (specified as `s`) used +// to determine how long to run the benchmark. // -// It can be either an exact number of iterations (specified as `x`), -// or a minimum number of seconds (specified as `s`). If the latter -// format (ie., min seconds) is used, the system may run the benchmark longer -// until the results are considered significant. 
+// If the latter format (i.e., min seconds) is used, the system may run +// the benchmark longer until the results are considered significant. // // For backward compatibility, the `s` suffix may be omitted, in which case, // the specified number is interpreted as the number of seconds. @@ -81,6 +81,19 @@ BM_DEFINE_string(benchmark_filter, ""); // benchmark execution, regardless of number of threads. BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr); +// Specification of the required relative accuracy used to determine how +// long to run the benchmark. A value of 0 (the default) disables this check. +// +// REQUIRES: time is measured manually. +// +// Manual timers provide per-iteration times. The relative accuracy is +// measured as the standard deviation of these per-iteration times divided by +// the mean and the square root of the number of iterations. The benchmark is +// run until both of the following conditions are fulfilled: +// 1. the specified minimum time or number of iterations is reached +// 2. the measured relative accuracy meets the specified requirement +BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0); + // Minimum number of seconds a benchmark should be run before results should be // taken into account. This e.g can be necessary for benchmarks of code which // needs to fill some form of cache before performance is of interest. 
@@ -703,6 +716,8 @@ void ParseCommandLineFlags(int* argc, char** argv) { ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || ParseStringFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || + ParseDoubleFlag(argv[i], "benchmark_min_rel_accuracy", + &FLAGS_benchmark_min_rel_accuracy) || ParseDoubleFlag(argv[i], "benchmark_min_warmup_time", &FLAGS_benchmark_min_warmup_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", @@ -770,7 +785,8 @@ void PrintDefaultHelp() { "benchmark" " [--benchmark_list_tests={true|false}]\n" " [--benchmark_filter=]\n" - " [--benchmark_min_time=`x` OR `s` ]\n" + " [--benchmark_min_time=`x` OR `s`]\n" + " [--benchmark_min_rel_accuracy=]\n" " [--benchmark_min_warmup_time=]\n" " [--benchmark_repetitions=]\n" " [--benchmark_enable_random_interleaving={true|false}]\n" diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 286f98653..33e707a16 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -25,6 +25,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, statistics_(benchmark_.statistics_), repetitions_(benchmark_.repetitions_), min_time_(benchmark_.min_time_), + min_rel_accuracy_(benchmark_.min_rel_accuracy_), min_warmup_time_(benchmark_.min_warmup_time_), iterations_(benchmark_.iterations_), threads_(thread_count) { @@ -51,6 +52,11 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); } + if (!IsZero(benchmark->min_rel_accuracy_)) { + name_.min_rel_accuracy = + StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_); + } + if (!IsZero(benchmark->min_warmup_time_)) { name_.min_warmup_time = StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_); diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 94f516531..38e5e40d9 100644 --- a/src/benchmark_api_internal.h +++ 
b/src/benchmark_api_internal.h @@ -36,6 +36,7 @@ class BenchmarkInstance { const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } double min_time() const { return min_time_; } + double min_rel_accuracy() const { return min_rel_accuracy_; } double min_warmup_time() const { return min_warmup_time_; } IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } @@ -63,6 +64,7 @@ class BenchmarkInstance { const std::vector& statistics_; int repetitions_; double min_time_; + double min_rel_accuracy_; double min_warmup_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to us diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 8ade04822..13ba9e554 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -211,6 +211,7 @@ Benchmark::Benchmark(const std::string& name) use_default_time_unit_(true), range_multiplier_(kRangeMultiplier), min_time_(0), + min_rel_accuracy_(0), min_warmup_time_(0), iterations_(0), repetitions_(0), @@ -356,6 +357,14 @@ Benchmark* Benchmark::MinTime(double t) { return this; } +Benchmark* Benchmark::MinRelAccuracy(double r) { + BM_CHECK(r > 0.0); + BM_CHECK(iterations_ == 0); + BM_CHECK(use_manual_time_); + min_rel_accuracy_ = r; + return this; +} + Benchmark* Benchmark::MinWarmUpTime(double t) { BM_CHECK(t >= 0.0); BM_CHECK(iterations_ == 0); diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index a74bdadd3..751dd972e 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -93,6 +93,7 @@ BenchmarkReporter::Run CreateRunReport( if (!report.skipped) { if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; + report.manual_accumulated_time_pow2 = results.manual_time_used_pow2; } else { report.real_accumulated_time = results.real_time_used; } @@ -140,6 +141,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, 
results.cpu_time_used += timer.cpu_time_used(); results.real_time_used += timer.real_time_used(); results.manual_time_used += timer.manual_time_used(); + results.manual_time_used_pow2 += timer.manual_time_used_pow2(); results.complexity_n += st.complexity_length_n(); internal::Increment(&results.counters, st.counters); } @@ -225,7 +227,10 @@ BenchmarkRunner::BenchmarkRunner( : b(b_), reports_for_family(reports_for_family_), parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)), - min_time(ComputeMinTime(b_, parsed_benchtime_flag)), + min_time(ComputeMinTime(b, parsed_benchtime_flag)), + min_rel_accuracy(!IsZero(b.min_rel_accuracy()) + ? b.min_rel_accuracy() + : FLAGS_benchmark_min_rel_accuracy), min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) ? b.min_warmup_time() : FLAGS_benchmark_min_warmup_time), @@ -302,8 +307,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { // Base decisions off of real time if requested by this benchmark. i.seconds = i.results.cpu_time_used; + i.seconds_pow2 = 0; if (b.use_manual_time()) { i.seconds = i.results.manual_time_used; + i.seconds_pow2 = i.results.manual_time_used_pow2; } else if (b.use_real_time()) { i.seconds = i.results.real_time_used; } @@ -324,6 +331,11 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1; multiplier = is_significant ? multiplier : 10.0; + if (b.use_manual_time() && !IsZero(GetMinRelAccuracy())) { + multiplier = + std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy()); + } + // So what seems to be the sufficiently-large iteration count? Round up. const IterationCount max_next_iters = static_cast( std::llround(std::max(multiplier * static_cast(i.iters), @@ -341,14 +353,12 @@ bool BenchmarkRunner::ShouldReportIterationResults( // Either it has run for a sufficient amount of time // or because an error was reported. 
return i.results.skipped_ || - i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= - GetMinTimeToApply() || // The elapsed time is large enough. - // CPU time is specified but the elapsed real time greatly exceeds - // the minimum time. - // Note that user provided timers are except from this test. - ((i.results.real_time_used >= 5 * GetMinTimeToApply()) && - !b.use_manual_time()); + // Too many iterations already. + i.iters >= kMaxIterations || + // Enough time has elapsed and the relative accuracy is good + // enough. Relative accuracy is checked only for user provided timers. + (HasSufficientTimeToApply(i) && + (!b.use_manual_time() || HasSufficientRelAccuracy(i))); } double BenchmarkRunner::GetMinTimeToApply() const { @@ -360,6 +370,33 @@ double BenchmarkRunner::GetMinTimeToApply() const { return warmup_done ? min_time : min_warmup_time; } +double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const { + // Relative standard error of the mean of the per-iteration times: + // sqrt(sum(x^2) - sum(x)^2 / n) / sum(x). The radicand is clamped at zero + // because rounding can push it slightly negative when the per-iteration + // times are (nearly) identical; the resulting NaN would never satisfy the + // accuracy requirement. + const double sum_sq_dev = + i.seconds_pow2 - i.seconds * i.seconds / static_cast<double>(i.iters); + return std::sqrt(std::max(sum_sq_dev, 0.0)) / i.seconds; +} + +bool BenchmarkRunner::HasSufficientTimeToApply( + const IterationResults& i) const { + return i.seconds >= GetMinTimeToApply() || + // CPU time is specified but the elapsed real time greatly exceeds + // the minimum time. + // Note that user provided timers are exempt from this test. 
+ (!b.use_manual_time() && + i.results.real_time_used >= 5 * GetMinTimeToApply()); +} + +bool BenchmarkRunner::HasSufficientRelAccuracy( + const IterationResults& i) const { + return (IsZero(GetMinRelAccuracy()) || + (GetRelAccuracy(i) <= GetMinRelAccuracy())); +} + void BenchmarkRunner::FinishWarmUp(const IterationCount& i) { warmup_done = true; iters = i; diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index db2fa0439..fe9df08e4 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -26,6 +26,7 @@ namespace benchmark { BM_DECLARE_string(benchmark_min_time); +BM_DECLARE_double(benchmark_min_rel_accuracy); BM_DECLARE_double(benchmark_min_warmup_time); BM_DECLARE_int32(benchmark_repetitions); BM_DECLARE_bool(benchmark_report_aggregates_only); @@ -77,6 +78,8 @@ class BenchmarkRunner { double GetMinTime() const { return min_time; } + double GetMinRelAccuracy() const { return min_rel_accuracy; } + bool HasExplicitIters() const { return has_explicit_iteration_count; } IterationCount GetIters() const { return iters; } @@ -89,6 +92,7 @@ class BenchmarkRunner { BenchTimeType parsed_benchtime_flag; const double min_time; + const double min_rel_accuracy; const double min_warmup_time; bool warmup_done; const int repeats; @@ -110,6 +114,7 @@ class BenchmarkRunner { internal::ThreadManager::Result results; IterationCount iters; double seconds; + double seconds_pow2; }; IterationResults DoNIterations(); @@ -119,6 +124,12 @@ class BenchmarkRunner { double GetMinTimeToApply() const; + double GetRelAccuracy(const IterationResults& i) const; + + bool HasSufficientTimeToApply(const IterationResults& i) const; + + bool HasSufficientRelAccuracy(const IterationResults& i) const; + void FinishWarmUp(const IterationCount& i); void RunWarmUp(); diff --git a/src/thread_manager.h b/src/thread_manager.h index 819b3c44d..e3e6c5a2b 100644 --- a/src/thread_manager.h +++ b/src/thread_manager.h @@ -41,6 +41,7 @@ class ThreadManager { double real_time_used = 0; double 
cpu_time_used = 0; double manual_time_used = 0; + double manual_time_used_pow2 = 0; int64_t complexity_n = 0; std::string report_label_; std::string skip_message_; diff --git a/src/thread_timer.h b/src/thread_timer.h index eb23f5956..ffe3c9f3b 100644 --- a/src/thread_timer.h +++ b/src/thread_timer.h @@ -38,7 +38,10 @@ class ThreadTimer { } // Called by each thread - void SetIterationTime(double seconds) { manual_time_used_ += seconds; } + void SetIterationTime(double seconds) { + manual_time_used_ += seconds; + manual_time_used_pow2_ += seconds * seconds; + } bool running() const { return running_; } @@ -60,6 +63,11 @@ class ThreadTimer { return manual_time_used_; } + double manual_time_used_pow2() const { + BM_CHECK(!running_); + return manual_time_used_pow2_; + } + private: double ReadCpuTimerOfChoice() const { if (measure_process_cpu_time) return ProcessCPUUsage(); @@ -78,6 +86,7 @@ class ThreadTimer { double cpu_time_used_ = 0; // Manually set iteration time. User sets this with SetIterationTime(seconds). 
double manual_time_used_ = 0; + double manual_time_used_pow2_ = 0; }; } // namespace internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1de175f98..7a907cb05 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -97,6 +97,9 @@ benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_ compile_benchmark_test(benchmark_min_time_flag_iters_test) benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test) +compile_benchmark_test(benchmark_min_rel_accuracy_flag_test) +benchmark_add_test(NAME min_rel_accuracy_flag_test COMMAND benchmark_min_rel_accuracy_flag_test) + add_filter_test(filter_simple "Foo" 3) add_filter_test(filter_simple_negative "-Foo" 2) add_filter_test(filter_suffix "BM_.*" 4) diff --git a/test/benchmark_min_rel_accuracy_flag_test.cc b/test/benchmark_min_rel_accuracy_flag_test.cc new file mode 100644 index 000000000..c4c85f7ee --- /dev/null +++ b/test/benchmark_min_rel_accuracy_flag_test.cc @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +// Tests that if a benchmark measures time manually, we can specify the required +// relative accuracy with --benchmark_min_rel_accuracy=. 
+namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + assert(report.size() == 1); + iters_.push_back(report[0].iterations); + real_accumulated_times_.push_back(report[0].real_accumulated_time); + manual_accumulated_time_pow2s_.push_back( + report[0].manual_accumulated_time_pow2); + ConsoleReporter::ReportRuns(report); + }; + + TestReporter() {} + + virtual ~TestReporter() {} + + const std::vector& GetIters() const { + return iters_; + } + + const std::vector& GetRealAccumulatedTimes() const { + return real_accumulated_times_; + } + + const std::vector& GetManualAccumulatedTimePow2s() const { + return manual_accumulated_time_pow2s_; + } + + private: + std::vector iters_; + std::vector real_accumulated_times_; + std::vector manual_accumulated_time_pow2s_; +}; + +} // end namespace + +static void BM_MyBench(benchmark::State& state) { + static std::mt19937 rd{std::random_device{}()}; + static std::uniform_real_distribution mrand(0, 1); + + for (auto s : state) { + state.SetIterationTime(mrand(rd)); + } +} +BENCHMARK(BM_MyBench)->UseManualTime(); + +int main(int argc, char** argv) { + // Make a fake argv and append the new + // --benchmark_min_rel_accuracy= to it. + int fake_argc = argc + 2; + const char** fake_argv = new const char*[static_cast(fake_argc)]; + for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i]; + fake_argv[argc] = "--benchmark_min_time=10s"; + fake_argv[argc + 1] = "--benchmark_min_rel_accuracy=0.01"; + + benchmark::Initialize(&fake_argc, const_cast(fake_argv)); + + TestReporter test_reporter; + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench"); + assert(returned_count == 1); + + // Check the executed iters. 
+ const benchmark::IterationCount iters = test_reporter.GetIters()[0]; + const double real_accumulated_time = + test_reporter.GetRealAccumulatedTimes()[0]; + const double manual_accumulated_time_pow2 = + test_reporter.GetManualAccumulatedTimePow2s()[0]; + + const double rel_accuracy = + std::sqrt(manual_accumulated_time_pow2 / iters - + std::pow(real_accumulated_time / iters, 2.)) / + (real_accumulated_time / iters) / std::sqrt(static_cast<double>(iters)); + assert(rel_accuracy <= 0.01); + + delete[] fake_argv; + return 0; +}