597 changes: 414 additions & 183 deletions std/benchmark.d
@@ -7,7 +7,8 @@
Source: $(PHOBOSSRC std/_benchmark.d)
*/
module std.benchmark;
import std.algorithm, std.array, std.conv, std.datetime,
    std.functional, std.stdio, std.traits, std.typecons;
import core.exception;

//==============================================================================
@@ -234,6 +235,25 @@ public:
assert(t2 == t3);
}

/**
Returns $(D true) iff the $(D StopWatch) object is currently
_running.
Example:
----
StopWatch sw;
assert(!sw.running);
sw.start();
assert(sw.running);
sw.stop();
assert(!sw.running);
----
*/
@property bool running() const pure
{
return _flagStarted;
}

private:

// true if observing.
@@ -253,69 +273,81 @@ private:
return aliases.length;
}

/++
Benchmarks code for speed assessment and comparison. A baseline timing
that accounts for benchmarking overheads is kept along with the
results and automatically deducted from all timings. A timing
indistinguishable from the baseline looping overhead appears with a
run time of zero and indicates a function that does too little work to
be timed.

Params:

fun = Aliases of callable objects (e.g. function names). Each should
take either no arguments or one integral argument, which is the
iterations count.
n = The number of times each function is to be executed.

Returns:

The amount of time (as a $(CXREF time, TickDuration)) that it took to
call each function $(D n) times. The first value is the length of time
that it took to call $(D fun[0]) $(D n) times. The second value is the
length of time it took to call $(D fun[1]) $(D n) times. Etc.

Example:
--------------------
int a;
void intToString() {auto b = to!string(a);}
void intToWstring() {auto b = to!wstring(a);}
void intToDstring() {auto b = to!dstring(a);}
auto r = benchmark!(intToString, intToWstring, intToDstring)(10_000_000);
auto names = [ "intToString", "intToWstring", "intToDstring" ];
foreach (i, timing; r)
    writefln("%s: %sms", names[i], r[i].to!("msecs", int));
--------------------

If one or more of the functions being benchmarked accept one numeric
argument convertible from $(D uint), then $(D benchmark) assumes that
function does its own iteration internally and simply passes the
iteration count to that function.

Example:
--------------------
// Variant with internal iteration
int a;
void intToString(uint n) {foreach (i; 0 .. n) auto b = to!string(a);}
void intToWstring(uint n) {foreach (i; 0 .. n) auto b = to!wstring(a);}
void intToDstring(uint n) {foreach (i; 0 .. n) auto b = to!dstring(a);}
auto r = benchmark!(intToString, intToWstring, intToDstring)(10_000_000);
auto names = [ "intToString", "intToWstring", "intToDstring" ];
foreach (i, timing; r)
    writefln("%s: %sms", names[i], r[i].to!("msecs", int));
--------------------
+/
auto benchmark(fun...)(uint times)
if ((is(typeof(fun[0]())) || is(typeof(fun[0](times))))
        && (lengthof!fun() == 1 || is(typeof(benchmark!(fun[1 .. $])(times)))))
{
    // Baseline function. Use asm inside the body to avoid
    // optimizations.
    static void baseline() { asm { nop; } }

    // Use a local pointer to avoid TLS access overhead
    auto theStopWatch = &.theStopWatch;

    // Get baseline loop time
    with (*theStopWatch) running ? reset() : start();
    foreach (j; 0 .. times)
    {
        baseline();
    }
    auto baselineTime = theStopWatch.peek();

    TickDuration[lengthof!fun()] result;
    foreach (i, unused; fun)
    {
        theStopWatch.reset();
        static if (is(typeof(fun[i](times))))
        {
            fun[i](times);
        }
        else
        {
            foreach (j; 0 .. times)
            {
                fun[i]();
            }
        }
        auto elapsed = theStopWatch.peek();

        // Subtract baseline
        result[i] = elapsed < baselineTime
            ? result[i].init
            : elapsed - baselineTime;
    }

    return result;
}
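
// A minimal sketch (not among the original verification tests): per the
// documentation above, after baseline subtraction a function doing
// essentially no work should read as zero, i.e. TickDuration.init.
unittest
{
    static void nothing() {}
    auto r = benchmark!nothing(1_000_000);
    // r[0] is TickDuration.init when the timing is indistinguishable
    // from the baseline loop overhead.
    assert(r.length == 1);
}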

// Verify Example 1
unittest
{
    void writefln(S...)(S args){}

    int a;
    void intToString() {auto b = to!string(a);}
    void intToWstring() {auto b = to!wstring(a);}
    void intToDstring() {auto b = to!dstring(a);}
    auto r = benchmark!(intToString, intToWstring, intToDstring)(10_000_000);
    auto names = [ "intToString", "intToWstring", "intToDstring" ];
    foreach (i, timing; r)
        writefln("%s: %sms", names[i], r[i].to!("msecs", int));
}

// Verify Example 2
unittest
{
    void writefln(S...)(S args){}

    int a;
    void intToString(uint n) {foreach (i; 0 .. n) auto b = to!string(a);}
    void intToWstring(uint n) {foreach (i; 0 .. n) auto b = to!wstring(a);}
    void intToDstring(uint n) {foreach (i; 0 .. n) auto b = to!dstring(a);}
    auto r = benchmark!(intToString, intToWstring, intToDstring)(10_000_000);
    auto names = [ "intToString", "intToWstring", "intToDstring" ];
    foreach (i, timing; r)
        writefln("%s: %sms", names[i], r[i].to!("msecs", int));
}

/**
Benchmarks one function, automatically issuing multiple calls to
achieve good accuracy.

The call $(D benchmark!fun()) first calls $(D fun) once. If the call
completed too fast to gather an accurate timing, $(D fun) is called 10
times, then 100 times and so on, until a meaningful timing is
collected.

Params:

fun = Alias of a callable object (e.g. a function name). It should
take either no arguments or one integral argument, which is the
iterations count.

A timing indistinguishable from the baseline looping overhead appears
with a run time of zero and indicates a function that does too little
work to be timed.

Returns:

A tuple containing the number of iterations in the first member and
the time in $(D TickDuration) format for the function being
benchmarked.

Example:
----
import std.conv;
int a;
void fun() { auto b = to!(string)(a); }
auto r = benchmark!fun();
writefln("Milliseconds to call fun() %s times: %s",
    r[0], r[1].to!("msecs", int));
----
*/
auto benchmark(alias fun)()
if (is(typeof(benchmark!fun(1u))))
{
    uint n = 1;
    TickDuration elapsed;
    for (; n < 1_000_000_000; n *= 10)
    {
        elapsed = benchmark!fun(n)[0];
        if (elapsed.to!("msecs", int) < 10)
        {
            continue;
        }
        break;
    }
    return tuple(n, elapsed);
}
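
// A usage sketch for the auto-scaling overload, assuming to!string is
// representative work: r[0] is the automatically chosen iteration count,
// r[1] the elapsed time for those r[0] calls.
unittest
{
    static void fun() { auto b = to!string(42); }
    auto r = benchmark!fun();
    assert(r[0] >= 1);
    // Average cost per call, in microseconds
    auto usecsPerCall = r[1].to!("usecs", double) / r[0];
}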

/**
Benchmarks an entire module given its name. Benchmarking proceeds
as follows: all symbols inside the module are enumerated, and those that
start with "benchmark_" are considered benchmark functions and are
timed using $(D benchmark) defined above.

This function prints to $(D target) a table containing for each
benchmark the function name (excluding the $(D "benchmark_") prefix),
the number of calls issued, the average duration per call, and the
speed in calls per second.

Example:
----
// file module_one.d
module module_one;
import std.file, std.array;

void benchmark_fileWrite()
{
    std.file.write("/tmp/deleteme", "hello, world!");
}

void benchmark_fileRead()
{
    std.file.read("/tmp/deleteme");
}

void main()
{
    benchmarkModule!"module_one"();
}
----

The program above prints a table with the benchmark results in the
following format.

----
===============================================================================
Benchmark                              relative    calls     t/call    calls/s
===============================================================================
fileWrite                                          1E+02 1.768E+02μs 5.657E+03
fileRead                                           1E+03 4.208E+01μs 2.376E+04
===============================================================================
----

The $(D calls) column contains the number of calls issued to each
function (always a power of 10). The $(D t/call) column is the time
taken by each call (smaller is better), and column $(D calls/s) shows
the number of calls per second (larger is better).

It is possible to benchmark functions in groups so as to see relative
differences between functions. For example, say we add the following
functions to $(D module_one) above:

----
void benchmark_append_builtin(uint n)
{
    string a;
    foreach (i; 0 .. n)
    {
        a ~= 'x';
    }
}

void benchmark_append_appender(uint n)
{
    auto s = appender!string();
    foreach (i; 0 .. n)
    {
        put(s, 'x');
    }
}

void benchmark_append_concat(uint n)
{
    string a;
    foreach (i; 0 .. n)
    {
        a = a ~ 'x';
    }
}
----

Note that a benchmark function may either take no parameters,
indicating that the iteration should be done outside, or accept one
$(D uint) parameter, indicating that it does the iteration on its own.

In this case, $(D benchmarkModule) detects that $(D append_builtin),
$(D append_appender), and $(D append_concat) share the common prefix
$(D append_) and groups them together when benchmarking. The output
table in that case looks like this:

----
===============================================================================
Benchmark                              relative    calls     t/call    calls/s
===============================================================================
fileWrite                                          1E+02  1.70E+02μs  5.87E+03
fileRead                                           1E+03  3.96E+01μs  2.52E+04
append:builtin                                     1E+06  7.80E-02μs  1.28E+07
       appender                        6.15E+00x   1E+06  1.20E-02μs  7.85E+07
       concat                          4.94E-02x   1E+04  1.51E+00μs  6.63E+05
===============================================================================
----

The benchmark has filled the column titled $(D relative) with $(D
6.15E+00x) for $(D appender) (meaning that in this run $(D
append_appender) was $(D 6.15) times faster than $(D append_builtin)),
and with $(D 4.94E-02x) for $(D concat) (meaning that $(D
append_concat)'s speed was $(D 0.0494) of $(D append_builtin)'s speed).
*/
void benchmarkModule(string mod)(File target = stdout)
{
    struct TestResult
    {
        string name;
        string groupName;
        uint iterations;
        TickDuration time;
        TickDuration timePerIteration;
        double itersPerSecond;
        double ratio = 0;
    }

    TestResult[] results;

    // Step 1: fill the results with name information
    {
        // Import stuff here so we have access to it
        mixin("import " ~ mod ~ ";");

        foreach (entity; mixin("__traits(allMembers, " ~ mod ~ ")"))
        {
            static if (entity.length >= 10
                    && entity[0 .. 10] == "benchmark_")
            {
                ++results.length;
                results.back().name = entity[10 .. $];
                auto sub = findSplit(results.back().name, "_");
                if (!sub[1].empty)
                {
                    results.back().groupName = sub[0];
                    results.back().name = sub[2];
                }
            }
        }
    }

    size_t index;

    void collectResult(string entity, Tuple!(uint, TickDuration) stats)
    {
        scope(exit) ++index;
        if (index == 0)
        {
            target.writefln(
                "=================================================="
                "=============================\n"
                "%-42s%8s%7s%11s%10s\n" // sum must be 79
                "================================================="
                "==============================", "Benchmark",
                "relative", "calls", "t/call", "calls/s");
        }
        with (results[index])
        {
            iterations = stats[0];
            time = stats[1];
            timePerIteration = time / iterations;
            itersPerSecond = iterations /
                time.to!("seconds", double);

            // Format and print
            string format, printedName;
            if (groupName && index > 0
                && groupName == results[index - 1].groupName)
            {
                // This is part of a group of related benchmarks; walk
                // back to the first member of the group, which serves
                // as the reference for the relative speed column.
                size_t reference = index;
                while (reference > 0
                        && results[reference - 1].groupName == groupName)
                    --reference;
                ratio = itersPerSecond / results[reference].itersPerSecond;
                format = "%1$-40s %5$1.2Ex %2$1.0E %3$1.2Eμs %4$1.2E";
                printedName = replicate(" ", groupName.length + 1) ~ name;
            }
            else
            {
                // No grouping
                format = "%1$-51s %2$1.0E %3$1.2Eμs %4$1.2E";
                printedName = groupName ? groupName~":"~name : name;
            }
            target.writefln(format,
                printedName, cast(double) iterations,
                timePerIteration.to!("usecs", double),
                itersPerSecond, ratio);
        }
    }

    // Step 2: run each benchmark function and print its results
    {
        // Import stuff here so we have access to it
        mixin("import " ~ mod ~ ";");
        foreach (entity; mixin("__traits(allMembers, " ~ mod ~ ")"))
        {
            static if (entity.length >= 10
                    && entity[0 .. 10] == "benchmark_")
            {
                auto r = mixin("benchmark!(" ~ mod ~ "."
                        ~ entity ~ ")()");
                collectResult(entity, r);
            }
        }
    }

    if (results.length)
    {
        target.writeln(
            "========================================================="
            "======================");
    }
}
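
// Usage sketch: the report may be written to any std.stdio.File, not just
// stdout; the path below is hypothetical. Gated like the unittest further
// down so the benchmarks only run on request.
unittest
{
    version(StdRunBenchmarks)
    {
        auto sink = File("/tmp/benchmark_report.txt", "w");
        benchmarkModule!"std.benchmark"(sink);
    }
}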

void benchmark_fileWrite()
{
std.file.write("/tmp/deleteme", "hello, world!");
}

void benchmark_fileRead()
{
std.file.read("/tmp/deleteme");
}

void benchmark_append_builtin(uint n)
{
string a;
foreach (i; 0 .. n)
{
a ~= 'x';
}
}

void benchmark_append_appender(uint n)
{
import std.range;
auto a = appender!string();
foreach (i; 0 .. n)
{
put(a, 'x');
}
}

void benchmark_append_concat(uint n)
{
string a;
foreach (i; 0 .. n)
{
a = a ~ 'x';
}
}

unittest
{
version(StdRunBenchmarks) benchmarkModule!"std.benchmark"();
}

// One benchmark stopwatch
private StopWatch theStopWatch;

/**
Pauses and resumes the current benchmark, respectively. This is useful
if the benchmark needs to set things up before performing the
measurement.
Example:
----
import std.algorithm, std.random, std.stdio;
void benchmark_findDouble(uint n)
{
// Fill an array of random numbers and an array of random indexes
benchmarkPause();
auto array = new double[n];
auto indexes = new size_t[n];
foreach (i; 0 .. n)
{
array[i] = uniform(0.0, 1000.0);
indexes[i] = uniform(0, n);
}
benchmarkResume();
// The actual benchmark begins here
foreach (i; 0 .. n)
{
auto balance = std.algorithm.find(array, array[indexes[i]]);
}
}
unittest
{
writeln(benchmark!benchmark_findDouble(10_000)[0].to!("msecs", uint));
}
----
*/
void benchmarkPause()
{
theStopWatch.stop();
}

/// Ditto
void benchmarkResume()
{
theStopWatch.start();
}

version(unittest) import std.random, std.stdio;
unittest
{
void benchmark_findDouble(uint n)
{
// Fill an array of random numbers and an array of random indexes
benchmarkPause();
auto array = new double[n];
auto indexes = new size_t[n];
foreach (i; 0 .. n)
{
array[i] = uniform(0.0, 1000.0);
indexes[i] = uniform(0, n);
}
benchmarkResume();

// The actual benchmark begins here
foreach (i; 0 .. n)
{
auto balance = std.algorithm.find(array, array[indexes[i]]);
}
}

//writeln(benchmark!benchmark_findDouble(10_000)[0].to!("msecs",
//uint));
benchmark!benchmark_findDouble(10_000);
}

/++
@@ -611,28 +854,16 @@ void main() {
}
--------------------
+/
ComparingBenchmarkResult comparingBenchmark(alias baseFunc,
                                            alias targetFunc,
                                            int times = 0xfff)()
{
    auto t = benchmark!(baseFunc, targetFunc)(times);
    return ComparingBenchmarkResult(t[0], t[1]);
}

@trusted unittest
{
void f1x() {}
void f2x() {}