Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bench: Add support for measuring CPU cycles #9202

Merged
merged 1 commit into from Nov 29, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/Makefile.bench.include
Expand Up @@ -22,7 +22,9 @@ bench_bench_bitcoin_SOURCES = \
bench/mempool_eviction.cpp \
bench/verify_script.cpp \
bench/base58.cpp \
bench/lockedpool.cpp
bench/lockedpool.cpp \
bench/perf.cpp \
bench/perf.h

nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_TEST_FILES)

Expand Down
22 changes: 20 additions & 2 deletions src/bench/bench.cpp
Expand Up @@ -3,6 +3,7 @@
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include "bench.h"
#include "perf.h"

#include <iostream>
#include <iomanip>
Expand All @@ -26,7 +27,9 @@ BenchRunner::BenchRunner(std::string name, BenchFunction func)
void
BenchRunner::RunAll(double elapsedTimeForOne)
{
std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "\n";
perf_init();
std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << ","
<< "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n";

for (std::map<std::string,BenchFunction>::iterator it = benchmarks.begin();
it != benchmarks.end(); ++it) {
Expand All @@ -35,6 +38,7 @@ BenchRunner::RunAll(double elapsedTimeForOne)
BenchFunction& func = it->second;
func(state);
}
perf_fini();
}

bool State::KeepRunning()
Expand All @@ -44,15 +48,24 @@ bool State::KeepRunning()
return true;
}
double now;
uint64_t nowCycles;
if (count == 0) {
lastTime = beginTime = now = gettimedouble();
lastCycles = beginCycles = nowCycles = perf_cpucycles();
}
else {
now = gettimedouble();
double elapsed = now - lastTime;
double elapsedOne = elapsed * countMaskInv;
if (elapsedOne < minTime) minTime = elapsedOne;
if (elapsedOne > maxTime) maxTime = elapsedOne;

// We only use relative values, so don't have to handle 64-bit wrap-around specially
nowCycles = perf_cpucycles();
uint64_t elapsedOneCycles = (nowCycles - lastCycles) * countMaskInv;
if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles;
if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles;

if (elapsed*128 < maxElapsed) {
// If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing.
// The restart avoids including the overhead of this code in the measurement.
Expand All @@ -61,6 +74,8 @@ bool State::KeepRunning()
count = 0;
minTime = std::numeric_limits<double>::max();
maxTime = std::numeric_limits<double>::min();
minCycles = std::numeric_limits<uint64_t>::max();
maxCycles = std::numeric_limits<uint64_t>::min();
return true;
}
if (elapsed*16 < maxElapsed) {
Expand All @@ -72,6 +87,7 @@ bool State::KeepRunning()
}
}
lastTime = now;
lastCycles = nowCycles;
++count;

if (now - beginTime < maxElapsed) return true; // Keep going
Expand All @@ -80,7 +96,9 @@ bool State::KeepRunning()

// Output results
double average = (now-beginTime)/count;
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "\n";
int64_t averageCycles = (nowCycles-beginCycles)/count;
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << ","
<< minCycles << "," << maxCycles << "," << averageCycles << "\n";

return false;
}
10 changes: 8 additions & 2 deletions src/bench/bench.h
Expand Up @@ -41,12 +41,18 @@ namespace benchmark {
double maxElapsed;
double beginTime;
double lastTime, minTime, maxTime, countMaskInv;
int64_t count;
int64_t countMask;
uint64_t count;
uint64_t countMask;
uint64_t beginCycles;
uint64_t lastCycles;
uint64_t minCycles;
uint64_t maxCycles;
public:
State(std::string _name, double _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) {
minTime = std::numeric_limits<double>::max();
maxTime = std::numeric_limits<double>::min();
minCycles = std::numeric_limits<uint64_t>::max();
maxCycles = std::numeric_limits<uint64_t>::min();
countMask = 1;
countMaskInv = 1./(countMask + 1);
}
Expand Down
53 changes: 53 additions & 0 deletions src/bench/perf.cpp
@@ -0,0 +1,53 @@
// Copyright (c) 2016 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include "perf.h"

#if defined(__i386__) || defined(__x86_64__)

/* These architectures support quering the cycle counter
* from user space, no need for any syscall overhead.
*/
void perf_init(void) { }
void perf_fini(void) { }

#elif defined(__linux__)

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int fd = -1;
static struct perf_event_attr attr;

void perf_init(void)
{
attr.type = PERF_TYPE_HARDWARE;
attr.config = PERF_COUNT_HW_CPU_CYCLES;
fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

void perf_fini(void)
{
if (fd != -1) {
close(fd);
}
}

uint64_t perf_cpucycles(void)
{
uint64_t result = 0;
if (fd == -1 || read(fd, &result, sizeof(result)) < (ssize_t)sizeof(result)) {
return 0;
}
return result;
}

#else /* Unhandled platform */

void perf_init(void) { }
void perf_fini(void) { }
uint64_t perf_cpucycles(void) { return 0; }

#endif
37 changes: 37 additions & 0 deletions src/bench/perf.h
@@ -0,0 +1,37 @@
// Copyright (c) 2016 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

/** Functions for measurement of CPU cycles */
#ifndef H_PERF
#define H_PERF

#include <stdint.h>

#if defined(__i386__)

static inline uint64_t perf_cpucycles(void)
{
uint64_t x;
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}

#elif defined(__x86_64__)

static inline uint64_t perf_cpucycles(void)
{
uint32_t hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64_t)lo)|(((uint64_t)hi)<<32);
}
#else

uint64_t perf_cpucycles(void);

#endif

void perf_init(void);
void perf_fini(void);

#endif // H_PERF