Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 409 lines (368 sloc) 12.88 kb
27494a2 jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored
1 /*
2 * Copyright 2012 Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef FOLLY_BENCHMARK_H_
18 #define FOLLY_BENCHMARK_H_
19
20 #include "folly/Preprocessor.h" // for FB_ANONYMOUS_VARIABLE
21 #include <cassert>
22 #include <ctime>
23 #include <boost/function_types/function_arity.hpp>
24 #include <functional>
25 #include <glog/logging.h>
26 #include <gflags/gflags.h>
27 #include <limits>
28
29 DECLARE_bool(benchmark);
30
13692fc add bm_max/min_iters and bm_regex
Spencer Ahrens authored
31
27494a2 jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored
32 namespace folly {
33
34 /**
35 * Runs all benchmarks defined. Usually put in main().
36 */
37 void runBenchmarks();
38
39 /**
40 * Runs all benchmarks defined if and only if the --benchmark flag has
41 * been passed to the program. Usually put in main().
42 */
43 inline bool runBenchmarksOnFlag() {
44 if (FLAGS_benchmark) {
45 runBenchmarks();
46 }
47 return FLAGS_benchmark;
48 }
49
50 namespace detail {
51
52 /**
53 * This is the clock ID used for measuring time. On older kernels, the
54 * resolution of this clock will be very coarse, which will cause the
55 * benchmarks to fail.
56 */
57 enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
58
59 /**
60 * Adds a benchmark wrapped in a std::function. Only used
61 * internally. Pass by value is intentional.
62 */
63 void addBenchmarkImpl(const char* file,
64 const char* name,
65 std::function<uint64_t(unsigned int)>);
66
67 /**
68 * Takes the difference between two timespec values. end is assumed to
69 * occur after start.
70 */
71 inline uint64_t timespecDiff(timespec end, timespec start) {
72 if (end.tv_sec == start.tv_sec) {
73 assert(end.tv_nsec >= start.tv_nsec);
74 return end.tv_nsec - start.tv_nsec;
75 }
76 assert(end.tv_sec > start.tv_sec &&
77 end.tv_sec - start.tv_sec <
78 std::numeric_limits<uint64_t>::max() / 1000000000UL);
79 return (end.tv_sec - start.tv_sec) * 1000000000UL
80 + end.tv_nsec - start.tv_nsec;
81 }
82
83 /**
84 * Takes the difference between two sets of timespec values. The first
85 * two come from a high-resolution clock whereas the other two come
86 * from a low-resolution clock. The crux of the matter is that
87 * high-res values may be bogus as documented in
88 * http://linux.die.net/man/3/clock_gettime. The trouble is when the
89 * running process migrates from one CPU to another, which is more
90 * likely for long-running processes. Therefore we watch for high
91 * differences between the two timings.
92 *
93 * This function is subject to further improvements.
94 */
95 inline uint64_t timespecDiff(timespec end, timespec start,
96 timespec endCoarse, timespec startCoarse) {
97 auto fine = timespecDiff(end, start);
98 auto coarse = timespecDiff(endCoarse, startCoarse);
99 if (coarse - fine >= 1000000) {
100 // The fine time is in all likelihood bogus
101 return coarse;
102 }
103 return fine;
104 }
105
106 } // namespace detail
107
108 /**
109 * Supporting type for BENCHMARK_SUSPEND defined below.
110 */
111 struct BenchmarkSuspender {
112 BenchmarkSuspender() {
113 CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
114 }
115
116 BenchmarkSuspender(const BenchmarkSuspender &) = delete;
117 BenchmarkSuspender(BenchmarkSuspender && rhs) {
118 start = rhs.start;
119 rhs.start.tv_nsec = rhs.start.tv_sec = 0;
120 }
121
122 BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
123 BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
124 if (start.tv_nsec > 0 || start.tv_sec > 0) {
125 tally();
126 }
127 start = rhs.start;
128 rhs.start.tv_nsec = rhs.start.tv_sec = 0;
129 return *this;
130 }
131
132 ~BenchmarkSuspender() {
133 if (start.tv_nsec > 0 || start.tv_sec > 0) {
134 tally();
135 }
136 }
137
138 void dismiss() {
139 assert(start.tv_nsec > 0 || start.tv_sec > 0);
140 tally();
141 start.tv_nsec = start.tv_sec = 0;
142 }
143
144 void rehire() {
145 assert(start.tv_nsec == 0 || start.tv_sec == 0);
146 CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
147 }
148
149 /**
150 * This helps the macro definition. To get around the dangers of
151 * operator bool, returns a pointer to member (which allows no
152 * arithmetic).
153 */
154 operator int BenchmarkSuspender::*() const {
155 return nullptr;
156 }
157
158 /**
159 * Accumulates nanoseconds spent outside benchmark.
160 */
161 typedef uint64_t NanosecondsSpent;
162 static NanosecondsSpent nsSpent;
163
164 private:
165 void tally() {
166 timespec end;
167 CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
168 nsSpent += detail::timespecDiff(end, start);
169 start = end;
170 }
171
172 timespec start;
173 };
174
175 /**
176 * Adds a benchmark. Usually not called directly but instead through
177 * the macro BENCHMARK defined below. The lambda function involved
178 * must take exactly one parameter of type unsigned, and the benchmark
179 * uses it with counter semantics (iteration occurs inside the
180 * function).
181 */
182 template <typename Lambda>
183 typename std::enable_if<
184 boost::function_types::function_arity<decltype(&Lambda::operator())>::value
185 == 2
186 >::type
187 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
188 auto execute = [=](unsigned int times) {
189 BenchmarkSuspender::nsSpent = 0;
190 timespec start, end;
191
192 // CORE MEASUREMENT STARTS
193 CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
194 lambda(times);
195 CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
196 // CORE MEASUREMENT ENDS
197
198 return detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent;
199 };
200
201 detail::addBenchmarkImpl(file, name,
202 std::function<uint64_t(unsigned int)>(execute));
203 }
204
205 /**
206 * Adds a benchmark. Usually not called directly but instead through
207 * the macro BENCHMARK defined below. The lambda function involved
208 * must take zero parameters, and the benchmark calls it repeatedly
209 * (iteration occurs outside the function).
210 */
211 template <typename Lambda>
212 typename std::enable_if<
213 boost::function_types::function_arity<decltype(&Lambda::operator())>::value
214 == 1
215 >::type
216 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
217 addBenchmark(file, name, [=](unsigned int times) {
218 while (times-- > 0) {
219 lambda();
220 }
221 });
222 }
223
224 /**
225 * Call doNotOptimizeAway(var) against variables that you use for
226 * benchmarking but otherwise are useless. The compiler tends to do a
227 * good job at eliminating unused variables, and this function fools
228 * it into thinking var is in fact needed.
229 */
230 template <class T>
231 void doNotOptimizeAway(T&& datum) {
232 asm volatile("" : "+r" (datum));
233 }
234
235 } // namespace folly
236
237 /**
238 * Introduces a benchmark function. Used internally, see BENCHMARK and
239 * friends below.
240 */
241 #define BENCHMARK_IMPL(funName, stringName, paramType, paramName) \
242 static void funName(paramType); \
243 static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
244 ::folly::addBenchmark(__FILE__, stringName, \
245 [](paramType paramName) { funName(paramName); }), \
246 true); \
247 static void funName(paramType paramName)
248
249 /**
250 * Introduces a benchmark function. Use with either one one or two
251 * arguments. The first is the name of the benchmark. Use something
252 * descriptive, such as insertVectorBegin. The second argument may be
253 * missing, or could be a symbolic counter. The counter dictates how
254 * many internal iteration the benchmark does. Example:
255 *
256 * BENCHMARK(vectorPushBack) {
257 * vector<int> v;
258 * v.push_back(42);
259 * }
260 *
261 * BENCHMARK(insertVectorBegin, n) {
262 * vector<int> v;
263 * FOR_EACH_RANGE (i, 0, n) {
264 * v.insert(v.begin(), 42);
265 * }
266 * }
267 */
268 #define BENCHMARK(name, ...) \
269 BENCHMARK_IMPL( \
270 name, \
271 FB_STRINGIZE(name), \
272 FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
273 __VA_ARGS__)
274
275 /**
276 * Defines a benchmark that passes a parameter to another one. This is
277 * common for benchmarks that need a "problem size" in addition to
278 * "number of iterations". Consider:
279 *
280 * void pushBack(uint n, size_t initialSize) {
281 * vector<int> v;
282 * BENCHMARK_SUSPEND {
283 * v.resize(initialSize);
284 * }
285 * FOR_EACH_RANGE (i, 0, n) {
286 * v.push_back(i);
287 * }
288 * }
289 * BENCHMARK_PARAM(pushBack, 0)
290 * BENCHMARK_PARAM(pushBack, 1000)
291 * BENCHMARK_PARAM(pushBack, 1000000)
292 *
293 * The benchmark above estimates the speed of push_back at different
294 * initial sizes of the vector. The framework will pass 0, 1000, and
295 * 1000000 for initialSize, and the iteration count for n.
296 */
297 #define BENCHMARK_PARAM(name, param) \
4a7f5a2 Adam Simpkins Add BucketedTimeSeries to folly
simpkins authored
298 BENCHMARK_NAMED_PARAM(name, param, param)
299
300 /*
301 * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
302 * parameter, rather than using the parameter value.
303 *
304 * Useful when the parameter value is not a valid token for string pasting,
305 * of when you want to specify multiple parameter arguments.
306 *
307 * For example:
308 *
309 * void addValue(uint n, int64_t bucketSize, int64_t min, int64_t max) {
310 * Histogram<int64_t> hist(bucketSize, min, max);
311 * int64_t num = min;
312 * FOR_EACH_RANGE (i, 0, n) {
313 * hist.addValue(num);
314 * ++num;
315 * if (num > max) { num = min; }
316 * }
317 * }
318 *
319 * BENCHMARK_NAMED_PARAM(addValue, 0_to_100, 1, 0, 100)
320 * BENCHMARK_NAMED_PARAM(addValue, 0_to_1000, 10, 0, 1000)
321 * BENCHMARK_NAMED_PARAM(addValue, 5k_to_20k, 250, 5000, 20000)
322 */
323 #define BENCHMARK_NAMED_PARAM(name, param_name, ...) \
27494a2 jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored
324 BENCHMARK_IMPL( \
4a7f5a2 Adam Simpkins Add BucketedTimeSeries to folly
simpkins authored
325 FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
326 FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
27494a2 jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored
327 unsigned, \
328 iters) { \
4a7f5a2 Adam Simpkins Add BucketedTimeSeries to folly
simpkins authored
329 name(iters, ## __VA_ARGS__); \
27494a2 jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored
330 }
331
332 /**
333 * Just like BENCHMARK, but prints the time relative to a
334 * baseline. The baseline is the most recent BENCHMARK() seen in
335 * lexical order. Example:
336 *
337 * // This is the baseline
338 * BENCHMARK(insertVectorBegin, n) {
339 * vector<int> v;
340 * FOR_EACH_RANGE (i, 0, n) {
341 * v.insert(v.begin(), 42);
342 * }
343 * }
344 *
345 * BENCHMARK_RELATIVE(insertListBegin, n) {
346 * list<int> s;
347 * FOR_EACH_RANGE (i, 0, n) {
348 * s.insert(s.begin(), 42);
349 * }
350 * }
351 *
352 * Any number of relative benchmark can be associated with a
353 * baseline. Another BENCHMARK() occurrence effectively establishes a
354 * new baseline.
355 */
356 #define BENCHMARK_RELATIVE(name, ...) \
357 BENCHMARK_IMPL( \
358 name, \
359 "%" FB_STRINGIZE(name), \
360 FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
361 __VA_ARGS__)
362
363 /**
364 * A combination of BENCHMARK_RELATIVE and BENCHMARK_PARAM.
365 */
366 #define BENCHMARK_RELATIVE_PARAM(name, param) \
4a7f5a2 Adam Simpkins Add BucketedTimeSeries to folly
simpkins authored
367 BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
368
369 /**
370 * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
371 */
372 #define BENCHMARK_RELATIVE_NAMED_PARAM(name, param_name, ...) \
27494a2 jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored
373 BENCHMARK_IMPL( \
4a7f5a2 Adam Simpkins Add BucketedTimeSeries to folly
simpkins authored
374 FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
375 "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
27494a2 jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored
376 unsigned, \
377 iters) { \
4a7f5a2 Adam Simpkins Add BucketedTimeSeries to folly
simpkins authored
378 name(iters, ## __VA_ARGS__); \
27494a2 jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored
379 }
380
381 /**
382 * Draws a line of dashes.
383 */
384 #define BENCHMARK_DRAW_LINE() \
385 static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
386 ::folly::addBenchmark(__FILE__, "-", []() { }), \
387 true);
388
389 /**
390 * Allows execution of code that doesn't count torward the benchmark's
391 * time budget. Example:
392 *
393 * BENCHMARK_START_GROUP(insertVectorBegin, n) {
394 * vector<int> v;
8108795 Fix a typo in benchmark comments
Sherman Ye authored
395 * BENCHMARK_SUSPEND {
27494a2 jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored
396 * v.reserve(n);
397 * }
398 * FOR_EACH_RANGE (i, 0, n) {
399 * v.insert(v.begin(), 42);
400 * }
401 * }
402 */
403 #define BENCHMARK_SUSPEND \
404 if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) = \
405 ::folly::BenchmarkSuspender()) {} \
406 else
407
408 #endif // FOLLY_BENCHMARK_H_
Something went wrong with that request. Please try again.