Skip to content

Commit af23f4d

Browse files
felixhandte and facebook-github-bot
authored and committed
Add a Benchmark to Measure Context Pool Performance
Summary: Per Orvid's comment on D64837870, it would be interesting to move to dynamically sizing the core local context pool cache. I am interested in exploring what that sizing policy should be. In order to do so, I want to be able to measure the performance of the pools under different scenarios. In the past I've done so by measuring overall compression performance of small inputs but that's a pretty crude method. This is more direct and more sensitive. Although of course it is also more artificial. Reviewed By: yfeldblum Differential Revision: D64973230 fbshipit-source-id: b962eb9ce65dd3eb4e77b19b69d003bf31fca1d1
1 parent 3257019 commit af23f4d

File tree

2 files changed

+309
-0
lines changed

2 files changed

+309
-0
lines changed

folly/compression/test/BUCK

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,22 @@ fbcode_target(
5252
],
5353
)
5454

55+
# Multithreaded benchmark for the compression context pools; see
# CompressionContextPoolBenchmark.cpp for the scenarios measured.
fbcode_target(
    _kind = cpp_binary,
    name = "compression_context_pool_benchmark",
    srcs = ["CompressionContextPoolBenchmark.cpp"],
    headers = [],
    deps = [
        "//folly:benchmark",
        "//folly/compression:compression_context_pool",
        "//folly/compression:compression_context_pool_singletons",
        "//folly/lang:keep",
    ],
    external_deps = [
        "glog",
    ],
)
70+
5571
fbcode_target(
5672
_kind = cpp_binary,
5773
name = "quotient_multiset_benchmark",
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <folly/compression/CompressionContextPool.h>
18+
#include <folly/compression/CompressionContextPoolSingletons.h>
19+
#include <folly/compression/CompressionCoreLocalContextPool.h>
20+
21+
#include <memory>
22+
#include <thread>
23+
24+
#include <glog/logging.h>
25+
26+
#include <folly/Benchmark.h>
27+
28+
namespace {
29+
30+
// Minimal stand-in for a compression context. It carries one byte of
// state so acquiring it does real (if tiny) work, and it is aligned to
// its own cache line so distinct pool slots cannot false-share.
class alignas(folly::hardware_destructive_interference_size) Foo {
 public:
  // Simulates using an acquired context.
  void use() { in_use_ = true; }

  // Simulates resetting a context before it returns to the pool.
  void reset() { in_use_ = false; }

 private:
  bool in_use_{false};
};
39+
40+
struct FooCreator {
41+
Foo* operator()() { return new Foo(); }
42+
};
43+
44+
struct FooDeleter {
45+
void operator()(Foo* f) { delete f; }
46+
};
47+
48+
struct FooResetter {
49+
void operator()(Foo* f) { f->reset(); }
50+
};
51+
52+
// Baseline pool under test: a single shared stack of contexts.
using FooStackPool = folly::compression::
    CompressionContextPool<Foo, FooCreator, FooDeleter, FooResetter>;

// Core-local pool under test, parameterized on the number of cache
// stripes so the benchmarks can sweep slot counts.
template <int NumStripes>
using FooCoreLocalPool = folly::compression::CompressionCoreLocalContextPool<
    Foo,
    FooCreator,
    FooDeleter,
    FooResetter,
    NumStripes>;
62+
63+
// Hammers `pool` with get()/use() cycles from `numThreads` concurrent
// threads and returns the per-thread iteration count, as required by
// BENCHMARK_MULTI. Thread startup/teardown is excluded from the timed
// region via BenchmarkSuspender. Threads that finish their quota early
// keep exercising the pool (in smaller batches) until all threads are
// done, so contention stays constant for the whole measured window.
template <typename Pool>
size_t multithreadedBench(size_t iters, size_t numThreads) {
  // Suspend timing while we set up the pool and spawn threads.
  folly::BenchmarkSuspender startup_suspender;

  // Scale up the framework's iteration count so each measurement does
  // enough work to be meaningful.
  iters *= 1024;
  const size_t iters_per_thread = iters;

  Pool pool;

  // Startup/shutdown handshake state shared with the worker threads.
  std::atomic<size_t> ready{0};
  std::atomic<size_t> finished{0};
  std::atomic<bool> go{false};
  std::atomic<bool> exit{false};
  std::atomic<size_t> total{0};

  {
    // Pre-init enough objects. Holding numThreads refs at once forces
    // the pool to allocate them all before the timed region begins.
    std::vector<typename Pool::Ref> refs;
    while (refs.size() < numThreads) {
      refs.push_back(pool.get());
    }
  }
  // Presumably redistributes the just-released contexts into the
  // pool's cache structure before measurement — TODO confirm against
  // the pool's flush_shallow() contract.
  pool.flush_shallow();

  // if we happen to be using the tlsRoundRobin, then sequentially
  // assigning the thread identifiers is the unlikely best-case scenario.
  // We don't want to unfairly benefit or penalize. Computing the exact
  // maximum likelihood of the probability distributions is annoying, so
  // I approximate as 2/5 of the ids that have no threads, 2/5 that have
  // 1, 2/15 that have 2, and 1/15 that have 3. We accomplish this by
  // wrapping back to slot 0 when we hit 1/15 and 1/5.

  std::vector<std::thread> threads;
  while (threads.size() < numThreads) {
    threads.push_back(std::thread([&, iters_per_thread]() {
      size_t local_count = 0;

      // Batch size for the keep-busy phase below; small enough to
      // notice `exit` promptly, bounded below so we always make progress.
      auto finish_loop_iters = iters_per_thread / 10;
      if (finish_loop_iters < 10) {
        finish_loop_iters = 10;
      }

      // Signal readiness, then spin until the main thread starts the race.
      ready++;

      while (!go.load()) {
        std::this_thread::yield();
      }

      // Main measured loop: acquire, touch, release (via Ref dtor).
      for (size_t i = iters_per_thread; i > 0; --i) {
        auto ref = pool.get();
        ref->use();
        folly::doNotOptimizeAway(ref);
      }

      local_count += iters_per_thread;

      finished++;

      while (!exit.load()) {
        // Keep accumulating iterations until all threads are finished.
        for (size_t i = finish_loop_iters; i > 0; --i) {
          auto ref = pool.get();
          ref->use();
          folly::doNotOptimizeAway(ref);
        }
        local_count += finish_loop_iters;
      }

      total += local_count;

      // LOG(INFO) << local_count;
    }));

    if (threads.size() == numThreads / 15 || threads.size() == numThreads / 5) {
      // create a few dummy threads to wrap back around to 0 mod numCpus
      // NOTE(review): the comment above says "mod numCpus" but the loop
      // below runs up to numThreads — confirm this matches the intended
      // thread-id skewing described earlier.
      for (size_t i = threads.size(); i != numThreads; ++i) {
        std::thread t([&]() {});
        t.join();
      }
    }
  }

  // Wait for all workers to reach the starting line, then begin timing.
  while (ready < numThreads) {
    std::this_thread::yield();
  }
  startup_suspender.dismiss();
  go = true;

  // Timed region ends once every worker has completed its quota.
  while (finished < numThreads) {
    std::this_thread::yield();
  }

  exit = true;

  // Suspend timing again while the keep-busy loops drain and join.
  folly::BenchmarkSuspender end_suspender;

  for (auto& thr : threads) {
    thr.join();
  }

  // LOG(INFO) << total.load();

  return total.load() / numThreads;
}
167+
168+
// Registers a BENCHMARK_MULTI entry named `Name` that drives
// multithreadedBench over `Pool` with `NumThreads` threads. The returned
// per-thread iteration count lets the framework report per-op cost.
#define POOL_BENCHMARK(Name, Pool, NumThreads) \
  BENCHMARK_MULTI(Name, n) { \
    return multithreadedBench<Pool>(n, NumThreads); \
  }

// Benchmark for the baseline shared-stack pool.
#define REGULAR_POOL_BENCHMARK(NumThreads) \
  POOL_BENCHMARK(StackPool_##NumThreads##_threads, FooStackPool, NumThreads)

// Benchmark for the core-local pool with `NumSlots` stripes.
#define CORE_LOCAL_POOL_BENCHMARK(NumSlots, NumThreads) \
  POOL_BENCHMARK( \
      CLPool_##NumSlots##_slots_##NumThreads##_threads, \
      FooCoreLocalPool<NumSlots>, \
      NumThreads)

// Baseline: shared stack pool across a sweep of thread counts.
REGULAR_POOL_BENCHMARK(1)
REGULAR_POOL_BENCHMARK(2)
REGULAR_POOL_BENCHMARK(4)
REGULAR_POOL_BENCHMARK(8)
REGULAR_POOL_BENCHMARK(16)
REGULAR_POOL_BENCHMARK(32)
REGULAR_POOL_BENCHMARK(64)
REGULAR_POOL_BENCHMARK(128)

BENCHMARK_DRAW_LINE();

// Core-local pool: sweep slot counts at each thread count below.
CORE_LOCAL_POOL_BENCHMARK(1, 1)
CORE_LOCAL_POOL_BENCHMARK(2, 1)
CORE_LOCAL_POOL_BENCHMARK(4, 1)
CORE_LOCAL_POOL_BENCHMARK(8, 1)
CORE_LOCAL_POOL_BENCHMARK(16, 1)
CORE_LOCAL_POOL_BENCHMARK(32, 1)
CORE_LOCAL_POOL_BENCHMARK(64, 1)
CORE_LOCAL_POOL_BENCHMARK(128, 1)
CORE_LOCAL_POOL_BENCHMARK(256, 1)

BENCHMARK_DRAW_LINE();

CORE_LOCAL_POOL_BENCHMARK(1, 2)
CORE_LOCAL_POOL_BENCHMARK(2, 2)
CORE_LOCAL_POOL_BENCHMARK(4, 2)
CORE_LOCAL_POOL_BENCHMARK(8, 2)
CORE_LOCAL_POOL_BENCHMARK(16, 2)
CORE_LOCAL_POOL_BENCHMARK(32, 2)
CORE_LOCAL_POOL_BENCHMARK(64, 2)
CORE_LOCAL_POOL_BENCHMARK(128, 2)
CORE_LOCAL_POOL_BENCHMARK(256, 2)

BENCHMARK_DRAW_LINE();

CORE_LOCAL_POOL_BENCHMARK(1, 4)
CORE_LOCAL_POOL_BENCHMARK(2, 4)
CORE_LOCAL_POOL_BENCHMARK(4, 4)
CORE_LOCAL_POOL_BENCHMARK(8, 4)
CORE_LOCAL_POOL_BENCHMARK(16, 4)
CORE_LOCAL_POOL_BENCHMARK(32, 4)
CORE_LOCAL_POOL_BENCHMARK(64, 4)
CORE_LOCAL_POOL_BENCHMARK(128, 4)
CORE_LOCAL_POOL_BENCHMARK(256, 4)

BENCHMARK_DRAW_LINE();

CORE_LOCAL_POOL_BENCHMARK(1, 8)
CORE_LOCAL_POOL_BENCHMARK(2, 8)
CORE_LOCAL_POOL_BENCHMARK(4, 8)
CORE_LOCAL_POOL_BENCHMARK(8, 8)
CORE_LOCAL_POOL_BENCHMARK(16, 8)
CORE_LOCAL_POOL_BENCHMARK(32, 8)
CORE_LOCAL_POOL_BENCHMARK(64, 8)
CORE_LOCAL_POOL_BENCHMARK(128, 8)
CORE_LOCAL_POOL_BENCHMARK(256, 8)

BENCHMARK_DRAW_LINE();

CORE_LOCAL_POOL_BENCHMARK(1, 16)
CORE_LOCAL_POOL_BENCHMARK(2, 16)
CORE_LOCAL_POOL_BENCHMARK(4, 16)
CORE_LOCAL_POOL_BENCHMARK(8, 16)
CORE_LOCAL_POOL_BENCHMARK(16, 16)
CORE_LOCAL_POOL_BENCHMARK(32, 16)
CORE_LOCAL_POOL_BENCHMARK(64, 16)
CORE_LOCAL_POOL_BENCHMARK(128, 16)
CORE_LOCAL_POOL_BENCHMARK(256, 16)

BENCHMARK_DRAW_LINE();

CORE_LOCAL_POOL_BENCHMARK(1, 32)
CORE_LOCAL_POOL_BENCHMARK(2, 32)
CORE_LOCAL_POOL_BENCHMARK(4, 32)
CORE_LOCAL_POOL_BENCHMARK(8, 32)
CORE_LOCAL_POOL_BENCHMARK(16, 32)
CORE_LOCAL_POOL_BENCHMARK(32, 32)
CORE_LOCAL_POOL_BENCHMARK(64, 32)
CORE_LOCAL_POOL_BENCHMARK(128, 32)
CORE_LOCAL_POOL_BENCHMARK(256, 32)

BENCHMARK_DRAW_LINE();

CORE_LOCAL_POOL_BENCHMARK(1, 64)
CORE_LOCAL_POOL_BENCHMARK(2, 64)
CORE_LOCAL_POOL_BENCHMARK(4, 64)
CORE_LOCAL_POOL_BENCHMARK(8, 64)
CORE_LOCAL_POOL_BENCHMARK(16, 64)
CORE_LOCAL_POOL_BENCHMARK(32, 64)
CORE_LOCAL_POOL_BENCHMARK(64, 64)
CORE_LOCAL_POOL_BENCHMARK(128, 64)
CORE_LOCAL_POOL_BENCHMARK(256, 64)

BENCHMARK_DRAW_LINE();

CORE_LOCAL_POOL_BENCHMARK(1, 128)
CORE_LOCAL_POOL_BENCHMARK(2, 128)
CORE_LOCAL_POOL_BENCHMARK(4, 128)
CORE_LOCAL_POOL_BENCHMARK(8, 128)
CORE_LOCAL_POOL_BENCHMARK(16, 128)
CORE_LOCAL_POOL_BENCHMARK(32, 128)
CORE_LOCAL_POOL_BENCHMARK(64, 128)
CORE_LOCAL_POOL_BENCHMARK(128, 128)
CORE_LOCAL_POOL_BENCHMARK(256, 128)

} // anonymous namespace
288+
289+
int main(int argc, char** argv) {
  // Parse benchmark-framework flags (e.g. --bm_min_iters); the final
  // `true` removes recognized flags from argv.
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  folly::runBenchmarks();
  return 0;
}

0 commit comments

Comments
 (0)