Skip to content

Commit 717349b

Browse files
toddlipcon authored and wdberkeley committed
KUDU-2711 (part 2): use a RWMutex for TSDescriptor
One reason that GetTableLocations can end up slow is contention on the TSDescriptor object. This patch changes the spinlock to an rw_spinlock. The benchmark improves throughput by about 77%: table_locations-itest --gtest_filter=\*Bench\* \ --benchmark_num_tablets 300 --benchmark_num_threads 30 \ --benchmark_runtime_secs=10 before fix: Count: 10555 Mean: 8567.29 Percentiles: 0% (min) = 68 25% = 7712 50% (med) = 8320 75% = 9152 95% = 10880 99% = 12224 99.9% = 17024 99.99% = 27776 100% (max) = 30384 with rwlock: Count: 18758 Mean: 4024.09 Percentiles: 0% (min) = 136 25% = 3472 50% (med) = 4032 75% = 4512 95% = 5248 99% = 6048 99.9% = 7712 99.99% = 13440 100% (max) = 13976 Change-Id: Id48635e49f11d20fa1802b17a3ff5771632dfd01 Reviewed-on: http://gerrit.cloudera.org:8080/12614 Tested-by: Kudu Jenkins Reviewed-by: Adar Dembo <adar@cloudera.com> Reviewed-by: Todd Lipcon <todd@apache.org> Reviewed-by: Alexey Serbin <aserbin@cloudera.com>
1 parent 7645f5b commit 717349b

File tree

2 files changed

+20
-19
lines changed

2 files changed

+20
-19
lines changed

src/kudu/master/ts_descriptor.cc

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ Status TSDescriptor::Register(const NodeInstancePB& instance,
160160
LocationCache* location_cache) {
161161
// Do basic registration work under the lock.
162162
{
163-
std::lock_guard<simple_spinlock> l(lock_);
163+
std::lock_guard<rw_spinlock> l(lock_);
164164
RETURN_NOT_OK(RegisterUnlocked(instance, registration));
165165
}
166166

@@ -181,7 +181,7 @@ Status TSDescriptor::Register(const NodeInstancePB& instance,
181181
// Assign the location under the lock if location resolution succeeds. If
182182
// it fails, log the error.
183183
if (s.ok()) {
184-
std::lock_guard<simple_spinlock> l(lock_);
184+
std::lock_guard<rw_spinlock> l(lock_);
185185
location_.emplace(std::move(location));
186186
} else {
187187
KLOG_EVERY_N_SECS(ERROR, 60) << Substitute(
@@ -195,13 +195,13 @@ Status TSDescriptor::Register(const NodeInstancePB& instance,
195195
}
196196

197197
void TSDescriptor::UpdateHeartbeatTime() {
198-
std::lock_guard<simple_spinlock> l(lock_);
198+
std::lock_guard<rw_spinlock> l(lock_);
199199
last_heartbeat_ = MonoTime::Now();
200200
}
201201

202202
MonoDelta TSDescriptor::TimeSinceHeartbeat() const {
203203
MonoTime now(MonoTime::Now());
204-
std::lock_guard<simple_spinlock> l(lock_);
204+
shared_lock<rw_spinlock> l(lock_);
205205
return now - last_heartbeat_;
206206
}
207207

@@ -210,7 +210,7 @@ bool TSDescriptor::PresumedDead() const {
210210
}
211211

212212
int64_t TSDescriptor::latest_seqno() const {
213-
std::lock_guard<simple_spinlock> l(lock_);
213+
shared_lock<rw_spinlock> l(lock_);
214214
return latest_seqno_;
215215
}
216216

@@ -232,33 +232,34 @@ void TSDescriptor::DecayRecentReplicaCreationsUnlocked() {
232232
}
233233

234234
void TSDescriptor::IncrementRecentReplicaCreations() {
235-
std::lock_guard<simple_spinlock> l(lock_);
235+
std::lock_guard<rw_spinlock> l(lock_);
236236
DecayRecentReplicaCreationsUnlocked();
237237
recent_replica_creations_ += 1;
238238
}
239239

240240
double TSDescriptor::RecentReplicaCreations() {
241-
std::lock_guard<simple_spinlock> l(lock_);
241+
// NOTE: not a shared lock because of the "Decay" side effect.
242+
std::lock_guard<rw_spinlock> l(lock_);
242243
DecayRecentReplicaCreationsUnlocked();
243244
return recent_replica_creations_;
244245
}
245246

246247
void TSDescriptor::GetRegistration(ServerRegistrationPB* reg) const {
247-
std::lock_guard<simple_spinlock> l(lock_);
248+
shared_lock<rw_spinlock> l(lock_);
248249
CHECK(registration_) << "No registration";
249250
CHECK_NOTNULL(reg)->CopyFrom(*registration_);
250251
}
251252

252253
void TSDescriptor::GetNodeInstancePB(NodeInstancePB* instance_pb) const {
253-
std::lock_guard<simple_spinlock> l(lock_);
254+
shared_lock<rw_spinlock> l(lock_);
254255
instance_pb->set_permanent_uuid(permanent_uuid_);
255256
instance_pb->set_instance_seqno(latest_seqno_);
256257
}
257258

258259
Status TSDescriptor::ResolveSockaddr(Sockaddr* addr, string* host) const {
259260
vector<HostPort> hostports;
260261
{
261-
std::lock_guard<simple_spinlock> l(lock_);
262+
shared_lock<rw_spinlock> l(lock_);
262263
for (const HostPortPB& addr : registration_->rpc_addresses()) {
263264
hostports.emplace_back(addr.host(), addr.port());
264265
}
@@ -293,7 +294,7 @@ Status TSDescriptor::ResolveSockaddr(Sockaddr* addr, string* host) const {
293294
Status TSDescriptor::GetTSAdminProxy(const shared_ptr<rpc::Messenger>& messenger,
294295
shared_ptr<tserver::TabletServerAdminServiceProxy>* proxy) {
295296
{
296-
std::lock_guard<simple_spinlock> l(lock_);
297+
shared_lock<rw_spinlock> l(lock_);
297298
if (ts_admin_proxy_) {
298299
*proxy = ts_admin_proxy_;
299300
return Status::OK();
@@ -304,7 +305,7 @@ Status TSDescriptor::GetTSAdminProxy(const shared_ptr<rpc::Messenger>& messenger
304305
string host;
305306
RETURN_NOT_OK(ResolveSockaddr(&addr, &host));
306307

307-
std::lock_guard<simple_spinlock> l(lock_);
308+
std::lock_guard<rw_spinlock> l(lock_);
308309
if (!ts_admin_proxy_) {
309310
ts_admin_proxy_.reset(new tserver::TabletServerAdminServiceProxy(
310311
messenger, addr, std::move(host)));
@@ -316,7 +317,7 @@ Status TSDescriptor::GetTSAdminProxy(const shared_ptr<rpc::Messenger>& messenger
316317
Status TSDescriptor::GetConsensusProxy(const shared_ptr<rpc::Messenger>& messenger,
317318
shared_ptr<consensus::ConsensusServiceProxy>* proxy) {
318319
{
319-
std::lock_guard<simple_spinlock> l(lock_);
320+
shared_lock<rw_spinlock> l(lock_);
320321
if (consensus_proxy_) {
321322
*proxy = consensus_proxy_;
322323
return Status::OK();
@@ -327,7 +328,7 @@ Status TSDescriptor::GetConsensusProxy(const shared_ptr<rpc::Messenger>& messeng
327328
string host;
328329
RETURN_NOT_OK(ResolveSockaddr(&addr, &host));
329330

330-
std::lock_guard<simple_spinlock> l(lock_);
331+
std::lock_guard<rw_spinlock> l(lock_);
331332
if (!consensus_proxy_) {
332333
consensus_proxy_.reset(new consensus::ConsensusServiceProxy(
333334
messenger, addr, std::move(host)));
@@ -337,7 +338,7 @@ Status TSDescriptor::GetConsensusProxy(const shared_ptr<rpc::Messenger>& messeng
337338
}
338339

339340
string TSDescriptor::ToString() const {
340-
std::lock_guard<simple_spinlock> l(lock_);
341+
shared_lock<rw_spinlock> l(lock_);
341342
const auto& addr = registration_->rpc_addresses(0);
342343
return Substitute("$0 ($1:$2)", permanent_uuid_, addr.host(), addr.port());
343344
}

src/kudu/master/ts_descriptor.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,21 +114,21 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> {
114114
// Set the number of live replicas (i.e. running or bootstrapping).
115115
void set_num_live_replicas(int n) {
116116
DCHECK_GE(n, 0);
117-
std::lock_guard<simple_spinlock> l(lock_);
117+
std::lock_guard<rw_spinlock> l(lock_);
118118
num_live_replicas_ = n;
119119
}
120120

121121
// Return the number of live replicas (i.e running or bootstrapping).
122122
int num_live_replicas() const {
123-
std::lock_guard<simple_spinlock> l(lock_);
123+
shared_lock<rw_spinlock> l(lock_);
124124
return num_live_replicas_;
125125
}
126126

127127
// Return the location of the tablet server. This returns a safe copy
128128
// since the location could change at any time if the tablet server
129129
// re-registers.
130130
boost::optional<std::string> location() const {
131-
std::lock_guard<simple_spinlock> l(lock_);
131+
shared_lock<rw_spinlock> l(lock_);
132132
return location_;
133133
}
134134

@@ -153,7 +153,7 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> {
153153

154154
void DecayRecentReplicaCreationsUnlocked();
155155

156-
mutable simple_spinlock lock_;
156+
mutable rw_spinlock lock_;
157157

158158
const std::string permanent_uuid_;
159159
int64_t latest_seqno_;

0 commit comments

Comments (0)