Skip to content

Commit

Permalink
os/bluestore: fix split vs finish_write race
Browse files Browse the repository at this point in the history
In _tcx_finish(), we were looking at the right Cache for the collection,
and then calling finish_write with that Cache and taking the lock.  This
could race with a split_cache() such that after we got the lock the
collection was not on a different cache.  This would in turn lead to a
failed assertion later on in _rm_buffer when the sharedblob was trimmed.

Fixes: http://tracker.ceph.com/issues/24439
Signed-off-by: Sage Weil <sage@redhat.com>
(cherry picked from commit f21f1f1)
  • Loading branch information
liewegas authored and Prashant D committed Aug 27, 2018
1 parent f2152d7 commit e41f2d2
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 5 deletions.
23 changes: 19 additions & 4 deletions src/os/bluestore/BlueStore.cc
Expand Up @@ -1422,10 +1422,8 @@ void BlueStore::BufferSpace::read(
cache->logger->inc(l_bluestore_buffer_miss_bytes, miss_bytes);
}

void BlueStore::BufferSpace::finish_write(Cache* cache, uint64_t seq)
void BlueStore::BufferSpace::_finish_write(Cache* cache, uint64_t seq)
{
std::lock_guard<std::recursive_mutex> l(cache->lock);

auto i = writing.begin();
while (i != writing.end()) {
if (i->seq > seq) {
Expand Down Expand Up @@ -1696,6 +1694,23 @@ void BlueStore::SharedBlob::put_ref(uint64_t offset, uint32_t length,
unshare && !*unshare ? unshare : nullptr);
}

void BlueStore::SharedBlob::finish_write(uint64_t seq)
{
while (true) {
Cache *cache = coll->cache;
std::lock_guard<std::recursive_mutex> l(cache->lock);
if (coll->cache != cache) {
ldout(coll->store->cct, 20) << __func__
<< " raced with sb cache update, was " << cache
<< ", now " << coll->cache << ", retrying"
<< dendl;
continue;
}
bc._finish_write(cache, seq);
break;
}
}

// SharedBlobSet

#undef dout_prefix
Expand Down Expand Up @@ -8672,7 +8687,7 @@ void BlueStore::_txc_finish(TransContext *txc)
assert(txc->state == TransContext::STATE_FINISHING);

for (auto& sb : txc->shared_blobs_written) {
sb->bc.finish_write(sb->get_cache(), txc->seq);
sb->finish_write(txc->seq);
}
txc->shared_blobs_written.clear();

Expand Down
4 changes: 3 additions & 1 deletion src/os/bluestore/BlueStore.h
Expand Up @@ -334,7 +334,7 @@ class BlueStore : public ObjectStore,
b->cache_private = _discard(cache, offset, bl.length());
_add_buffer(cache, b, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1, nullptr);
}
void finish_write(Cache* cache, uint64_t seq);
void _finish_write(Cache* cache, uint64_t seq);
void did_read(Cache* cache, uint32_t offset, bufferlist& bl) {
std::lock_guard<std::recursive_mutex> l(cache->lock);
Buffer *b = new Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl);
Expand Down Expand Up @@ -410,6 +410,8 @@ class BlueStore : public ObjectStore,
void put_ref(uint64_t offset, uint32_t length,
PExtentVector *r, bool *unshare);

void finish_write(uint64_t seq);

friend bool operator==(const SharedBlob &l, const SharedBlob &r) {
return l.get_sbid() == r.get_sbid();
}
Expand Down

0 comments on commit e41f2d2

Please sign in to comment.