From c571293c4389e801834dd8686f7d3f5f17a1d159 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Thu, 9 Mar 2023 16:14:16 -0800 Subject: [PATCH] Fix race in acquire The assumption for moving items was that once item is unmarked no one can add new waiters for that item. However, since incrementing item ref count was not done under the MoveMap lock, there was a race: item could have been unmarked right after incRef returned incFailedMoving. --- cachelib/allocator/CacheAllocator-inl.h | 36 ++++++++++++++++--------- cachelib/allocator/CacheAllocator.h | 2 +- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 01a5f5e49e..cee53dd156 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -1024,16 +1024,21 @@ CacheAllocator::acquire(Item* it) { SCOPE_FAIL { stats_.numRefcountOverflow.inc(); }; // TODO: do not block incRef for child items to avoid deadlock - auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem(); - auto incRes = incRef(*it, failIfMoving); - if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) { - return WriteHandle{it, *this}; - } else if (incRes == RefcountWithFlags::incResult::incFailedEviction){ - // item is being evicted - return WriteHandle{}; - } else { - // item is being moved - wait for completion - return handleWithWaitContextForMovingItem(*it); + const auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem(); + + while (true) { + auto incRes = incRef(*it, failIfMoving); + if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) { + return WriteHandle{it, *this}; + } else if (incRes == RefcountWithFlags::incResult::incFailedEviction){ + // item is being evicted + return WriteHandle{}; + } else { + // item is being moved - wait for completion + WriteHandle handle; + if (tryGetHandleWithWaitContextForMovingItem(*it, handle)) + return handle; + } } } @@ -1255,20 +1260,25 @@ CacheAllocator::insertOrReplace(const WriteHandle& handle) { * 3. Wait until the moving thread will complete its job. */ template -typename CacheAllocator::WriteHandle -CacheAllocator::handleWithWaitContextForMovingItem(Item& item) { +bool +CacheAllocator::tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle) { auto shard = getShardForKey(item.getKey()); auto& movesMap = getMoveMapForShard(shard); { auto lock = getMoveLockForShard(shard); + // item might have been evicted or moved before the lock was acquired + if (!item.isMoving()) + return false; + WriteHandle hdl{*this}; auto waitContext = hdl.getItemWaitContext(); auto ret = movesMap.try_emplace(item.getKey(), std::make_unique()); ret.first->second->addWaiter(std::move(waitContext)); - return hdl; + handle = std::move(hdl); + return true; } } diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 39a1c4881b..876931fd8f 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -2250,7 +2250,7 @@ auto& mmContainer = getMMContainer(tid, pid, cid); size_t memoryTierSize(TierId tid) const; - WriteHandle handleWithWaitContextForMovingItem(Item& item); + bool tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle); size_t wakeUpWaitersLocked(folly::StringPiece key, WriteHandle&& handle);