diff --git a/.github/workflows/build-cachelib-centos-8-1.yml b/.github/workflows/build-cachelib-centos-8-1.yml index 5eb1090b0a..3983e0c78b 100644 --- a/.github/workflows/build-cachelib-centos-8-1.yml +++ b/.github/workflows/build-cachelib-centos-8-1.yml @@ -14,6 +14,7 @@ name: build-cachelib-centos-8-1 on: # push: + pull_request: schedule: - cron: '0 11 * * 1,3,5' jobs: diff --git a/.github/workflows/build-cachelib-centos-8-5.yml b/.github/workflows/build-cachelib-centos-8-5.yml index 3ffee37765..4e6c2d12e1 100644 --- a/.github/workflows/build-cachelib-centos-8-5.yml +++ b/.github/workflows/build-cachelib-centos-8-5.yml @@ -14,6 +14,7 @@ name: build-cachelib-centos-8.5 on: # push: + pull_request: schedule: - cron: '0 9 * * *' jobs: diff --git a/.github/workflows/build-cachelib-debian-10.yml b/.github/workflows/build-cachelib-debian-10.yml index c7c67e0724..7f0ab29a6c 100644 --- a/.github/workflows/build-cachelib-debian-10.yml +++ b/.github/workflows/build-cachelib-debian-10.yml @@ -14,6 +14,7 @@ name: build-cachelib-debian-10 on: # push: + pull_request: schedule: - cron: '0 13 * * *' jobs: @@ -51,6 +52,9 @@ jobs: g++ - || true - name: "checkout sources" uses: actions/checkout@v2 + - name: "Add Git safe directory" + # Workaround for Docker image bug (GitHub issue #199). + run: git config --system --add safe.directory $GITHUB_WORKSPACE - name: "Install Prerequisites" run: ./contrib/build.sh -S -B - name: "Test: update-submodules" diff --git a/.github/workflows/build-cachelib-fedora-36.yml b/.github/workflows/build-cachelib-fedora-36.yml index 216dbf5841..f8c0424400 100644 --- a/.github/workflows/build-cachelib-fedora-36.yml +++ b/.github/workflows/build-cachelib-fedora-36.yml @@ -14,6 +14,7 @@ name: build-cachelib-fedora-36 on: # push: + pull_request: schedule: - cron: '0 19 * * *' jobs: diff --git a/.github/workflows/build-cachelib-rockylinux-8.yml b/.github/workflows/build-cachelib-rockylinux-8.yml index 879dc27566..c8af12327d 100644 --- a/.github/workflows/build-cachelib-rockylinux-8.yml +++ b/.github/workflows/build-cachelib-rockylinux-8.yml @@ -14,6 +14,7 @@ name: build-cachelib-rockylinux-8.6 on: # push: + pull_request: schedule: - cron: '0 15 * * 2,4,6' jobs: diff --git a/.github/workflows/build-cachelib-rockylinux-9.yml b/.github/workflows/build-cachelib-rockylinux-9.yml index f6a86d75a0..e26eac6ff1 100644 --- a/.github/workflows/build-cachelib-rockylinux-9.yml +++ b/.github/workflows/build-cachelib-rockylinux-9.yml @@ -14,6 +14,7 @@ name: build-cachelib-rockylinux-9.0 on: # push: + pull_request: schedule: - cron: '0 17 * * *' jobs: diff --git a/.github/workflows/build-cachelib-ubuntu-18.yml b/.github/workflows/build-cachelib-ubuntu-18.yml index fad34c0897..ad068278a4 100644 --- a/.github/workflows/build-cachelib-ubuntu-18.yml +++ b/.github/workflows/build-cachelib-ubuntu-18.yml @@ -19,6 +19,7 @@ name: build-cachelib-ubuntu-18 on: # push: + pull_request: schedule: - cron: '0 5 * * 2,4,6' jobs: diff --git a/.github/workflows/build-cachelib-ubuntu-20.yml b/.github/workflows/build-cachelib-ubuntu-20.yml index 35a3f507e2..a8380fdb96 100644 --- a/.github/workflows/build-cachelib-ubuntu-20.yml +++ b/.github/workflows/build-cachelib-ubuntu-20.yml @@ -15,6 +15,7 @@ name: build-cachelib-ubuntu-20 on: # push: + pull_request: schedule: - cron: '0 5 * * 1,3,5' jobs: diff --git a/.github/workflows/build-cachelib-ubuntu-22.yml b/.github/workflows/build-cachelib-ubuntu-22.yml index b4374a5b96..4db194431d 100644 --- a/.github/workflows/build-cachelib-ubuntu-22.yml +++ 
b/.github/workflows/build-cachelib-ubuntu-22.yml @@ -15,6 +15,7 @@ name: build-cachelib-ubuntu-22 on: # push: + pull_request: schedule: - cron: '0 7 * * *' jobs: diff --git a/.packit.yaml b/.packit.yaml deleted file mode 100644 index bea307d9d0..0000000000 --- a/.packit.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# See the documentation for more information: -# https://packit.dev/docs/configuration - -specfile_path: cachelib.spec - -upstream_package_name: CacheLib -downstream_package_name: cachelib - -actions: - fix-spec-file: - - bash -c "sed -i cachelib.spec -e \"s/%global commit.*/%global commit $(git rev-parse HEAD)/\"" - - bash -c "sed -i cachelib.spec -e \"s/%global date.*/%global date $(git show -s --date=format:'%Y%m%d' --format=%cd)/\"" - create-archive: - - bash -c "COMMIT=$(git rev-parse HEAD); curl -ORL https://github.com/facebook/CacheLib/archive/${COMMIT}/cachelib-${COMMIT}.tar.gz; echo cachelib-${COMMIT}.tar.gz" - post-upstream-clone: "bash -c \"rm -rf cachelib-dist-git; git clone -b packit https://pagure.io/meta/cachelib.git cachelib-dist-git && mv cachelib-dist-git/cachelib*.{spec,patch} .\"" - -jobs: -- job: copr_build - trigger: pull_request - metadata: - targets: - - fedora-rawhide-aarch64 - - fedora-rawhide-x86_64 - - fedora-35-aarch64 - - fedora-35-x86_64 diff --git a/README.md b/README.md index e05c932d1e..7fc943b202 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ cd CacheLib Re-running `./contrib/build.sh` will update CacheLib and its dependencies to their latest versions and rebuild them. -See [build](https://cachelib.org/docs/installation/installation) for more details about +See [build](https://cachelib.org/docs/installation/) for more details about the building and installation process. @@ -77,3 +77,18 @@ https://www.facebook.com/whitehat Facebook's security team will triage your report and determine whether or not is it eligible for a bounty under our program. + + +## Build status + +Clicking on a badge will show you the recent builds for that OS. If your target OS's build is failing, you may wish to check recent issues and PRs for known workarounds. 
+ +- [![CentOS 8.1](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-centos-8-1.yml/badge.svg?event=schedule)](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-centos-8-1.yml?query=event%3Aschedule) +- [![CentOS 8.5](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-centos-8-5.yml/badge.svg?event=schedule)](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-centos-8-5.yml?query=event%3Aschedule) +- [![Debian 10](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-debian-10.yml/badge.svg?event=schedule)](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-debian-10.yml?query=event%3Aschedule) +- [![Fedora 36](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-fedora-36.yml/badge.svg?event=schedule)](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-fedora-36.yml?query=event%3Aschedule) +- [![Rocky Linux 8](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-rockylinux-8.yml/badge.svg?event=schedule)](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-rockylinux-8.yml?query=event%3Aschedule) +- [![Rocky Linux 9](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-rockylinux-9.yml/badge.svg?event=schedule)](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-rockylinux-9.yml?query=event%3Aschedule) +- [![Ubuntu 18](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-ubuntu-18.yml/badge.svg?event=schedule)](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-ubuntu-18.yml?query=event%3Aschedule) +- [![Ubuntu 20](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-ubuntu-20.yml/badge.svg?event=schedule)](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-ubuntu-20.yml?query=event%3Aschedule) +- [![Ubuntu 22](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-ubuntu-22.yml/badge.svg?event=schedule)](https://github.com/facebook/cachelib/actions/workflows/build-cachelib-ubuntu-22.yml?query=event%3Aschedule) diff --git a/cachelib/adaptor/rocks_secondary_cache/tests/CachelibWrapperTest.cpp b/cachelib/adaptor/rocks_secondary_cache/tests/CachelibWrapperTest.cpp index 5dd416f14d..827c25bd69 100644 --- a/cachelib/adaptor/rocks_secondary_cache/tests/CachelibWrapperTest.cpp +++ b/cachelib/adaptor/rocks_secondary_cache/tests/CachelibWrapperTest.cpp @@ -116,6 +116,10 @@ class CachelibWrapperTest : public ::testing::Test { } #endif +#if ROCKSDB_MAJOR > 8 || (ROCKSDB_MAJOR == 8 && ROCKSDB_MINOR >= 1) + static Cache::CacheItemHelper helper_no_secondary_; +#endif + static Cache::CacheItemHelper helper_; static Status SaveToCallbackFail(void* /*obj*/, @@ -191,13 +195,23 @@ class CachelibWrapperTest : public ::testing::Test { std::string path_; }; +#if ROCKSDB_MAJOR > 8 || (ROCKSDB_MAJOR == 8 && ROCKSDB_MINOR >= 1) +Cache::CacheItemHelper CachelibWrapperTest::helper_no_secondary_( + CacheEntryRole::kMisc, CachelibWrapperTest::DeletionCallback); +#endif + Cache::CacheItemHelper CachelibWrapperTest::helper_( #if ROCKSDB_MAJOR > 7 || (ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR >= 10) CacheEntryRole::kMisc, CachelibWrapperTest::DeletionCallback, CachelibWrapperTest::SizeCallback, CachelibWrapperTest::SaveToCallback, +#if ROCKSDB_MAJOR > 8 || (ROCKSDB_MAJOR == 8 && ROCKSDB_MINOR >= 1) + CachelibWrapperTest::CreateCallback, + &CachelibWrapperTest::helper_no_secondary_); +#else 
CachelibWrapperTest::CreateCallback); +#endif #else CachelibWrapperTest::SizeCallback, CachelibWrapperTest::SaveToCallback, @@ -210,7 +224,12 @@ Cache::CacheItemHelper CachelibWrapperTest::helper_fail_( CachelibWrapperTest::DeletionCallback, CachelibWrapperTest::SizeCallback, CachelibWrapperTest::SaveToCallbackFail, +#if ROCKSDB_MAJOR > 8 || (ROCKSDB_MAJOR == 8 && ROCKSDB_MINOR >= 1) + CachelibWrapperTest::CreateCallback, + &CachelibWrapperTest::helper_no_secondary_); +#else CachelibWrapperTest::CreateCallback); +#endif #else CachelibWrapperTest::SizeCallback, CachelibWrapperTest::SaveToCallbackFail, diff --git a/cachelib/allocator/Cache.cpp b/cachelib/allocator/Cache.cpp index 5c228ed7b5..7494577989 100644 --- a/cachelib/allocator/Cache.cpp +++ b/cachelib/allocator/Cache.cpp @@ -284,6 +284,25 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const { counters_.updateDelta(statPrefix + "reaper.skipped_slabs", stats.numReaperSkippedSlabs); + counters_.updateDelta(statPrefix + "rebalancer.runs", + stats.rebalancerStats.numRuns); + counters_.updateDelta(statPrefix + "rebalancer.rebalanced_slabs", + stats.rebalancerStats.numRebalancedSlabs); + counters_.updateCount(statPrefix + "rebalancer.latency.loop_last_ms", + stats.rebalancerStats.lastRebalanceTimeMs); + counters_.updateCount(statPrefix + "rebalancer.latency.loop_avg_ms", + stats.rebalancerStats.avgRebalanceTimeMs); + + counters_.updateCount(statPrefix + "rebalancer.latency.release_last_ms", + stats.rebalancerStats.lastReleaseTimeMs); + counters_.updateCount(statPrefix + "rebalancer.latency.release_avg_ms", + stats.rebalancerStats.avgReleaseTimeMs); + + counters_.updateCount(statPrefix + "rebalancer.latency.pick_last_ms", + stats.rebalancerStats.lastPickTimeMs); + counters_.updateCount(statPrefix + "rebalancer.latency.pick_avg_ms", + stats.rebalancerStats.avgPickTimeMs); + const auto slabReleaseStats = getSlabReleaseStats(); counters_.updateDelta(statPrefix + "slabs.rebalancer_runs", slabReleaseStats.numSlabReleaseForRebalanceAttempts); diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 1d89593268..d18aa8b68f 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -54,11 +54,11 @@ CacheAllocator::CacheAllocator( : config.memMonitoringEnabled()}, config_(config.validate()), tempShm_(type == InitMemType::kNone && isOnShm_ - ? std::make_unique(config_.size) + ? std::make_unique(config_.getCacheSize()) : nullptr), shmManager_(type != InitMemType::kNone ? std::make_unique(config_.cacheDir, - config_.usePosixShm) + config_.isUsingPosixShm()) : nullptr), deserializer_(type == InitMemType::kMemAttach ? 
createDeserializer() : nullptr), @@ -122,10 +122,10 @@ CacheAllocator::createNewMemoryAllocator() { return std::make_unique( getAllocatorConfig(config_), shmManager_ - ->createShm(detail::kShmCacheName, config_.size, + ->createShm(detail::kShmCacheName, config_.getCacheSize(), config_.slabMemoryBaseAddr, createShmCacheOpts()) .addr, - config_.size); + config_.getCacheSize()); } template @@ -137,7 +137,7 @@ CacheAllocator::restoreMemoryAllocator() { ->attachShm(detail::kShmCacheName, config_.slabMemoryBaseAddr, createShmCacheOpts()) .addr, - config_.size, + config_.getCacheSize(), config_.disableFullCoredump); } @@ -242,11 +242,12 @@ std::unique_ptr CacheAllocator::initAllocator( InitMemType type) { if (type == InitMemType::kNone) { if (isOnShm_ == true) { - return std::make_unique( - getAllocatorConfig(config_), tempShm_->getAddr(), config_.size); + return std::make_unique(getAllocatorConfig(config_), + tempShm_->getAddr(), + config_.getCacheSize()); } else { return std::make_unique(getAllocatorConfig(config_), - config_.size); + config_.getCacheSize()); } } else if (type == InitMemType::kMemNew) { return createNewMemoryAllocator(); @@ -363,7 +364,7 @@ CacheAllocator::allocateInternal(PoolId pid, } else { // failed to allocate memory. (*stats_.allocFailures)[pid][cid].inc(); // wake up rebalancer - if (poolRebalancer_) { + if (!config_.poolRebalancerDisableForcedWakeUp && poolRebalancer_) { poolRebalancer_->wakeUp(); } } @@ -832,20 +833,21 @@ CacheAllocator::releaseBackToAllocator(Item& it, removeFromMMContainer(*head); - // If this chained item is marked as exclusive, we will not free it. - // We must capture the exclusive state before we do the decRef when + // If this chained item is marked as moving, we will not free it. + // We must capture the moving state before we do the decRef when // we know the item must still be valid - const bool wasExclusive = head->isExclusive(); + const bool wasMoving = head->isMoving(); + XDCHECK(!head->isMarkedForEviction()); // Decref and check if we were the last reference. Now if the item - // was marked exclusive, after decRef, it will be free to be released + // was marked moving, after decRef, it will be free to be released // by slab release thread const auto childRef = head->decRef(); - // If the item is already exclusive and we already decremented the + // If the item is already moving and we already decremented the // refcount, we don't need to free this item. We'll let the slab // release thread take care of that - if (!wasExclusive) { + if (!wasMoving) { if (childRef != 0) { throw std::runtime_error(folly::sformat( "chained item refcount is not zero. We cannot proceed! 
" @@ -853,7 +855,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, childRef, head->toString())); } - // Item is not exclusive and refcount is 0, we can proceed to + // Item is not moving and refcount is 0, we can proceed to // free it or recylce the memory if (head == toRecycle) { XDCHECK(ReleaseRes::kReleased != res); @@ -881,9 +883,12 @@ CacheAllocator::releaseBackToAllocator(Item& it, } template -void CacheAllocator::incRef(Item& it) { - it.incRef(); - ++handleCount_.tlStats(); +bool CacheAllocator::incRef(Item& it) { + if (it.incRef()) { + ++handleCount_.tlStats(); + return true; + } + return false; } template @@ -903,8 +908,12 @@ CacheAllocator::acquire(Item* it) { SCOPE_FAIL { stats_.numRefcountOverflow.inc(); }; - incRef(*it); - return WriteHandle{it, *this}; + if (LIKELY(incRef(*it))) { + return WriteHandle{it, *this}; + } else { + // item is being evicted + return WriteHandle{}; + } } template @@ -1179,7 +1188,7 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, // This item has been unlinked from its parent and we're the only // owner of it, so we're done here - if (!oldItem.isInMMContainer() || oldItem.isOnlyExclusive()) { + if (!oldItem.isInMMContainer() || oldItem.isOnlyMoving()) { return false; } @@ -1210,7 +1219,7 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, // In case someone else had removed this chained item from its parent by now // So we check again to see if the it has been unlinked from its parent - if (!oldItem.isInMMContainer() || oldItem.isOnlyExclusive()) { + if (!oldItem.isInMMContainer() || oldItem.isOnlyMoving()) { return false; } @@ -1226,7 +1235,7 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, // parent's chain and the MMContainer. auto oldItemHandle = replaceChainedItemLocked(oldItem, std::move(newItemHdl), *parentHandle); - XDCHECK(oldItemHandle->isExclusive()); + XDCHECK(oldItemHandle->isMoving()); XDCHECK(!oldItemHandle->isInMMContainer()); return true; @@ -1255,7 +1264,7 @@ CacheAllocator::findEviction(PoolId pid, ClassId cid) { : toRecycle; // make sure no other thead is evicting the item - if (candidate->getRefCount() != 0 || !candidate->markExclusive()) { + if (candidate->getRefCount() != 0 || !candidate->markMoving()) { ++itr; continue; } @@ -1270,11 +1279,11 @@ CacheAllocator::findEviction(PoolId pid, ClassId cid) { ? advanceIteratorAndTryEvictChainedItem(itr) : advanceIteratorAndTryEvictRegularItem(mmContainer, itr); evictionSuccessful = toReleaseHandle != nullptr; - // destroy toReleseHandle. The item won't be released to allocator - // since we marked it as exclusive. + // destroy toReleaseHandle. The item won't be released to allocator + // since we marked for eviction. 
} - const auto ref = candidate->unmarkExclusive(); + const auto ref = candidate->unmarkMoving(); if (ref == 0u) { // Invalidate iterator since later on we may use this mmContainer // again, which cannot be done unless we drop this iterator @@ -2251,7 +2260,7 @@ PoolEvictionAgeStats CacheAllocator::getPoolEvictionAgeStats( template CacheMetadata CacheAllocator::getCacheMetadata() const noexcept { return CacheMetadata{kCachelibVersion, kCacheRamFormatVersion, - kCacheNvmFormatVersion, config_.size}; + kCacheNvmFormatVersion, config_.getCacheSize()}; } template @@ -2361,7 +2370,7 @@ void CacheAllocator::releaseSlabImpl( // Need to mark an item for release before proceeding // If we can't mark as moving, it means the item is already freed const bool isAlreadyFreed = - !markExclusiveForSlabRelease(releaseContext, alloc, throttler); + !markMovingForSlabRelease(releaseContext, alloc, throttler); if (isAlreadyFreed) { continue; } @@ -2406,8 +2415,8 @@ bool CacheAllocator::moveForSlabRelease( stats_.numMoveAttempts.inc(); // Nothing to move and the key is likely also bogus for chained items. - if (oldItem.isOnlyExclusive()) { - oldItem.unmarkExclusive(); + if (oldItem.isOnlyMoving()) { + oldItem.unmarkMoving(); const auto res = releaseBackToAllocator(oldItem, RemoveContext::kNormal, false); XDCHECK(res == ReleaseRes::kReleased); @@ -2446,7 +2455,7 @@ bool CacheAllocator::moveForSlabRelease( // that's identical to this one to replace it. Here we just need to wait // until all users have dropped the item handles before we can proceed. startTime = util::getCurrentTimeSec(); - while (!oldItem.isOnlyExclusive()) { + while (!oldItem.isOnlyMoving()) { throttleWith(throttler, [&] { XLOGF(WARN, "Spent {} seconds, slab release still waiting for refcount to " @@ -2500,8 +2509,8 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { return {}; } - // Set up the destination for the move. Since oldChainedItem would have - // the exclusive bit set, it won't be picked for eviction. + // Set up the destination for the move. Since oldChainedItem would be + // marked as moving, it won't be picked for eviction. auto newItemHdl = allocateChainedItemInternal(parentHandle, oldChainedItem.getSize()); if (!newItemHdl) { @@ -2553,7 +2562,7 @@ bool CacheAllocator::tryMovingForSlabRelease( // item is still valid. const std::string parentKey = oldItem.asChainedItem().getParentItem(compressor_).getKey().str(); - if (oldItem.isOnlyExclusive()) { + if (oldItem.isOnlyMoving()) { // If chained item no longer has a refcount, its parent is already // being released, so we abort this try to moving. return false; @@ -2583,10 +2592,10 @@ void CacheAllocator::evictForSlabRelease( stats_.numEvictionAttempts.inc(); // if the item is already in a state where only the exclusive bit is set, - // nothing needs to be done. We simply need to unmark exclusive bit and free + // nothing needs to be done. We simply need to call unmarkMoving and free // the item. - if (item.isOnlyExclusive()) { - item.unmarkExclusive(); + if (item.isOnlyMoving()) { + item.unmarkMoving(); const auto res = releaseBackToAllocator(item, RemoveContext::kNormal, false); XDCHECK(ReleaseRes::kReleased == res); @@ -2617,7 +2626,7 @@ void CacheAllocator::evictForSlabRelease( stats_.numEvictionSuccesses.inc(); // we have the last handle. 
no longer need to hold on to the exclusive bit - item.unmarkExclusive(); + item.unmarkMoving(); // manually decrement the refcount to call releaseBackToAllocator const auto ref = decRef(*owningHandle); @@ -2629,7 +2638,7 @@ void CacheAllocator::evictForSlabRelease( } if (shutDownInProgress_) { - item.unmarkExclusive(); + item.unmarkMoving(); allocator_->abortSlabRelease(ctx); throw exception::SlabReleaseAborted( folly::sformat("Slab Release aborted while trying to evict" @@ -2775,9 +2784,9 @@ CacheAllocator::advanceIteratorAndTryEvictChainedItem( template typename CacheAllocator::WriteHandle CacheAllocator::evictNormalItemForSlabRelease(Item& item) { - XDCHECK(item.isExclusive()); + XDCHECK(item.isMoving()); - if (item.isOnlyExclusive()) { + if (item.isOnlyMoving()) { return WriteHandle{}; } @@ -2789,7 +2798,7 @@ CacheAllocator::evictNormalItemForSlabRelease(Item& item) { // We remove the item from both access and mm containers. It doesn't matter // if someone else calls remove on the item at this moment, the item cannot - // be freed as long as we have the exclusive bit set. + // be freed as long as it's marked for eviction. auto handle = accessContainer_->removeIf(item, std::move(predicate)); if (!handle) { @@ -2813,7 +2822,7 @@ CacheAllocator::evictNormalItemForSlabRelease(Item& item) { template typename CacheAllocator::WriteHandle CacheAllocator::evictChainedItemForSlabRelease(ChainedItem& child) { - XDCHECK(child.isExclusive()); + XDCHECK(child.isMoving()); // We have the child marked as moving, but dont know anything about the // state of the parent. Unlike the case of regular eviction where we are @@ -2835,7 +2844,7 @@ CacheAllocator::evictChainedItemForSlabRelease(ChainedItem& child) { // check if the child is still in mmContainer and the expected parent is // valid under the chained item lock. if (expectedParent.getKey() != parentKey || !child.isInMMContainer() || - child.isOnlyExclusive() || + child.isOnlyMoving() || &expectedParent != &child.getParentItem(compressor_) || !expectedParent.isAccessible() || !expectedParent.hasChainedItem()) { return {}; @@ -2890,14 +2899,14 @@ CacheAllocator::evictChainedItemForSlabRelease(ChainedItem& child) { // In case someone else had removed this chained item from its parent by now // So we check again to see if it has been unlinked from its parent - if (!child.isInMMContainer() || child.isOnlyExclusive()) { + if (!child.isInMMContainer() || child.isOnlyMoving()) { return {}; } // check after removing from the MMContainer that the parent is still not // being marked as moving. If parent is moving, it will release the child // item and we will wait for that. - if (parentHandle->isExclusive()) { + if (parentHandle->isMoving()) { return {}; } @@ -2930,7 +2939,7 @@ bool CacheAllocator::removeIfExpired(const ReadHandle& handle) { } template -bool CacheAllocator::markExclusiveForSlabRelease( +bool CacheAllocator::markMovingForSlabRelease( const SlabReleaseContext& ctx, void* alloc, util::Throttler& throttler) { // MemoryAllocator::processAllocForRelease will execute the callback // if the item is not already free. 
So there are three outcomes here: @@ -2949,7 +2958,7 @@ bool CacheAllocator::markExclusiveForSlabRelease( // Since this callback is executed, the item is not yet freed itemFreed = false; Item* item = static_cast(memory); - if (item->markExclusive()) { + if (item->markMoving()) { markedMoving = true; } }; @@ -3403,6 +3412,7 @@ GlobalCacheStats CacheAllocator::getGlobalCacheStats() const { ret.nvmUpTime = currTime - nvmCacheState_.getCreationTime(); ret.nvmCacheEnabled = nvmCache_ ? nvmCache_->isEnabled() : false; ret.reaperStats = getReaperStats(); + ret.rebalancerStats = getRebalancerStats(); ret.numActiveHandles = getNumActiveHandles(); ret.isNewRamCache = cacheCreationTime_ == cacheInstanceCreationTime_; diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index ed0096390a..692f42bec9 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -1146,6 +1146,13 @@ class CacheAllocator : public CacheBase { return stats; } + // returns the pool rebalancer stats + RebalancerStats getRebalancerStats() const { + auto stats = + poolRebalancer_ ? poolRebalancer_->getStats() : RebalancerStats{}; + return stats; + } + // return the LruType of an item typename MMType::LruType getItemLruType(const Item& item) const; @@ -1308,7 +1315,7 @@ class CacheAllocator : public CacheBase { private: // wrapper around Item's refcount and active handle tracking - FOLLY_ALWAYS_INLINE void incRef(Item& it); + FOLLY_ALWAYS_INLINE bool incRef(Item& it); FOLLY_ALWAYS_INLINE RefcountWithFlags::Value decRef(Item& it); // drops the refcount and if needed, frees the allocation back to the memory @@ -1756,9 +1763,9 @@ class CacheAllocator : public CacheBase { // @return true when successfully marked as moving, // fasle when this item has already been freed - bool markExclusiveForSlabRelease(const SlabReleaseContext& ctx, - void* alloc, - util::Throttler& throttler); + bool markMovingForSlabRelease(const SlabReleaseContext& ctx, + void* alloc, + util::Throttler& throttler); // "Move" (by copying) the content in this item to another memory // location by invoking the move callback. @@ -1937,7 +1944,7 @@ class CacheAllocator : public CacheBase { } static bool parentEvictForSlabReleasePredicate(const Item& item) { - return item.getRefCount() == 1 && !item.isExclusive(); + return item.getRefCount() == 1 && !item.isMoving(); } std::unique_ptr createDeserializer(); diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h index ec44ff8467..59846b8060 100644 --- a/cachelib/allocator/CacheAllocatorConfig.h +++ b/cachelib/allocator/CacheAllocatorConfig.h @@ -241,10 +241,12 @@ class CacheAllocatorConfig { // slab memory distributed across different allocation classes. For example, // if the 64 bytes allocation classes are receiving for allocation requests, // eventually CacheAllocator will move more memory to it from other allocation - // classes. For more details, see our user guide. + // classes. The rebalancing is triggered every specified interval and + // optionally on allocation failures. For more details, see our user guide. CacheAllocatorConfig& enablePoolRebalancing( std::shared_ptr defaultRebalanceStrategy, - std::chrono::milliseconds interval); + std::chrono::milliseconds interval, + bool disableForcedWakeup = false); // This lets you change pool size during runtime, and the pool resizer // will slowly adjust each pool's memory size to the newly configured sizes. 
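The hunk above adds an optional third argument, disableForcedWakeup, to enablePoolRebalancing(); it is stored in poolRebalancerDisableForcedWakeUp and, per the CacheAllocator-inl.h change earlier in this patch, stops allocation failures from force-waking the rebalancer. A minimal usage sketch, assuming the stock LruAllocator typedef and the LruTailAgeStrategy header from this repo; the cache size, strategy choice, and interval below are illustrative, not taken from this patch:

#include <chrono>
#include <memory>
#include "cachelib/allocator/CacheAllocator.h"
#include "cachelib/allocator/LruTailAgeStrategy.h"

using Cache = facebook::cachelib::LruAllocator;

Cache::Config makeConfig() {
  Cache::Config config;
  config.setCacheSize(1024 * 1024 * 1024) // 1 GB, arbitrary for this sketch
      .enablePoolRebalancing(
          std::make_shared<facebook::cachelib::LruTailAgeStrategy>(),
          std::chrono::seconds{10},        // periodic rebalance interval
          /* disableForcedWakeup */ true); // skip wake-ups on alloc failures
  return config;
}

Passing true means the rebalancer runs only on its periodic interval; the default false keeps the existing behavior of waking it from allocateInternal() when an allocation fails.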
@@ -434,6 +436,9 @@ class CacheAllocatorConfig { // time interval to sleep between iterators of rebalancing the pools. std::chrono::milliseconds poolRebalanceInterval{std::chrono::seconds{1}}; + // disable waking up the PoolRebalancer on alloc failures + bool poolRebalancerDisableForcedWakeUp{false}; + // Free slabs pro-actively if the ratio of number of freeallocs to // the number of allocs per slab in a slab class is above this // threshold @@ -913,10 +918,12 @@ CacheAllocatorConfig& CacheAllocatorConfig::enablePoolOptimizer( template CacheAllocatorConfig& CacheAllocatorConfig::enablePoolRebalancing( std::shared_ptr defaultRebalanceStrategy, - std::chrono::milliseconds interval) { + std::chrono::milliseconds interval, + bool disableForcedWakeup) { if (validateStrategy(defaultRebalanceStrategy)) { defaultPoolRebalanceStrategy = defaultRebalanceStrategy; poolRebalanceInterval = interval; + poolRebalancerDisableForcedWakeUp = disableForcedWakeup; } else { throw std::invalid_argument( "Invalid rebalance strategy for the cache allocator."); @@ -1085,7 +1092,7 @@ std::map CacheAllocatorConfig::serialize() const { configMap["size"] = std::to_string(size); configMap["cacheDir"] = cacheDir; - configMap["posixShm"] = usePosixShm ? "set" : "empty"; + configMap["posixShm"] = isUsingPosixShm() ? "set" : "empty"; configMap["defaultAllocSizes"] = ""; // Stringify std::set diff --git a/cachelib/allocator/CacheItem-inl.h b/cachelib/allocator/CacheItem-inl.h index f59fa9d599..bf77b43aa5 100644 --- a/cachelib/allocator/CacheItem-inl.h +++ b/cachelib/allocator/CacheItem-inl.h @@ -148,15 +148,16 @@ std::string CacheItem::toString() const { return folly::sformat( "item: " "memory={}:raw-ref={}:size={}:key={}:hex-key={}:" - "isInMMContainer={}:isAccessible={}:isExclusive={}:references={}:ctime=" + "isInMMContainer={}:isAccessible={}:isMarkedForEviction={}:" + "isMoving={}:references={}:ctime=" "{}:" "expTime={}:updateTime={}:isNvmClean={}:isNvmEvicted={}:hasChainedItem=" "{}", this, getRefCountAndFlagsRaw(), getSize(), folly::humanify(getKey().str()), folly::hexlify(getKey()), - isInMMContainer(), isAccessible(), isExclusive(), getRefCount(), - getCreationTime(), getExpiryTime(), getLastAccessTime(), isNvmClean(), - isNvmEvicted(), hasChainedItem()); + isInMMContainer(), isAccessible(), isMarkedForEviction(), isMoving(), + getRefCount(), getCreationTime(), getExpiryTime(), getLastAccessTime(), + isNvmClean(), isNvmEvicted(), hasChainedItem()); } } @@ -217,23 +218,43 @@ bool CacheItem::isInMMContainer() const noexcept { } template -bool CacheItem::markExclusive() noexcept { - return ref_.markExclusive(); +bool CacheItem::markForEviction() noexcept { + return ref_.markForEviction(); } template -RefcountWithFlags::Value CacheItem::unmarkExclusive() noexcept { - return ref_.unmarkExclusive(); +RefcountWithFlags::Value CacheItem::unmarkForEviction() noexcept { + return ref_.unmarkForEviction(); } template -bool CacheItem::isExclusive() const noexcept { - return ref_.isExclusive(); +bool CacheItem::isMarkedForEviction() const noexcept { + return ref_.isMarkedForEviction(); } template -bool CacheItem::isOnlyExclusive() const noexcept { - return ref_.isOnlyExclusive(); +bool CacheItem::markForEvictionWhenMoving() { + return ref_.markForEvictionWhenMoving(); +} + +template +bool CacheItem::markMoving() { + return ref_.markMoving(); +} + +template +RefcountWithFlags::Value CacheItem::unmarkMoving() noexcept { + return ref_.unmarkMoving(); +} + +template +bool CacheItem::isMoving() const noexcept { + return 
ref_.isMoving(); +} + +template +bool CacheItem::isOnlyMoving() const noexcept { + return ref_.isOnlyMoving(); } template @@ -335,7 +356,8 @@ bool CacheItem::updateExpiryTime(uint32_t expiryTimeSecs) noexcept { // check for moving to make sure we are not updating the expiry time while at // the same time re-allocating the item with the old state of the expiry time // in moveRegularItem(). See D6852328 - if (isExclusive() || !isInMMContainer() || isChainedItem()) { + if (isMoving() || isMarkedForEviction() || !isInMMContainer() || + isChainedItem()) { return false; } // attempt to atomically update the value of expiryTime @@ -451,12 +473,14 @@ std::string CacheChainedItem::toString() const { return folly::sformat( "chained item: " "memory={}:raw-ref={}:size={}:parent-compressed-ptr={}:" - "isInMMContainer={}:isAccessible={}:isExclusive={}:references={}:ctime={}" + "isInMMContainer={}:isAccessible={}:isMarkedForEviction={}:" + "isMoving={}:references={}:ctime={}" ":" "expTime={}:updateTime={}", this, Item::getRefCountAndFlagsRaw(), Item::getSize(), cPtr.getRaw(), - Item::isInMMContainer(), Item::isAccessible(), Item::isExclusive(), - Item::getRefCount(), Item::getCreationTime(), Item::getExpiryTime(), + Item::isInMMContainer(), Item::isAccessible(), + Item::isMarkedForEviction(), Item::isMoving(), Item::getRefCount(), + Item::getCreationTime(), Item::getExpiryTime(), Item::getLastAccessTime()); } diff --git a/cachelib/allocator/CacheItem.h b/cachelib/allocator/CacheItem.h index 06136db032..afee315cbb 100644 --- a/cachelib/allocator/CacheItem.h +++ b/cachelib/allocator/CacheItem.h @@ -305,12 +305,17 @@ class CACHELIB_PACKED_ATTR CacheItem { */ RefcountWithFlags::Value getRefCountAndFlagsRaw() const noexcept; - FOLLY_ALWAYS_INLINE void incRef() { - if (LIKELY(ref_.incRef())) { - return; + // Increments item's ref count + // + // @return true on success, failure if item is marked as exclusive + // @throw exception::RefcountOverflow on ref count overflow + FOLLY_ALWAYS_INLINE bool incRef() { + try { + return ref_.incRef(); + } catch (exception::RefcountOverflow& e) { + throw exception::RefcountOverflow( + folly::sformat("{} item: {}", e.what(), toString())); } - throw exception::RefcountOverflow( - folly::sformat("Refcount maxed out. item: {}", toString())); } FOLLY_ALWAYS_INLINE RefcountWithFlags::Value decRef() { @@ -344,23 +349,43 @@ class CACHELIB_PACKED_ATTR CacheItem { /** * The following two functions corresond to whether or not an item is - * currently in the process of being moved. This happens during a slab - * rebalance, eviction or resize operation. + * currently in the process of being evicted. * - * An item can only be marked exclusive when `isInMMContainer` returns true. + * An item can only be marked exclusive when `isInMMContainer` returns true + * and item is not already exclusive nor moving and the ref count is 0. * This operation is atomic. * - * User can also query if an item "isOnlyExclusive". This returns true only - * if the refcount is 0 and only the exclusive bit is set. - * - * Unmarking exclusive does not depend on `isInMMContainer`. + * Unmarking exclusive does not depend on `isInMMContainer` * Unmarking exclusive will also return the refcount at the moment of * unmarking. 
*/ - bool markExclusive() noexcept; - RefcountWithFlags::Value unmarkExclusive() noexcept; - bool isExclusive() const noexcept; - bool isOnlyExclusive() const noexcept; + bool markForEviction() noexcept; + RefcountWithFlags::Value unmarkForEviction() noexcept; + bool isMarkedForEviction() const noexcept; + + /** + * The following functions correspond to whether or not an item is + * currently in the process of being moved. When moving, ref count + * is always >= 1. + * + * An item can only be marked moving when `isInMMContainer` returns true + * and item is not already exclusive nor moving. + * + * User can also query if an item "isOnlyMoving". This returns true only + * if the refcount is one and only the exclusive bit is set. + * + * Unmarking moving does not depend on `isInMMContainer` + * Unmarking moving will also return the refcount at the moment of + * unmarking. + */ + bool markMoving(); + RefcountWithFlags::Value unmarkMoving() noexcept; + bool isMoving() const noexcept; + bool isOnlyMoving() const noexcept; + + /** This function attempts to mark a moving item for eviction. + * Can only be called on an item that is moving. */ + bool markForEvictionWhenMoving(); /** * Item cannot be marked both chained allocation and diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h index fb9955b805..b51be687a4 100644 --- a/cachelib/allocator/CacheStats.h +++ b/cachelib/allocator/CacheStats.h @@ -289,6 +289,22 @@ struct ReaperStats { uint64_t avgTraversalTimeMs{0}; }; +// Stats for the pool rebalancer +struct RebalancerStats { + uint64_t numRuns{0}; + + uint64_t numRebalancedSlabs{0}; + + uint64_t lastRebalanceTimeMs{0}; + uint64_t avgRebalanceTimeMs{0}; + + uint64_t lastReleaseTimeMs{0}; + uint64_t avgReleaseTimeMs{0}; + + uint64_t lastPickTimeMs{0}; + uint64_t avgPickTimeMs{0}; +}; + // CacheMetadata type to export struct CacheMetadata { // allocator_version @@ -494,6 +510,9 @@ struct GlobalCacheStats { // stats related to the reaper ReaperStats reaperStats; + // stats related to the pool rebalancer + RebalancerStats rebalancerStats; + uint64_t numNvmRejectsByExpiry{}; uint64_t numNvmRejectsByClean{}; uint64_t numNvmRejectsByAP{}; diff --git a/cachelib/allocator/FreeMemStrategy.cpp b/cachelib/allocator/FreeMemStrategy.cpp index 5ddce0c9b5..2a386ace7d 100644 --- a/cachelib/allocator/FreeMemStrategy.cpp +++ b/cachelib/allocator/FreeMemStrategy.cpp @@ -37,15 +37,13 @@ FreeMemStrategy::FreeMemStrategy(Config config) // // 2. Pick the first class we find with free memory past the threshold RebalanceContext FreeMemStrategy::pickVictimAndReceiverImpl( - const CacheBase& cache, PoolId pid) { + const CacheBase& cache, PoolId pid, const PoolStats& poolStats) { const auto& pool = cache.getPool(pid); if (pool.getUnAllocatedSlabMemory() > config_.maxUnAllocatedSlabs * Slab::kSize) { return kNoOpContext; } - const auto poolStats = cache.getPoolStats(pid); - // ignore allocation classes that have fewer than the threshold of slabs.
const auto victims = filterByNumEvictableSlabs( poolStats, std::move(poolStats.getClassIds()), config_.minSlabs); diff --git a/cachelib/allocator/FreeMemStrategy.h b/cachelib/allocator/FreeMemStrategy.h index ad07137c89..f0e0586a52 100644 --- a/cachelib/allocator/FreeMemStrategy.h +++ b/cachelib/allocator/FreeMemStrategy.h @@ -53,7 +53,8 @@ class FreeMemStrategy : public RebalanceStrategy { explicit FreeMemStrategy(Config config = {}); RebalanceContext pickVictimAndReceiverImpl(const CacheBase& cache, - PoolId pid) final; + PoolId pid, + const PoolStats& poolStats) final; private: const Config config_; diff --git a/cachelib/allocator/Handle.h b/cachelib/allocator/Handle.h index a125ace1b7..11d2bed2be 100644 --- a/cachelib/allocator/Handle.h +++ b/cachelib/allocator/Handle.h @@ -242,8 +242,6 @@ struct ReadHandleImpl { return hdl; } - bool isWriteHandle() const { return false; } - protected: // accessor. Calling getInternal() on handle with isReady() == false blocks // the thread until the handle is ready. @@ -571,8 +569,6 @@ struct WriteHandleImpl : public ReadHandleImpl { // creating this item handle. WriteHandleImpl clone() const { return WriteHandleImpl{ReadHandle::clone()}; } - bool isWriteHandle() const { return true; } - // Friends friend ReadHandle; // Only CacheAllocator and NvmCache can create non-default constructed handles diff --git a/cachelib/allocator/HitsPerSlabStrategy.cpp b/cachelib/allocator/HitsPerSlabStrategy.cpp index b312292b4d..9b3b59af90 100644 --- a/cachelib/allocator/HitsPerSlabStrategy.cpp +++ b/cachelib/allocator/HitsPerSlabStrategy.cpp @@ -121,7 +121,7 @@ ClassId HitsPerSlabStrategy::pickReceiver(const Config& config, } RebalanceContext HitsPerSlabStrategy::pickVictimAndReceiverImpl( - const CacheBase& cache, PoolId pid) { + const CacheBase& cache, PoolId pid, const PoolStats& poolStats) { if (!cache.getPool(pid).allSlabsAllocated()) { XLOGF(DBG, "Pool Id: {}" @@ -131,8 +131,6 @@ RebalanceContext HitsPerSlabStrategy::pickVictimAndReceiverImpl( return kNoOpContext; } - const auto poolStats = cache.getPoolStats(pid); - const auto config = getConfigCopy(); RebalanceContext ctx; @@ -189,8 +187,8 @@ RebalanceContext HitsPerSlabStrategy::pickVictimAndReceiverImpl( } ClassId HitsPerSlabStrategy::pickVictimImpl(const CacheBase& cache, - PoolId pid) { - const auto poolStats = cache.getPoolStats(pid); + PoolId pid, + const PoolStats& poolStats) { const auto config = getConfigCopy(); auto victimClassId = pickVictim(config, cache, pid, poolStats); diff --git a/cachelib/allocator/HitsPerSlabStrategy.h b/cachelib/allocator/HitsPerSlabStrategy.h index 3668097540..41f29f8978 100644 --- a/cachelib/allocator/HitsPerSlabStrategy.h +++ b/cachelib/allocator/HitsPerSlabStrategy.h @@ -88,10 +88,14 @@ class HitsPerSlabStrategy : public RebalanceStrategy { return config_; } - RebalanceContext pickVictimAndReceiverImpl(const CacheBase& cache, - PoolId pid) override final; - - ClassId pickVictimImpl(const CacheBase& cache, PoolId pid) override final; + RebalanceContext pickVictimAndReceiverImpl( + const CacheBase& cache, + PoolId pid, + const PoolStats& poolStats) override final; + + ClassId pickVictimImpl(const CacheBase& cache, + PoolId pid, + const PoolStats& poolStats) override final; private: static AllocInfo makeAllocInfo(PoolId pid, diff --git a/cachelib/allocator/LruTailAgeStrategy.cpp b/cachelib/allocator/LruTailAgeStrategy.cpp index 72a34b93b5..0bd7148935 100644 --- a/cachelib/allocator/LruTailAgeStrategy.cpp +++ b/cachelib/allocator/LruTailAgeStrategy.cpp @@ -138,7 +138,7 @@ 
ClassId LruTailAgeStrategy::pickReceiver( } RebalanceContext LruTailAgeStrategy::pickVictimAndReceiverImpl( - const CacheBase& cache, PoolId pid) { + const CacheBase& cache, PoolId pid, const PoolStats& poolStats) { if (!cache.getPool(pid).allSlabsAllocated()) { XLOGF(DBG, "Pool Id: {}" @@ -151,7 +151,6 @@ RebalanceContext LruTailAgeStrategy::pickVictimAndReceiverImpl( const auto config = getConfigCopy(); - const auto poolStats = cache.getPoolStats(pid); const auto poolEvictionAgeStats = cache.getPoolEvictionAgeStats(pid, config.slabProjectionLength); @@ -190,11 +189,13 @@ RebalanceContext LruTailAgeStrategy::pickVictimAndReceiverImpl( return ctx; } -ClassId LruTailAgeStrategy::pickVictimImpl(const CacheBase& cache, PoolId pid) { +ClassId LruTailAgeStrategy::pickVictimImpl(const CacheBase& cache, + PoolId pid, + const PoolStats& poolStats) { const auto config = getConfigCopy(); const auto poolEvictionAgeStats = cache.getPoolEvictionAgeStats(pid, config.slabProjectionLength); - return pickVictim(config, pid, cache.getPoolStats(pid), poolEvictionAgeStats); + return pickVictim(config, pid, poolStats, poolEvictionAgeStats); } } // namespace cachelib } // namespace facebook diff --git a/cachelib/allocator/LruTailAgeStrategy.h b/cachelib/allocator/LruTailAgeStrategy.h index f7e4ceb58d..d43757f60d 100644 --- a/cachelib/allocator/LruTailAgeStrategy.h +++ b/cachelib/allocator/LruTailAgeStrategy.h @@ -95,10 +95,14 @@ class LruTailAgeStrategy : public RebalanceStrategy { return config_; } - RebalanceContext pickVictimAndReceiverImpl(const CacheBase& cache, - PoolId pid) override final; - - ClassId pickVictimImpl(const CacheBase& cache, PoolId pid) override final; + RebalanceContext pickVictimAndReceiverImpl( + const CacheBase& cache, + PoolId pid, + const PoolStats& poolStats) override final; + + ClassId pickVictimImpl(const CacheBase& cache, + PoolId pid, + const PoolStats& poolStats) override final; private: static AllocInfo makeAllocInfo(PoolId pid, diff --git a/cachelib/allocator/MarginalHitsStrategy.cpp b/cachelib/allocator/MarginalHitsStrategy.cpp index 83a17252d6..1f32584224 100644 --- a/cachelib/allocator/MarginalHitsStrategy.cpp +++ b/cachelib/allocator/MarginalHitsStrategy.cpp @@ -30,7 +30,7 @@ MarginalHitsStrategy::MarginalHitsStrategy(Config config) : RebalanceStrategy(MarginalHits), config_(std::move(config)) {} RebalanceContext MarginalHitsStrategy::pickVictimAndReceiverImpl( - const CacheBase& cache, PoolId pid) { + const CacheBase& cache, PoolId pid, const PoolStats& poolStats) { const auto config = getConfigCopy(); if (!cache.getPool(pid).allSlabsAllocated()) { XLOGF(DBG, @@ -39,7 +39,6 @@ RebalanceContext MarginalHitsStrategy::pickVictimAndReceiverImpl( static_cast(pid)); return kNoOpContext; } - auto poolStats = cache.getPoolStats(pid); auto scores = computeClassMarginalHits(pid, poolStats); auto classesSet = poolStats.getClassIds(); std::vector classes(classesSet.begin(), classesSet.end()); @@ -66,8 +65,9 @@ RebalanceContext MarginalHitsStrategy::pickVictimAndReceiverImpl( } ClassId MarginalHitsStrategy::pickVictimImpl(const CacheBase& cache, - PoolId pid) { - return pickVictimAndReceiverImpl(cache, pid).victimClassId; + PoolId pid, + const PoolStats& stats) { + return pickVictimAndReceiverImpl(cache, pid, stats).victimClassId; } std::unordered_map diff --git a/cachelib/allocator/MarginalHitsStrategy.h b/cachelib/allocator/MarginalHitsStrategy.h index 851e5982f3..d6e350984d 100644 --- a/cachelib/allocator/MarginalHitsStrategy.h +++ b/cachelib/allocator/MarginalHitsStrategy.h @@ 
-66,11 +66,15 @@ class MarginalHitsStrategy : public RebalanceStrategy { } // pick victim and receiver classes from a pool - RebalanceContext pickVictimAndReceiverImpl(const CacheBase& cache, - PoolId pid) override final; + RebalanceContext pickVictimAndReceiverImpl( + const CacheBase& cache, + PoolId pid, + const PoolStats& poolStats) override final; // pick victim class from a pool to shrink - ClassId pickVictimImpl(const CacheBase& cache, PoolId pid) override final; + ClassId pickVictimImpl(const CacheBase& cache, + PoolId pid, + const PoolStats& poolStats) override final; private: // compute delta of tail hits for every class in this pool diff --git a/cachelib/allocator/PoolRebalancer.cpp b/cachelib/allocator/PoolRebalancer.cpp index 6c963c80b8..ff1c2a46de 100644 --- a/cachelib/allocator/PoolRebalancer.cpp +++ b/cachelib/allocator/PoolRebalancer.cpp @@ -103,6 +103,8 @@ RebalanceContext PoolRebalancer::pickVictimByFreeAlloc(PoolId pid) const { } bool PoolRebalancer::tryRebalancing(PoolId pid, RebalanceStrategy& strategy) { + const auto begin = util::getCurrentTimeMs(); + if (freeAllocThreshold_ > 0) { auto ctx = pickVictimByFreeAlloc(pid); if (ctx.victimClassId != Slab::kInvalidClassId) { @@ -114,16 +116,41 @@ bool PoolRebalancer::tryRebalancing(PoolId pid, RebalanceStrategy& strategy) { return false; } + auto currentTimeSec = util::getCurrentTimeMs(); const auto context = strategy.pickVictimAndReceiver(cache_, pid); + auto end = util::getCurrentTimeMs(); + pickVictimStats_.recordLoopTime(end > currentTimeSec ? end - currentTimeSec + : 0); + if (context.victimClassId == Slab::kInvalidClassId) { XLOGF(DBG, "Pool Id: {} rebalancing strategy didn't find an victim", static_cast(pid)); return false; } + currentTimeSec = util::getCurrentTimeMs(); releaseSlab(pid, context.victimClassId, context.receiverClassId); + end = util::getCurrentTimeMs(); + releaseStats_.recordLoopTime(end > currentTimeSec ? end - currentTimeSec : 0); + rebalanceStats_.recordLoopTime(end > begin ? end - begin : 0); + return true; } +RebalancerStats PoolRebalancer::getStats() const noexcept { + RebalancerStats stats; + stats.numRuns = getRunCount(); + stats.numRebalancedSlabs = rebalanceStats_.getNumLoops(); + stats.lastRebalanceTimeMs = rebalanceStats_.getLastLoopTimeMs(); + stats.avgRebalanceTimeMs = rebalanceStats_.getAvgLoopTimeMs(); + + stats.lastReleaseTimeMs = releaseStats_.getLastLoopTimeMs(); + stats.avgReleaseTimeMs = releaseStats_.getAvgLoopTimeMs(); + + stats.lastPickTimeMs = pickVictimStats_.getLastLoopTimeMs(); + stats.avgPickTimeMs = pickVictimStats_.getAvgLoopTimeMs(); + return stats; +} + } // namespace cachelib } // namespace facebook diff --git a/cachelib/allocator/PoolRebalancer.h b/cachelib/allocator/PoolRebalancer.h index 4a3ee14638..4238aacea6 100644 --- a/cachelib/allocator/PoolRebalancer.h +++ b/cachelib/allocator/PoolRebalancer.h @@ -53,7 +53,30 @@ class PoolRebalancer : public PeriodicWorker { return stats_.getSlabReleaseEvents(pid); } + RebalancerStats getStats() const noexcept; + private: + struct LoopStats { + // record the count and the time taken + void recordLoopTime(uint64_t msTaken) { + numLoops_.fetch_add(1, std::memory_order_relaxed); + lastLoopTimeMs_.store(msTaken, std::memory_order_relaxed); + totalLoopTimeMs_.fetch_add(msTaken, std::memory_order_relaxed); + } + + uint64_t getAvgLoopTimeMs() const { + return numLoops_ ? 
totalLoopTimeMs_ / numLoops_ : 0; + } + uint64_t getLastLoopTimeMs() const { return lastLoopTimeMs_; } + uint64_t getNumLoops() const { return numLoops_; } + + private: + // time it took us the last time and the average + std::atomic lastLoopTimeMs_{0}; + std::atomic totalLoopTimeMs_{0}; + std::atomic numLoops_{0}; + }; + // This will attempt to rebalance by // 1. reading the stats from the cache allocator // 2. analyzing the stats by using the rebalance strategy @@ -86,6 +109,11 @@ class PoolRebalancer : public PeriodicWorker { // slab release stats for this rebalancer. ReleaseStats stats_; + // loop timing stats + LoopStats rebalanceStats_; + LoopStats releaseStats_; + LoopStats pickVictimStats_; + // implements the actual logic of running tryRebalancing and // updating the stats void work() final; diff --git a/cachelib/allocator/PoolResizeStrategy.h b/cachelib/allocator/PoolResizeStrategy.h index ce284e5ea8..84b95b9ea7 100644 --- a/cachelib/allocator/PoolResizeStrategy.h +++ b/cachelib/allocator/PoolResizeStrategy.h @@ -37,10 +37,11 @@ class PoolResizeStrategy : public RebalanceStrategy { : RebalanceStrategy(PoolResize), minSlabsPerAllocClass_(minSlabs) {} // implementation that picks a victim - ClassId pickVictimImpl(const CacheBase& cache, PoolId poolId) final { + ClassId pickVictimImpl(const CacheBase&, + PoolId, + const PoolStats& stats) final { // pick the class with maximum eviction age. also, ensure that the class // does not drop below threshold of slabs. - const auto stats = cache.getPoolStats(poolId); auto victims = filterByNumEvictableSlabs( stats, stats.getClassIds(), minSlabsPerAllocClass_); diff --git a/cachelib/allocator/RandomStrategy.h b/cachelib/allocator/RandomStrategy.h index e8f26d18cc..190c2db381 100644 --- a/cachelib/allocator/RandomStrategy.h +++ b/cachelib/allocator/RandomStrategy.h @@ -37,9 +37,9 @@ class RandomStrategy : public RebalanceStrategy { RandomStrategy() = default; explicit RandomStrategy(Config c) : RebalanceStrategy(Random), config_{c} {} - RebalanceContext pickVictimAndReceiverImpl(const CacheBase& cache, - PoolId pid) final { - const auto stats = cache.getPoolStats(pid); + RebalanceContext pickVictimAndReceiverImpl(const CacheBase&, + PoolId, + const PoolStats& stats) final { auto victimIds = filterByNumEvictableSlabs(stats, stats.getClassIds(), config_.minSlabs); const auto victim = pickRandom(victimIds); diff --git a/cachelib/allocator/RebalanceStrategy.cpp b/cachelib/allocator/RebalanceStrategy.cpp index ba082d12a8..b3a98e3b9c 100644 --- a/cachelib/allocator/RebalanceStrategy.cpp +++ b/cachelib/allocator/RebalanceStrategy.cpp @@ -33,9 +33,9 @@ void RebalanceStrategy::recordCurrentState(PoolId pid, const PoolStats& stats) { } } -ClassId RebalanceStrategy::pickAnyClassIdForResizing(const CacheBase& cache, - PoolId pid) { - const auto stats = cache.getPoolStats(pid); +ClassId RebalanceStrategy::pickAnyClassIdForResizing(const CacheBase&, + PoolId, + const PoolStats& stats) { const auto& candidates = stats.mpStats.classIds; // pick victim by maximum number of slabs. const auto ret = *std::max_element( @@ -200,15 +200,15 @@ RebalanceContext RebalanceStrategy::pickVictimAndReceiver( return executeAndRecordCurrentState( cache, pid, - [&]() { + [&](const PoolStats& stats) { // Pick receiver based on allocation failures. 
If nothing found, // fall back to strategy specific Impl RebalanceContext ctx; - ctx.receiverClassId = pickReceiverWithAllocFailures(cache, pid); + ctx.receiverClassId = pickReceiverWithAllocFailures(cache, pid, stats); if (ctx.receiverClassId != Slab::kInvalidClassId) { - ctx.victimClassId = pickVictimImpl(cache, pid); + ctx.victimClassId = pickVictimImpl(cache, pid, stats); if (ctx.victimClassId == cachelib::Slab::kInvalidClassId) { - ctx.victimClassId = pickAnyClassIdForResizing(cache, pid); + ctx.victimClassId = pickAnyClassIdForResizing(cache, pid, stats); } if (ctx.victimClassId != Slab::kInvalidClassId && ctx.victimClassId != ctx.receiverClassId && @@ -219,7 +219,7 @@ RebalanceContext RebalanceStrategy::pickVictimAndReceiver( return ctx; } } - return pickVictimAndReceiverImpl(cache, pid); + return pickVictimAndReceiverImpl(cache, pid, stats); }, kNoOpContext); } @@ -231,19 +231,19 @@ ClassId RebalanceStrategy::pickVictimForResizing(const CacheBase& cache, auto victimClassId = executeAndRecordCurrentState( cache, pid, - [&]() { return pickVictimImpl(cache, pid); }, + [&](const PoolStats& stats) { return pickVictimImpl(cache, pid, stats); }, Slab::kInvalidClassId); if (victimClassId == cachelib::Slab::kInvalidClassId) { - victimClassId = pickAnyClassIdForResizing(cache, pid); + const auto poolStats = cache.getPoolStats(pid); + victimClassId = pickAnyClassIdForResizing(cache, pid, poolStats); } return victimClassId; } -ClassId RebalanceStrategy::pickReceiverWithAllocFailures(const CacheBase& cache, - PoolId pid) { - const auto stats = cache.getPoolStats(pid); +ClassId RebalanceStrategy::pickReceiverWithAllocFailures( + const CacheBase&, PoolId pid, const PoolStats& stats) { auto receivers = stats.getClassIds(); const auto receiverWithAllocFailures = @@ -266,7 +266,7 @@ template T RebalanceStrategy::executeAndRecordCurrentState( const CacheBase& cache, PoolId pid, - const std::function& impl, + const std::function& impl, T noOp) { const auto poolStats = cache.getPoolStats(pid); @@ -277,7 +277,7 @@ T RebalanceStrategy::executeAndRecordCurrentState( return noOp; } - auto rv = impl(); + auto rv = impl(poolStats); recordCurrentState(pid, poolStats); diff --git a/cachelib/allocator/RebalanceStrategy.h b/cachelib/allocator/RebalanceStrategy.h index c9aecf8669..8898e58c37 100644 --- a/cachelib/allocator/RebalanceStrategy.h +++ b/cachelib/allocator/RebalanceStrategy.h @@ -58,8 +58,7 @@ class RebalanceStrategy { struct BaseConfig {}; - explicit RebalanceStrategy(Type strategyType = PickNothingOrTest) - : type_(strategyType) {} + RebalanceStrategy() = default; virtual ~RebalanceStrategy() = default; @@ -83,11 +82,15 @@ class RebalanceStrategy { using PoolState = std::array; static const RebalanceContext kNoOpContext; - virtual RebalanceContext pickVictimAndReceiverImpl(const CacheBase&, PoolId) { + explicit RebalanceStrategy(Type strategyType) : type_(strategyType) {} + + virtual RebalanceContext pickVictimAndReceiverImpl(const CacheBase&, + PoolId, + const PoolStats&) { return {}; } - virtual ClassId pickVictimImpl(const CacheBase&, PoolId) { + virtual ClassId pickVictimImpl(const CacheBase&, PoolId, const PoolStats&) { return Slab::kInvalidClassId; } @@ -148,7 +151,9 @@ class RebalanceStrategy { private: // picks any of the class id ordered by the total slabs. - ClassId pickAnyClassIdForResizing(const CacheBase& cache, PoolId pid); + ClassId pickAnyClassIdForResizing(const CacheBase& cache, + PoolId pid, + const PoolStats& poolStats); // initialize the pool's state to the current stats. 
void initPoolState(PoolId pid, const PoolStats& stats); @@ -159,7 +164,9 @@ class RebalanceStrategy { // Pick a receiver with max alloc failures. If no alloc failures, return // invalid classid. - ClassId pickReceiverWithAllocFailures(const CacheBase& cache, PoolId pid); + ClassId pickReceiverWithAllocFailures(const CacheBase& cache, + PoolId pid, + const PoolStats& stat); // Ensure pool state is initialized before calling impl, and update pool // state after calling impl. @@ -171,10 +178,10 @@ class RebalanceStrategy { template T executeAndRecordCurrentState(const CacheBase& cache, PoolId pid, - const std::function& impl, + const std::function& impl, T noOp); - Type type_{NumTypes}; + Type type_ = PickNothingOrTest; // maintain the state of the previous snapshot of pool for every pool. We // ll use this for processing and getting the deltas for some of these. diff --git a/cachelib/allocator/Refcount.h b/cachelib/allocator/Refcount.h index c60dea34f1..107e10735e 100644 --- a/cachelib/allocator/Refcount.h +++ b/cachelib/allocator/Refcount.h @@ -132,32 +132,28 @@ class FOLLY_PACK_ATTR RefcountWithFlags { RefcountWithFlags& operator=(RefcountWithFlags&&) = delete; // Bumps up the reference count only if the new count will be strictly less - // than or equal to the maxCount. - // @return true if refcount is bumped. false otherwise. - FOLLY_ALWAYS_INLINE bool incRef() noexcept { - Value* const refPtr = &refCount_; - unsigned int nCASFailures = 0; - constexpr bool isWeak = false; - Value oldVal = __atomic_load_n(refPtr, __ATOMIC_RELAXED); - - while (true) { - const Value newCount = oldVal + static_cast(1); - if (UNLIKELY((oldVal & kAccessRefMask) == (kAccessRefMask))) { - return false; + // than or equal to the maxCount and the item is not exclusive + // @return true if refcount is bumped. false otherwise (if item is exclusive) + // @throw exception::RefcountOverflow if new count would be greater than + // maxCount + FOLLY_ALWAYS_INLINE bool incRef() { + auto predicate = [](const Value curValue) { + Value bitMask = getAdminRef(); + + const bool exlusiveBitIsSet = curValue & bitMask; + if (UNLIKELY((curValue & kAccessRefMask) == (kAccessRefMask))) { + throw exception::RefcountOverflow("Refcount maxed out."); } - if (__atomic_compare_exchange_n(refPtr, &oldVal, newCount, isWeak, - __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) { - return true; - } + // Check if the item is not marked for eviction + return !exlusiveBitIsSet || ((curValue & kAccessRefMask) != 0); + }; - if ((++nCASFailures % 4) == 0) { - // this pause takes up to 40 clock cycles on intel and the lock cmpxchgl - // above should take about 100 clock cycles. we pause once every 400 - // cycles or so if we are extremely unlucky. - folly::asm_volatile_pause(); - } - } + auto newValue = [](const Value curValue) { + return (curValue + static_cast(1)); + }; + + return atomicUpdateValue(predicate, newValue); } // Bumps down the reference count @@ -167,33 +163,38 @@ class FOLLY_PACK_ATTR RefcountWithFlags { // @throw RefcountUnderflow when we are trying to decremenet from 0 // refcount and have a refcount leak. 
FOLLY_ALWAYS_INLINE Value decRef() { - Value* const refPtr = &refCount_; - unsigned int nCASFailures = 0; - constexpr bool isWeak = false; - - Value oldVal = __atomic_load_n(refPtr, __ATOMIC_RELAXED); - while (true) { - const Value newCount = oldVal - static_cast(1); - if ((oldVal & kAccessRefMask) == 0) { + auto predicate = [](const Value curValue) { + if ((curValue & kAccessRefMask) == 0) { throw exception::RefcountUnderflow( "Trying to decRef with no refcount. RefCount Leak!"); } + return true; + }; - if (__atomic_compare_exchange_n(refPtr, &oldVal, newCount, isWeak, - __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) { - return newCount & kRefMask; - } - if ((++nCASFailures % 4) == 0) { - // this pause takes up to 40 clock cycles on intel and the lock cmpxchgl - // above should take about 100 clock cycles. we pause once every 400 - // cycles or so if we are extremely unlucky - folly::asm_volatile_pause(); - } - } + Value retValue; + auto newValue = [&retValue](const Value curValue) { + retValue = (curValue - static_cast(1)); + return retValue; + }; + + auto updated = atomicUpdateValue(predicate, newValue); + XDCHECK(updated); + + return retValue & kRefMask; } - // Return refcount excluding control bits and flags - Value getAccessRef() const noexcept { return getRaw() & kAccessRefMask; } + // Return refcount excluding moving refcount, control bits and flags. + Value getAccessRef() const noexcept { + auto raw = getRaw(); + auto accessRef = raw & kAccessRefMask; + + if ((raw & getAdminRef()) && accessRef >= 1) { + // if item is moving, ignore the extra ref + return accessRef - static_cast(1); + } else { + return accessRef; + } + } // Return access ref and the admin ref bits Value getRefWithAccessAndAdmin() const noexcept { @@ -246,65 +247,160 @@ class FOLLY_PACK_ATTR RefcountWithFlags { } /** - * The following four functions are used to track whether or not - * an item is currently in the process of being moved. This happens during a - * slab rebalance or resize operation or during eviction. + * The following two functions correspond to whether or not an item is + * currently in the process of being evicted. * - * An item can only be marked exclusive when `isInMMContainer` returns true - * and the item is not yet marked as exclusive. This operation is atomic. + * An item that is marked for eviction prevents from obtaining a handle to + * the item (incRef() will return false). This guarantees that eviction of + * marked item will always suceed. * - * User can also query if an item "isOnlyExclusive". This returns true only - * if the refcount is 0 and only the exclusive bit is set. + * An item can only be marked for eviction when `isInMMContainer` returns true + * and item does not have `kExclusive` bit set and access ref count is 0. + * This operation is atomic. * - * Unmarking exclusive does not depend on `isInMMContainer`. - * Unmarking exclusive will also return the refcount at the moment of - * unmarking. + * When item is marked for eviction, `kExclusive` bit is set and ref count is + * zero. + * + * Unmarking for eviction clears the `kExclusive` bit. 
`unamrkForEviction` + * does not depend on `isInMMContainer` nor `isAccessible` */ - bool markExclusive() noexcept { - Value bitMask = getAdminRef(); - Value conditionBitMask = getAdminRef(); + bool markForEviction() noexcept { + Value linkedBitMask = getAdminRef(); + Value exclusiveBitMask = getAdminRef(); - Value* const refPtr = &refCount_; - unsigned int nCASFailures = 0; - constexpr bool isWeak = false; - Value curValue = __atomic_load_n(refPtr, __ATOMIC_RELAXED); - while (true) { - const bool flagSet = curValue & conditionBitMask; - const bool alreadyExclusive = curValue & bitMask; - if (!flagSet || alreadyExclusive) { + auto predicate = [linkedBitMask, exclusiveBitMask](const Value curValue) { + const bool unlinked = !(curValue & linkedBitMask); + const bool alreadyExclusive = curValue & exclusiveBitMask; + + if (unlinked || alreadyExclusive) { return false; } - - const Value newValue = curValue | bitMask; - if (__atomic_compare_exchange_n(refPtr, &curValue, newValue, isWeak, - __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) { - XDCHECK(newValue & conditionBitMask); - return true; + if ((curValue & kAccessRefMask) != 0) { + return false; } - if ((++nCASFailures % 4) == 0) { - // this pause takes up to 40 clock cycles on intel and the lock cmpxchgl - // above should take about 100 clock cycles. we pause once every 400 - // cycles or so if we are extremely unlucky. - folly::asm_volatile_pause(); - } - } + return true; + }; + + auto newValue = [exclusiveBitMask](const Value curValue) { + return curValue | exclusiveBitMask; + }; + + return atomicUpdateValue(predicate, newValue); } - Value unmarkExclusive() noexcept { + + Value unmarkForEviction() noexcept { + XDCHECK(isMarkedForEviction()); Value bitMask = ~getAdminRef(); return __atomic_and_fetch(&refCount_, bitMask, __ATOMIC_ACQ_REL) & kRefMask; } - bool isExclusive() const noexcept { - return getRaw() & getAdminRef(); + + bool isMarkedForEviction() const noexcept { + auto raw = getRaw(); + return (raw & getAdminRef()) && ((raw & kAccessRefMask) == 0); + } + + /** + * The following functions correspond to whether or not an item is + * currently in the processed of being moved. + * + * A `moving` item cannot be recycled nor freed to the allocator. It has + * to be unmarked first. + * + * When moving, internal ref count is always >= 1 and `kExclusive` bit is set + * getRefCount does not return the extra ref (it may return 0). + * + * An item can only be marked moving when `isInMMContainer` returns true + * and does not have `kExclusive` bit set. + * + * User can also query if an item "isOnlyMoving". This returns true only + * if the refcount is one and only the exlusive bit is set. + * + * Unmarking clears `kExclusive` bit and decreses the interanl refCount by 1. + * `unmarkMoving` does does not depend on `isInMMContainer` + */ + bool markMoving() { + Value linkedBitMask = getAdminRef(); + Value exclusiveBitMask = getAdminRef(); + + auto predicate = [linkedBitMask, exclusiveBitMask](const Value curValue) { + const bool unlinked = !(curValue & linkedBitMask); + const bool alreadyExclusive = curValue & exclusiveBitMask; + + if (unlinked || alreadyExclusive) { + return false; + } + if (UNLIKELY((curValue & kAccessRefMask) == (kAccessRefMask))) { + throw exception::RefcountOverflow("Refcount maxed out."); + } + + return true; + }; + + auto newValue = [exclusiveBitMask](const Value curValue) { + // Set exclusive flag and make the ref count non-zero (to distinguish + // from exclusive case). 
This extra ref will not be reported to the + // user + return (curValue + static_cast(1)) | exclusiveBitMask; + }; + + return atomicUpdateValue(predicate, newValue); + } + + Value unmarkMoving() noexcept { + XDCHECK(isMoving()); + auto predicate = [](const Value curValue) { + XDCHECK((curValue & kAccessRefMask) != 0); + return true; + }; + + Value retValue; + auto newValue = [&retValue](const Value curValue) { + retValue = + (curValue - static_cast(1)) & ~getAdminRef(); + return retValue; + }; + + auto updated = atomicUpdateValue(predicate, newValue); + XDCHECK(updated); + + return retValue & kRefMask; + } + + bool isMoving() const noexcept { + auto raw = getRaw(); + return (raw & getAdminRef()) && ((raw & kAccessRefMask) != 0); + } + + /** + * This function attempts to mark item for eviction. + * Can only be called on the item that is moving. + * + * Returns true and marks the item for eviction only if item isOnlyMoving. + * Leaves the item marked as moving and returns false otherwise. + */ + bool markForEvictionWhenMoving() { + XDCHECK(isMoving()); + + auto predicate = [](const Value curValue) { + return (curValue & kAccessRefMask) == 1; + }; + + auto newValue = [](const Value curValue) { + XDCHECK((curValue & kAccessRefMask) == 1); + return (curValue - static_cast(1)); + }; + + return atomicUpdateValue(predicate, newValue); } - bool isOnlyExclusive() const noexcept { - // An item is only exclusive when its refcount is zero and only the - // exclusive bit among all the control bits is set. This indicates an item - // is exclusive to the current thread. No other thread is allowed to - // do anything with it. + + bool isOnlyMoving() const noexcept { + // An item is only moving when its refcount is one and only the exclusive + // bit among all the control bits is set. This indicates an item is already + // on its way out of cache. auto ref = getRefWithAccessAndAdmin(); - bool anyOtherBitSet = ref & ~getAdminRef(); - if (anyOtherBitSet) { + Value valueWithoutExclusiveBit = ref & ~getAdminRef(); + if (valueWithoutExclusiveBit != 1) { return false; } return ref & getAdminRef(); @@ -370,6 +466,39 @@ class FOLLY_PACK_ATTR RefcountWithFlags { } private: + /** + * Helper function to modify refCount_ atomically. + * + * If predicate(currentValue) is true, then it atomically assigns result + * of newValueF(currentValue) to refCount_ and returns true. Otherwise + * returns false and leaves refCount_ unmodified. + */ + template + bool atomicUpdateValue(P&& predicate, F&& newValueF) { + Value* const refPtr = &refCount_; + unsigned int nCASFailures = 0; + constexpr bool isWeak = false; + Value curValue = __atomic_load_n(refPtr, __ATOMIC_RELAXED); + while (true) { + if (!predicate(curValue)) { + return false; + } + + const Value newValue = newValueF(curValue); + if (__atomic_compare_exchange_n(refPtr, &curValue, newValue, isWeak, + __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) { + return true; + } + + if ((++nCASFailures % 4) == 0) { + // this pause takes up to 40 clock cycles on intel and the lock cmpxchgl + // above should take about 100 clock cycles. we pause once every 400 + // cycles or so if we are extremely unlucky. 
+ folly::asm_volatile_pause(); + } + } + } + template static Value getFlag() noexcept { static_assert(flagBit >= kNumAccessRefBits + kNumAdminRefBits, diff --git a/cachelib/allocator/datastruct/serialize/objects.thrift b/cachelib/allocator/datastruct/serialize/objects.thrift index bd2c8b79bc..223b804e5b 100644 --- a/cachelib/allocator/datastruct/serialize/objects.thrift +++ b/cachelib/allocator/datastruct/serialize/objects.thrift @@ -22,17 +22,17 @@ namespace cpp2 facebook.cachelib.serialization // Saved state for an SList struct SListObject { - 2: required i64 size, - 3: required i64 compressedHead, // Pointer to the head element + 2: required i64 size; + 3: required i64 compressedHead; // Pointer to the head element // TODO(bwatling): remove the default value and clean up SList::SList() once // we can rely on 'compressedTail' always being valid. - 4: i64 compressedTail = -1, // Pointer to the tail element + 4: i64 compressedTail = -1; // Pointer to the tail element } struct DListObject { - 1: required i64 compressedHead, - 2: required i64 compressedTail, - 3: required i64 size, + 1: required i64 compressedHead; + 2: required i64 compressedTail; + 3: required i64 size; } struct MultiDListObject { diff --git a/cachelib/allocator/datastruct/tests/test_objects.thrift b/cachelib/allocator/datastruct/tests/test_objects.thrift index 51af098510..4cb58b1886 100644 --- a/cachelib/allocator/datastruct/tests/test_objects.thrift +++ b/cachelib/allocator/datastruct/tests/test_objects.thrift @@ -20,6 +20,6 @@ namespace cpp2 facebook.cachelib.test_serialization // testing warm rolls from the old format to the new format. // TODO(bwatling): remove this when 'compressedTail' is always present. struct SListObjectNoCompressedTail { - 2: required i64 size, - 3: required i64 compressedHead, // Pointer to the head element + 2: required i64 size; + 3: required i64 compressedHead; // Pointer to the head element } diff --git a/cachelib/allocator/memory/SlabAllocator.cpp b/cachelib/allocator/memory/SlabAllocator.cpp index ade5a8e535..0106f1bf4e 100644 --- a/cachelib/allocator/memory/SlabAllocator.cpp +++ b/cachelib/allocator/memory/SlabAllocator.cpp @@ -40,7 +40,9 @@ using namespace facebook::cachelib; namespace { -size_t roundDownToSlabSize(size_t size) { return size - (size % sizeof(Slab)); } +static inline size_t roundDownToSlabSize(size_t size) { + return size - (size % sizeof(Slab)); +} } // namespace // definitions to avoid ODR violation. diff --git a/cachelib/allocator/nvmcache/NvmCache.h b/cachelib/allocator/nvmcache/NvmCache.h index c9f5c753f0..e00969b51e 100644 --- a/cachelib/allocator/nvmcache/NvmCache.h +++ b/cachelib/allocator/nvmcache/NvmCache.h @@ -111,7 +111,7 @@ class NvmCache { // when enabled, nvmcache will attempt to resolve misses without incurring // thread hops by using synchronous methods. 
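The Refcount.h rewrite above routes incRef, decRef, markForEviction, markMoving and their unmark counterparts through a single atomicUpdateValue(predicate, newValueF) helper. The sketch below shows the same predicate/new-value CAS-loop pattern on a std::atomic counter, purely as an illustration; the real helper uses the GCC atomic builtins and inserts a pause after every few failed CAS attempts.

```cpp
#include <atomic>
#include <cstdint>

// Retry loop: `predicate` decides whether the update is allowed for the
// currently observed value, `newValueF` computes the replacement, and a
// failed compare-exchange simply re-reads and re-checks.
template <typename P, typename F>
bool atomicUpdate(std::atomic<uint32_t>& value, P&& predicate, F&& newValueF) {
  uint32_t cur = value.load(std::memory_order_relaxed);
  while (true) {
    if (!predicate(cur)) {
      return false;  // caller's precondition no longer holds
    }
    const uint32_t next = newValueF(cur);
    // On failure, compare_exchange_weak stores the freshly observed value in
    // `cur`, so the next iteration evaluates the predicate against it.
    if (value.compare_exchange_weak(cur, next, std::memory_order_acq_rel,
                                    std::memory_order_relaxed)) {
      return true;
    }
  }
}
```

markForEviction and markMoving above then become different predicate/new-value pairs over the same loop: the former requires the access ref count to be zero before setting the exclusive bit, while the latter additionally bumps the ref count by one so the two states remain distinguishable.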
- bool enableFastNegativeLookups{false}; + bool enableFastNegativeLookups{true}; // serialize the config for debugging purposes std::map serialize() const; diff --git a/cachelib/allocator/nvmcache/tests/NvmCacheTests.cpp b/cachelib/allocator/nvmcache/tests/NvmCacheTests.cpp index 7355627fea..ec74c51980 100644 --- a/cachelib/allocator/nvmcache/tests/NvmCacheTests.cpp +++ b/cachelib/allocator/nvmcache/tests/NvmCacheTests.cpp @@ -245,7 +245,13 @@ TEST_F(NvmCacheTest, EvictToNvmGetCheckCtime) { ASSERT_NE(nullptr, it); cache_->insertOrReplace(it); keyToCtime.insert({key, it->getCreationTime()}); + // Avoid any nvm eviction being dropped due to the race with still + // outstanding remove operation for insertion + if (i % 100 == 0) { + nvm.flushNvmCache(); + } } + nvm.flushNvmCache(); const auto nEvictions = this->evictionCount() - evictBefore; ASSERT_LT(0, nEvictions); @@ -331,6 +337,11 @@ TEST_F(NvmCacheTest, Delete) { auto it = nvm.allocate(pid, key, 15 * 1024); ASSERT_NE(nullptr, it); nvm.insertOrReplace(it); + // Avoid any nvm eviction being dropped due to the race with still + // outstanding remove operation for insertion + if (i % 100 == 0) { + nvm.flushNvmCache(); + } } nvm.flushNvmCache(); @@ -533,6 +544,11 @@ TEST_F(NvmCacheTest, NvmEvicted) { auto it = nvm.allocate(pid, key, allocSize); ASSERT_NE(nullptr, it); nvm.insertOrReplace(it); + // Avoid any nvm eviction being dropped due to the race with still + // outstanding remove operation for insertion + if (i % 100 == 0) { + nvm.flushNvmCache(); + } } nvm.flushNvmCache(); diff --git a/cachelib/allocator/serialize/objects.thrift b/cachelib/allocator/serialize/objects.thrift index 8d30ee8d83..61297cdf1d 100644 --- a/cachelib/allocator/serialize/objects.thrift +++ b/cachelib/allocator/serialize/objects.thrift @@ -23,136 +23,136 @@ include "cachelib/allocator/datastruct/serialize/objects.thrift" // make sure to communicate that with our users. struct CacheAllocatorMetadata { - 1: required i64 allocatorVersion, // version of cache alloctor - 2: i64 cacheCreationTime = 0, // time when the cache was created. - 3: required i64 accessType = 0, // default chained alloc - 4: required i64 mmType = 0, // default LRU - 5: map> fragmentationSize, - 6: list compactCachePools, - 7: i64 numPermanentItems, - 8: i64 numChainedParentItems, - 9: i64 numChainedChildItems, - 10: i64 ramFormatVersion = 0, // format version of ram cache - 11: i64 numAbortedSlabReleases = 0, // number of times slab release is aborted + 1: required i64 allocatorVersion; // version of cache alloctor + 2: i64 cacheCreationTime = 0; // time when the cache was created. 
+ 3: required i64 accessType = 0; // default chained alloc + 4: required i64 mmType = 0; // default LRU + 5: map> fragmentationSize; + 6: list compactCachePools; + 7: i64 numPermanentItems; + 8: i64 numChainedParentItems; + 9: i64 numChainedChildItems; + 10: i64 ramFormatVersion = 0; // format version of ram cache + 11: i64 numAbortedSlabReleases = 0; // number of times slab release is aborted } struct NvmCacheMetadata { - 1: i64 nvmFormatVersion = 0, - 2: i64 creationTime = 0, - 3: bool safeShutDown = false, - 4: bool encryptionEnabled = false, - 5: bool truncateAllocSize = false, + 1: i64 nvmFormatVersion = 0; + 2: i64 creationTime = 0; + 3: bool safeShutDown = false; + 4: bool encryptionEnabled = false; + 5: bool truncateAllocSize = false; } struct CompactCacheMetadataObject { - 1: required i64 keySize, - 2: required i64 valueSize, + 1: required i64 keySize; + 2: required i64 valueSize; } struct CompactCacheAllocatorObject { - 1: required list chunks, - 2: required CompactCacheMetadataObject ccMetadata, + 1: required list chunks; + 2: required CompactCacheMetadataObject ccMetadata; } struct CompactCacheAllocatorManagerObject { - 1: required map allocators, + 1: required map allocators; } struct MMLruConfig { - 1: required i32 lruRefreshTime, - 2: required bool updateOnWrite, - 3: required i32 lruInsertionPointSpec, - 4: bool updateOnRead = true, - 5: bool tryLockUpdate = false, - 6: double lruRefreshRatio = 0.0, + 1: required i32 lruRefreshTime; + 2: required bool updateOnWrite; + 3: required i32 lruInsertionPointSpec; + 4: bool updateOnRead = true; + 5: bool tryLockUpdate = false; + 6: double lruRefreshRatio = 0.0; } struct MMLruObject { - 1: required MMLruConfig config, + 1: required MMLruConfig config; // number of evictions for this MM object. - 5: i64 evictions = 0, + 5: i64 evictions = 0; - 6: required i64 insertionPoint, - 7: required i64 tailSize, - 8: required DListObject lru, - 9: required i64 compressedInsertionPoint, + 6: required i64 insertionPoint; + 7: required i64 tailSize; + 8: required DListObject lru; + 9: required i64 compressedInsertionPoint; } struct MMLruCollection { - 1: required map> pools, + 1: required map> pools; } struct MM2QConfig { - 1: required i32 lruRefreshTime, - 2: required bool updateOnWrite, - 3: required i32 hotSizePercent, - 4: required i32 coldSizePercent, - 5: bool updateOnRead = true, - 6: bool tryLockUpdate = false, - 7: bool rebalanceOnRecordAccess = true, - 8: double lruRefreshRatio = 0.0, + 1: required i32 lruRefreshTime; + 2: required bool updateOnWrite; + 3: required i32 hotSizePercent; + 4: required i32 coldSizePercent; + 5: bool updateOnRead = true; + 6: bool tryLockUpdate = false; + 7: bool rebalanceOnRecordAccess = true; + 8: double lruRefreshRatio = 0.0; } struct MM2QObject { - 1: required MM2QConfig config, - 13: bool tailTrackingEnabled = false, + 1: required MM2QConfig config; + 13: bool tailTrackingEnabled = false; // number of evictions for this MM object. 
- 11: i64 evictions = 0, + 11: i64 evictions = 0; // Warm, hot and cold lrus - 12: required MultiDListObject lrus, + 12: required MultiDListObject lrus; } struct MM2QCollection { - 1: required map> pools, + 1: required map> pools; } struct MMTinyLFUConfig { - 1: required i32 lruRefreshTime, - 2: required bool updateOnWrite, - 3: required i32 windowToCacheSizeRatio, - 4: required i32 tinySizePercent, - 5: bool updateOnRead = true, - 6: bool tryLockUpdate = false, - 7: double lruRefreshRatio = 0.0, + 1: required i32 lruRefreshTime; + 2: required bool updateOnWrite; + 3: required i32 windowToCacheSizeRatio; + 4: required i32 tinySizePercent; + 5: bool updateOnRead = true; + 6: bool tryLockUpdate = false; + 7: double lruRefreshRatio = 0.0; } struct MMTinyLFUObject { - 1: required MMTinyLFUConfig config, + 1: required MMTinyLFUConfig config; // number of evictions for this MM object. - 2: i64 evictions = 0, + 2: i64 evictions = 0; // Warm, hot and cold lrus - 3: required MultiDListObject lrus, + 3: required MultiDListObject lrus; } struct MMTinyLFUCollection { - 1: required map> pools, + 1: required map> pools; } struct ChainedHashTableObject { // fields in ChainedHashTable::Config - 1: required i32 bucketsPower, - 2: required i32 locksPower, - 3: i64 numKeys, + 1: required i32 bucketsPower; + 2: required i32 locksPower; + 3: i64 numKeys; // this magic id ensures on a warm roll, user cannot // start the cache with a different hash function - 4: i32 hasherMagicId = 0, + 4: i32 hasherMagicId = 0; } struct MMTTLBucketObject { - 4: i64 expirationTime, - 5: i64 creationTime, - 6: required DListObject dList, + 4: i64 expirationTime; + 5: i64 creationTime; + 6: required DListObject dList; } struct TTLBucketCollection { - 1: required map buckets, - 2: i64 minEpoch = 0, - 3: i64 maxTTL = 0, - 4: i64 interval = 0, + 1: required map buckets; + 2: i64 minEpoch = 0; + 3: i64 maxTTL = 0; + 4: i64 interval = 0; } diff --git a/cachelib/allocator/tests/AllocatorTestUtils.h b/cachelib/allocator/tests/AllocatorTestUtils.h index 58664ee8bc..e089f6c14c 100644 --- a/cachelib/allocator/tests/AllocatorTestUtils.h +++ b/cachelib/allocator/tests/AllocatorTestUtils.h @@ -33,12 +33,15 @@ struct AlwaysPickOneRebalanceStrategy : public RebalanceStrategy { private: ClassId pickVictim(const CacheBase&, PoolId) { return victim; } - ClassId pickVictimImpl(const CacheBase& allocator, PoolId pid) override { + ClassId pickVictimImpl(const CacheBase& allocator, + PoolId pid, + const PoolStats&) override { return pickVictim(allocator, pid); } RebalanceContext pickVictimAndReceiverImpl(const CacheBase& allocator, - PoolId pid) override { + PoolId pid, + const PoolStats&) override { return {pickVictim(allocator, pid), receiver}; } }; diff --git a/cachelib/allocator/tests/BaseAllocatorTest.h b/cachelib/allocator/tests/BaseAllocatorTest.h index d684545cb9..aa9d38a857 100644 --- a/cachelib/allocator/tests/BaseAllocatorTest.h +++ b/cachelib/allocator/tests/BaseAllocatorTest.h @@ -713,35 +713,29 @@ class BaseAllocatorTest : public AllocatorTest { auto handle = alloc.find("key"); ASSERT_NE(handle, nullptr); ASSERT_TRUE(isConst(handle->getMemory())); - ASSERT_EQ(handle.isWriteHandle(), false); // read handle clone auto handle2 = handle.clone(); ASSERT_TRUE(isConst(handle2->getMemory())); - ASSERT_EQ(handle2.isWriteHandle(), false); // upgrade a read handle to a write handle auto handle3 = std::move(handle).toWriteHandle(); ASSERT_FALSE(isConst(handle3->getMemory())); - ASSERT_EQ(handle3.isWriteHandle(), true); } { auto handle = 
alloc.findToWrite("key"); ASSERT_NE(handle, nullptr); ASSERT_FALSE(isConst(handle->getMemory())); - ASSERT_EQ(handle.isWriteHandle(), true); // write handle clone auto handle2 = handle.clone(); ASSERT_FALSE(isConst(handle2->getMemory())); - ASSERT_EQ(handle2.isWriteHandle(), true); // downgrade a write handle to a read handle ReadHandle handle3 = handle.clone(); ASSERT_NE(handle3, nullptr); ASSERT_TRUE(isConst(handle3->getMemory())); - ASSERT_EQ(handle3.isWriteHandle(), false); } { @@ -752,7 +746,7 @@ class BaseAllocatorTest : public AllocatorTest { // This is like doing a "clone" and setting it into wait context waitContext->set(alloc.find("key")); auto handle2 = std::move(handle).toWriteHandle(); - ASSERT_EQ(handle2.isWriteHandle(), true); + ASSERT_FALSE(isConst(handle2->getMemory())); } } diff --git a/cachelib/allocator/tests/ItemTest.cpp b/cachelib/allocator/tests/ItemTest.cpp index b0f3a2fdec..70dd1277fe 100644 --- a/cachelib/allocator/tests/ItemTest.cpp +++ b/cachelib/allocator/tests/ItemTest.cpp @@ -83,10 +83,20 @@ TEST(ItemTest, ExpiryTime) { EXPECT_EQ(tenMins, item->getConfiguredTTL()); // Test that writes fail while the item is moving - item->markExclusive(); + result = item->markMoving(); + EXPECT_TRUE(result); + result = item->updateExpiryTime(0); + EXPECT_FALSE(result); + item->unmarkMoving(); + + // Test that writes fail while the item is marked for eviction + item->markAccessible(); + result = item->markForEviction(); + EXPECT_TRUE(result); result = item->updateExpiryTime(0); EXPECT_FALSE(result); - item->unmarkExclusive(); + item->unmarkForEviction(); + item->unmarkAccessible(); // Test that writes fail while the item is not in an MMContainer item->unmarkInMMContainer(); diff --git a/cachelib/allocator/tests/NvmTestUtils.h b/cachelib/allocator/tests/NvmTestUtils.h index 6d6242aadf..cad96c41d4 100644 --- a/cachelib/allocator/tests/NvmTestUtils.h +++ b/cachelib/allocator/tests/NvmTestUtils.h @@ -27,7 +27,7 @@ namespace utils { using NavyConfig = navy::NavyConfig; inline NavyConfig getNvmTestConfig(const std::string& cacheDir) { NavyConfig config{}; - config.setSimpleFile(cacheDir + "/navy", 100 * 1024ULL * 1024ULL); + config.setSimpleFile(cacheDir + "/navy", 200 * 1024ULL * 1024ULL); config.setDeviceMetadataSize(4 * 1024 * 1024); config.setBlockSize(1024); config.setNavyReqOrderingShards(10); diff --git a/cachelib/allocator/tests/RefCountTest.cpp b/cachelib/allocator/tests/RefCountTest.cpp index b355a48a8e..1f31894ddc 100644 --- a/cachelib/allocator/tests/RefCountTest.cpp +++ b/cachelib/allocator/tests/RefCountTest.cpp @@ -30,6 +30,7 @@ class RefCountTest : public AllocTestBase { public: static void testMultiThreaded(); static void testBasic(); + static void testMarkForEvictionAndMoving(); }; void RefCountTest::testMultiThreaded() { @@ -81,7 +82,7 @@ void RefCountTest::testBasic() { ASSERT_EQ(0, ref.getRaw()); ASSERT_FALSE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_FALSE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -89,7 +90,7 @@ void RefCountTest::testBasic() { ref.markInMMContainer(); ASSERT_TRUE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(0, ref.getAccessRef()); ASSERT_FALSE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -105,13 +106,13 @@ void RefCountTest::testBasic() { // Incrementing past the max will fail auto rawRef = ref.getRaw(); - 
ASSERT_FALSE(ref.incRef()); + ASSERT_THROW(ref.incRef(), std::overflow_error); ASSERT_EQ(rawRef, ref.getRaw()); // Bumping up access ref shouldn't affect admin ref and flags ASSERT_TRUE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(RefcountWithFlags::kAccessRefMask, ref.getAccessRef()); ASSERT_TRUE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -128,7 +129,7 @@ void RefCountTest::testBasic() { // Bumping down access ref shouldn't affect admin ref and flags ASSERT_TRUE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(0, ref.getAccessRef()); ASSERT_TRUE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -136,7 +137,7 @@ void RefCountTest::testBasic() { ref.template unSetFlag(); ASSERT_TRUE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(0, ref.getAccessRef()); ASSERT_FALSE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -145,33 +146,104 @@ void RefCountTest::testBasic() { ASSERT_EQ(0, ref.getRaw()); ASSERT_FALSE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(0, ref.getAccessRef()); ASSERT_FALSE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); // conditionally set flags - ASSERT_FALSE((ref.markExclusive())); + ASSERT_FALSE((ref.markMoving())); ref.markInMMContainer(); - ASSERT_TRUE((ref.markExclusive())); - ASSERT_FALSE((ref.isOnlyExclusive())); + // only first one succeeds + ASSERT_TRUE((ref.markMoving())); + ASSERT_FALSE((ref.markMoving())); ref.unmarkInMMContainer(); + ref.template setFlag(); - // Have no other admin refcount but with a flag still means "isOnlyExclusive" - ASSERT_TRUE((ref.isOnlyExclusive())); + // Have no other admin refcount but with a flag still means "isOnlyMoving" + ASSERT_TRUE((ref.isOnlyMoving())); - // Set some flags and verify that "isOnlyExclusive" does not care about flags + // Set some flags and verify that "isOnlyMoving" does not care about flags ref.markIsChainedItem(); ASSERT_TRUE(ref.isChainedItem()); - ASSERT_TRUE((ref.isOnlyExclusive())); + ASSERT_TRUE((ref.isOnlyMoving())); ref.unmarkIsChainedItem(); ASSERT_FALSE(ref.isChainedItem()); - ASSERT_TRUE((ref.isOnlyExclusive())); + ASSERT_TRUE((ref.isOnlyMoving())); +} + +void RefCountTest::testMarkForEvictionAndMoving() { + { + // cannot mark for eviction when not in MMContainer + RefcountWithFlags ref; + ASSERT_FALSE(ref.markForEviction()); + } + + { + // can mark for eviction when in MMContainer + // and unmarkForEviction return value contains admin bits + RefcountWithFlags ref; + ref.markInMMContainer(); + ASSERT_TRUE(ref.markForEviction()); + ASSERT_TRUE(ref.unmarkForEviction() > 0); + } + + { + // cannot mark for eviction when moving + RefcountWithFlags ref; + ref.markInMMContainer(); + + ASSERT_TRUE(ref.markMoving()); + ASSERT_FALSE(ref.markForEviction()); + + ref.unmarkInMMContainer(); + auto ret = ref.unmarkMoving(); + ASSERT_EQ(ret, 0); + } + + { + // cannot mark moving when marked for eviction + RefcountWithFlags ref; + ref.markInMMContainer(); + + ASSERT_TRUE(ref.markForEviction()); + ASSERT_FALSE(ref.markMoving()); + + ref.unmarkInMMContainer(); + auto ret = ref.unmarkForEviction(); + ASSERT_EQ(ret, 0); + } + + { + // can mark moving when ref count > 0 + RefcountWithFlags 
ref; + ref.markInMMContainer(); + + ref.incRef(); + + ASSERT_TRUE(ref.markMoving()); + + ref.unmarkInMMContainer(); + auto ret = ref.unmarkMoving(); + ASSERT_EQ(ret, 1); + } + + { + // cannot mark for eviction when ref count > 0 + RefcountWithFlags ref; + ref.markInMMContainer(); + + ref.incRef(); + ASSERT_FALSE(ref.markForEviction()); + } } } // namespace TEST_F(RefCountTest, MutliThreaded) { testMultiThreaded(); } TEST_F(RefCountTest, Basic) { testBasic(); } +TEST_F(RefCountTest, MarkForEvictionAndMoving) { + testMarkForEvictionAndMoving(); +} } // namespace tests } // namespace cachelib } // namespace facebook diff --git a/cachelib/allocator/tests/SimpleRebalancingTest.h b/cachelib/allocator/tests/SimpleRebalancingTest.h index c25c6b0d65..634882c730 100644 --- a/cachelib/allocator/tests/SimpleRebalancingTest.h +++ b/cachelib/allocator/tests/SimpleRebalancingTest.h @@ -34,12 +34,12 @@ struct SimpleRebalanceStrategy : public RebalanceStrategy { SimpleRebalanceStrategy() : RebalanceStrategy(PickNothingOrTest) {} private: - ClassId pickVictim(const CacheBase& allocator, PoolId pid) { - auto poolStats = allocator.getPoolStats(pid); + ClassId pickVictim(const CacheBase&, PoolId, const PoolStats& poolStats) { ClassId cid = Slab::kInvalidClassId; uint64_t maxActiveAllocs = 0; for (size_t i = 0; i < poolStats.mpStats.acStats.size(); ++i) { - const auto& acStats = poolStats.mpStats.acStats[static_cast(i)]; + const auto& acStats = + poolStats.mpStats.acStats.at(static_cast(i)); if (maxActiveAllocs < acStats.activeAllocs) { maxActiveAllocs = acStats.activeAllocs; cid = static_cast(i); @@ -48,13 +48,16 @@ struct SimpleRebalanceStrategy : public RebalanceStrategy { return cid; } - ClassId pickVictimImpl(const CacheBase& allocator, PoolId pid) override { - return pickVictim(allocator, pid); + ClassId pickVictimImpl(const CacheBase& allocator, + PoolId pid, + const PoolStats& stats) override { + return pickVictim(allocator, pid, stats); } RebalanceContext pickVictimAndReceiverImpl(const CacheBase& allocator, - PoolId pid) override { - return {pickVictim(allocator, pid), Slab::kInvalidClassId}; + PoolId pid, + const PoolStats& stats) override { + return {pickVictim(allocator, pid, stats), Slab::kInvalidClassId}; } }; diff --git a/cachelib/benchmarks/DataTypeBench.thrift b/cachelib/benchmarks/DataTypeBench.thrift index a1f16f4b7e..94d53ade81 100644 --- a/cachelib/benchmarks/DataTypeBench.thrift +++ b/cachelib/benchmarks/DataTypeBench.thrift @@ -17,11 +17,9 @@ namespace cpp2 facebook.cachelib.datatypebench struct StdMap { - 1: required map m, + 1: required map m; } struct StdUnorderedMap { - 1: required map - (cpp.template = "std::unordered_map") - m, + 1: required map (cpp.template = "std::unordered_map") m; } diff --git a/cachelib/cachebench/cache/Cache-inl.h b/cachelib/cachebench/cache/Cache-inl.h index ed8bfd1b04..750abdf431 100644 --- a/cachelib/cachebench/cache/Cache-inl.h +++ b/cachelib/cachebench/cache/Cache-inl.h @@ -124,24 +124,41 @@ Cache::Cache(const CacheConfig& config, // already have a file, user provided it. We will also keep it around // after the tests. 
auto path = config_.nvmCachePaths[0]; - if (cachelib::util::isDir(path)) { + bool isDir; + try { + isDir = cachelib::util::isDir(path); + } catch (const std::system_error& e) { + XLOGF(INFO, "nvmCachePath {} does not exist", path); + isDir = false; + } + + if (isDir) { const auto uniqueSuffix = folly::sformat("nvmcache_{}_{}", ::getpid(), folly::Random::rand32()); path = path + "/" + uniqueSuffix; util::makeDir(path); nvmCacheFilePath_ = path; + XLOGF(INFO, "Configuring NVM cache: directory {} size {} MB", path, + config_.nvmCacheSizeMB); nvmConfig.navyConfig.setSimpleFile(path + "/navy_cache", config_.nvmCacheSizeMB * MB, true /*truncateFile*/); } else { - nvmConfig.navyConfig.setSimpleFile(path, config_.nvmCacheSizeMB * MB); + XLOGF(INFO, "Configuring NVM cache: simple file {} size {} MB", path, + config_.nvmCacheSizeMB); + nvmConfig.navyConfig.setSimpleFile(path, config_.nvmCacheSizeMB * MB, + true /* truncateFile */); } } else if (config_.nvmCachePaths.size() > 1) { + XLOGF(INFO, "Configuring NVM cache: RAID-0 ({} devices) size {} MB", + config_.nvmCachePaths.size(), config_.nvmCacheSizeMB); // set up a software raid-0 across each nvm cache path. nvmConfig.navyConfig.setRaidFiles(config_.nvmCachePaths, config_.nvmCacheSizeMB * MB); } else { // use memory to mock NVM. + XLOGF(INFO, "Configuring NVM cache: memory file size {} MB", + config_.nvmCacheSizeMB); nvmConfig.navyConfig.setMemoryFile(config_.nvmCacheSizeMB * MB); } nvmConfig.navyConfig.setDeviceMetadataSize(config_.nvmCacheMetadataSizeMB * @@ -794,8 +811,17 @@ void Cache::setUint64ToItem(WriteHandle& handle, template void Cache::setStringItem(WriteHandle& handle, const std::string& str) { - auto ptr = reinterpret_cast(getMemory(handle)); - std::memcpy(ptr, str.data(), std::min(str.size(), getSize(handle))); + auto dataSize = getSize(handle); + if (dataSize < 1) + return; + + auto ptr = reinterpret_cast(getMemory(handle)); + std::strncpy(ptr, str.c_str(), dataSize); + + // Make sure the copied string ends with null char + if (str.size() + 1 > dataSize) { + ptr[dataSize - 1] = '\0'; + } } template diff --git a/cachelib/cachebench/runner/AsyncCacheStressor.h b/cachelib/cachebench/runner/AsyncCacheStressor.h index 5b50db43b4..830b503795 100644 --- a/cachelib/cachebench/runner/AsyncCacheStressor.h +++ b/cachelib/cachebench/runner/AsyncCacheStressor.h @@ -287,6 +287,10 @@ class AsyncCacheStressor : public Stressor { ++stats.get; auto lock = chainedItemAcquireUniqueLock(*key); + // This was moved outside the lambda, as otherwise gcc-8.x crashes with an + // internal compiler error here (suspected regression in folly). 
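Among the cachebench changes above, setStringItem now copies at most the item's data size and forces NUL termination when the source string does not fit, rather than doing a raw memcpy. A standalone sketch of that bounded-copy behaviour (an illustrative helper, not the CacheLib function):

```cpp
#include <cstddef>
#include <cstring>
#include <string>

void copyStringBounded(char* dst, std::size_t dstSize, const std::string& src) {
  if (dstSize < 1) {
    return;  // no room to write anything
  }
  // strncpy stops after dstSize bytes and NUL-pads shorter sources.
  std::strncpy(dst, src.c_str(), dstSize);
  if (src.size() + 1 > dstSize) {
    dst[dstSize - 1] = '\0';  // source was truncated: terminate explicitly
  }
}
```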
+ XDCHECK(req->sizeBegin + 1 != req->sizeEnd); + auto onReadyFn = [&, req, key, l = std::move(lock), pid](auto hdl) { WriteHandle wHdl; if (hdl == nullptr) { @@ -303,7 +307,6 @@ class AsyncCacheStressor : public Stressor { } else { wHdl = std::move(hdl).toWriteHandle(); } - XDCHECK(req->sizeBegin + 1 != req->sizeEnd); bool chainSuccessful = false; for (auto j = req->sizeBegin + 1; j != req->sizeEnd; j++) { ++stats.addChained; diff --git a/cachelib/cachebench/runner/CacheStressor.h b/cachelib/cachebench/runner/CacheStressor.h index 4ea54b1f14..976414b965 100644 --- a/cachelib/cachebench/runner/CacheStressor.h +++ b/cachelib/cachebench/runner/CacheStressor.h @@ -98,8 +98,14 @@ class CacheStressor : public Stressor { } cacheConfig.nvmWriteBytesCallback = std::bind(&CacheStressor::getNvmBytesWritten, this); - cache_ = std::make_unique(cacheConfig, movingSync, - cacheConfig.cacheDir, config_.touchValue); + try { + cache_ = std::make_unique( + cacheConfig, movingSync, cacheConfig.cacheDir, config_.touchValue); + } catch (const std::exception& e) { + XLOG(INFO) << "Exception while creating cache: " << e.what(); + throw; + } + if (config_.opPoolDistribution.size() > cache_->numPools()) { throw std::invalid_argument(folly::sformat( "more pools specified in the test than in the cache. " @@ -254,9 +260,7 @@ class CacheStressor : public Stressor { } if (!itemValue.empty()) { - // Add the null character to ensure this is a proper c string. - // TODO(T141356292): Clean this up to avoid allocating a new string - cache_->setStringItem(handle, itemValue + "\0"); + cache_->setStringItem(handle, itemValue); } else { cache_->setStringItem(handle, hardcodedString_); } diff --git a/cachelib/cachebench/util/NandWrites.cpp b/cachelib/cachebench/util/NandWrites.cpp index ae82aca65c..22cfc4d0d0 100644 --- a/cachelib/cachebench/util/NandWrites.cpp +++ b/cachelib/cachebench/util/NandWrites.cpp @@ -121,11 +121,11 @@ std::vector getBytesWrittenLine( // /ritten/, so that's what we do here. We just use the first matching // line. std::vector lines; - folly::split("\n", out, lines, true /* ignoreEmpty */); + folly::split('\n', out, lines, true /* ignoreEmpty */); for (const auto& line : lines) { if (line.find("ritten") != std::string::npos) { std::vector fields; - folly::split(" ", line, fields, true /* ignoreEmpty */); + folly::split(' ', line, fields, true /* ignoreEmpty */); return fields; } } @@ -400,6 +400,7 @@ uint64_t nandWriteBytes(const folly::StringPiece& deviceName, const folly::StringPiece&)>> vendorMap{{"samsung", samsungWriteBytes}, {"mz1lb960hbjr-", samsungWriteBytes}, + {"mzol23t8hcls-", samsungWriteBytes}, // The Samsung PM983a doesn't include Samsung in the model // number at this time, but it's a Samsung device. 
{"liteon", liteonWriteBytes}, diff --git a/cachelib/cachebench/util/tests/NandWritesTest.cpp b/cachelib/cachebench/util/tests/NandWritesTest.cpp index 0002e8a837..af09593f41 100644 --- a/cachelib/cachebench/util/tests/NandWritesTest.cpp +++ b/cachelib/cachebench/util/tests/NandWritesTest.cpp @@ -240,6 +240,96 @@ TEST_F(NandWritesTest, nandWriteBytes_handlesSamsungPM983aDevice) { EXPECT_EQ(nandWriteBytes("nvme1n1", kNvmePath, mockFactory_), 35061362294784); } +TEST_F(NandWritesTest, nandWriteBytes_handlesSamsungPM9A3Device) { + constexpr auto& kListOutput = R"EOF({ + "Devices" : [ + { + "DevicePath" : "/dev/nvme0n1", + "Firmware" : "P1FB007", + "Index" : 0, + "NameSpace" : 1, + "ModelNumber" : "MTFDHBA512TCK", + "ProductName" : "Non-Volatile memory controller: Micron Technology Inc Device 0x5410", + "SerialNumber" : " 21062E6B8061", + "UsedBytes" : 512110190592, + "MaximumLBA" : 1000215216, + "PhysicalSize" : 512110190592, + "SectorSize" : 512 + }, + { + "DevicePath" : "/dev/nvme1n1", + "Firmware" : "GDA82F2Q", + "Index" : 1, + "NameSpace" : 1, + "ModelNumber" : "MZOL23T8HCLS-00AFB", + "ProductName" : "Unknown device", + "SerialNumber" : "S5X9NG0T116005", + "UsedBytes" : 104910848, + "MaximumLBA" : 918149526, + "PhysicalSize" : 3760740458496, + "SectorSize" : 4096 + }, + { + "DevicePath" : "/dev/nvme2n1", + "Firmware" : "GDA82F2Q", + "Index" : 2, + "NameSpace" : 1, + "ModelNumber" : "MZOL23T8HCLS-00AFB", + "ProductName" : "Unknown device", + "SerialNumber" : "S5X9NG0T116027", + "UsedBytes" : 0, + "MaximumLBA" : 918149526, + "PhysicalSize" : 3760740458496, + "SectorSize" : 4096 + } + ] +})EOF"; + + constexpr auto& kSmartLogOutput = R"EOF( +[015:000] PhysicallyWrittenBytes : 241393664 +[031:016] Physically Read Bytes : 106217472 +[037:032] Bad NAND Block Count (Raw Value) : 0 +[039:038] Bad NAND Block Count (Normalized Value) : 100 +[047:040] Uncorrectable Read Error Count : 0 +[055:048] Soft ECC Error Count : 0 +[059:056] SSD End to end Correction Count (Detected Errors) : 0 +[063:060] SSD End to end Correction Count (Corrected Errors): 0 +[064:064] System Data Percentage Used : 0 +[068:065] User Data Erase Count (Min) : 0 +[072:069] User Data Erase Count (Max) : 1 +[080:073] Refresh Count : 0 +[086:081] Program Fail Count (Raw Value) : 0 +[088:087] Program Fail Count (Normalized Value) : 100 +[094:089] User Data Erase Fail Count (Raw Value) : 0 +[096:095] User Data Erase Fail Count (Normalized Value) : 100 +[102:097] System Area Erase Fail Count (Raw Value) : 0 +[104:103] System Area Erase Fail Count (Normalized value) : 100 +[105:105] Thermal Throttling Status : 0 +[106:106] Thermal Throttling Count : 0 +[108:107] PHY Error Count : 0 +[110:109] Bad DLLP Count : 0 +[112:111] Bad TLP Count : 0 +[114:113] Reserved : 0 +[118:115] Incomplete Shutdowns : 0 +[119:119] % Free Blocks : 96 +[121:120] PCIe Correctable Error Count (RTS) : 0 +[123:122] PCIe Correctable Error Count (RRS) : 0 +[131:124] XOR Recovery Count : 0 +[137:132] Bad System NAND block count (Raw Value) : 0 +[139:138] Bad System NAND block count (Normalized Value) : 100 +[141:140] Capacitor Health : 163 +[157:142] Endurance Estimate : 28862181 +[165:158] Security Version Number : 4294967296 +[167:166] Log Page Version : 1 +)EOF"; + + mockFactory_->expectedCommands( + {{{kNvmePath, "list", "-o", "json"}, kListOutput}, + {{kNvmePath, "samsung", "vs-smart-add-log", "/dev/nvme1n1"}, + kSmartLogOutput}}); + EXPECT_EQ(nandWriteBytes("nvme1n1", kNvmePath, mockFactory_), 241393664); +} + TEST_F(NandWritesTest, 
nandWriteBytes_handlesSeagateDevice) { constexpr auto& kListOutput = R"EOF({ "Devices" : [ diff --git a/cachelib/cachebench/workload/FastDiscrete.h b/cachelib/cachebench/workload/FastDiscrete.h index 7c4341b376..ca04485472 100644 --- a/cachelib/cachebench/workload/FastDiscrete.h +++ b/cachelib/cachebench/workload/FastDiscrete.h @@ -107,9 +107,12 @@ class FastDiscreteDistribution final : public Distribution { sizes[i] -= facebook::cachelib::util::narrow_cast(bucketPct * sizes[i]); probs[i] -= bucketPct * probs[i]; - buckets.push_back(static_cast(objectsSeen * scalingFactor_)); + + auto scaledObjects = + static_cast(objectsSeen * scalingFactor_); + buckets.push_back(scaledObjects); if (bucketOffsets_.size() > 0) { - bucketOffsets_.push_back(bucketOffsets_.back() + objectsSeen); + bucketOffsets_.push_back(bucketOffsets_.back() + scaledObjects); } weightSeen = 0.0; objectsSeen = 0; diff --git a/cachelib/cachebench/workload/KVReplayGenerator.h b/cachelib/cachebench/workload/KVReplayGenerator.h index 4b12970081..a9124e2bd7 100644 --- a/cachelib/cachebench/workload/KVReplayGenerator.h +++ b/cachelib/cachebench/workload/KVReplayGenerator.h @@ -230,10 +230,10 @@ inline bool KVReplayGenerator::parseRequest(const std::string& line, // Set op const auto& op = fields[SampleFields::OP]; - // TODO only memcache optypes are supported - if (!op.compare("GET")) { + // TODO implement GET_LEASE and SET_LEASE emulations + if (!op.compare("GET") || !op.compare("GET_LEASE")) { req->req_.setOp(OpType::kGet); - } else if (!op.compare("SET")) { + } else if (!op.compare("SET") || !op.compare("SET_LEASE")) { req->req_.setOp(OpType::kSet); } else if (!op.compare("DELETE")) { req->req_.setOp(OpType::kDel); diff --git a/cachelib/cachebench/workload/PieceWiseReplayGenerator.cpp b/cachelib/cachebench/workload/PieceWiseReplayGenerator.cpp index 732f2518fc..9362a64f7b 100644 --- a/cachelib/cachebench/workload/PieceWiseReplayGenerator.cpp +++ b/cachelib/cachebench/workload/PieceWiseReplayGenerator.cpp @@ -90,7 +90,7 @@ void PieceWiseReplayGenerator::getReqFromTrace() { try { std::vector fields; - folly::split(",", line, fields); + folly::split(',', line, fields); // TODO: remove this after legacy data phased out. if (fields.size() > totalFieldCount || diff --git a/cachelib/cachebench/workload/ReplayGeneratorBase.h b/cachelib/cachebench/workload/ReplayGeneratorBase.h index bdff5a59e1..ee3e4d9808 100644 --- a/cachelib/cachebench/workload/ReplayGeneratorBase.h +++ b/cachelib/cachebench/workload/ReplayGeneratorBase.h @@ -65,7 +65,7 @@ class TraceFileStream { // Parses a line from the trace file into a vector. 
// Returns an empty vector std::vector splitRow; - folly::split(",", line, splitRow); + folly::split(',', line, splitRow); if (splitRow.size() != columnKeyMap_.size()) { XLOG_EVERY_MS(WARNING, 1000) << "Expected row with " << columnKeyMap_.size() @@ -134,7 +134,7 @@ class TraceFileStream { } void parseHeaderRow(const std::string& header) { - folly::split(",", header, keys_); + folly::split(',', header, keys_); for (size_t i = 0; i < keys_.size(); i++) { columnKeyMap_.emplace(folly::to(keys_[i]), i); } diff --git a/cachelib/cachebench/workload/tests/KVReplayGeneratorTest.cpp b/cachelib/cachebench/workload/tests/KVReplayGeneratorTest.cpp index 72a55a4020..16e4e52060 100644 --- a/cachelib/cachebench/workload/tests/KVReplayGeneratorTest.cpp +++ b/cachelib/cachebench/workload/tests/KVReplayGeneratorTest.cpp @@ -56,6 +56,18 @@ struct TraceEntry { size_t expKeySize = std::max(keySize_, reqKey.size()); expKeySize = std::min(expKeySize, 256); ASSERT_EQ(reqKey.size(), expKeySize); + ASSERT_EQ(req.req_.getOp(), getOpType()); + } + + OpType getOpType() { + if (!op_.compare("GET") || !op_.compare("GET_LEASE")) { + return OpType::kGet; + } else if (!op_.compare("SET") || !op_.compare("SET_LEASE")) { + return OpType::kSet; + } else if (!op_.compare("DELETE")) { + return OpType::kDel; + } + return OpType::kSize; } std::string key_; @@ -86,8 +98,11 @@ TEST(KVReplayGeneratorTest, BasicFormat) { // ,,,,, {7, "GET", 0, 2, std::nullopt, true}, {7, "GET", 0, 2, 50, true}, + {7, "GET_LEASE", 0, 2, 50, true}, {20, "SET", 100, 35, std::nullopt, true}, {20, "SET", 100, 35, 3600, true}, + {20, "SAT", 100, 35, 3600, false}, // invalid op name + {20, "SET_LEASE", 100, 35, 3600, true}, {7, "GET", 0, 0, std::nullopt, false}, // invalid op count {7, "GET", 0, 0, 600, false}, // invalid op count {1024, "SET", 100, 35, 300, true}, // key truncated diff --git a/cachelib/experimental/objcache/ObjectCachePersistence.thrift b/cachelib/experimental/objcache/ObjectCachePersistence.thrift index fcddb82ef5..e5acba4d54 100644 --- a/cachelib/experimental/objcache/ObjectCachePersistence.thrift +++ b/cachelib/experimental/objcache/ObjectCachePersistence.thrift @@ -17,9 +17,9 @@ namespace cpp2 facebook.cachelib.objcache.serialization struct Item { - 1: byte poolId - 2: i32 creationTime, - 3: i32 expiryTime, - 4: string key, - 5: string payload, + 1: byte poolId; + 2: i32 creationTime; + 3: i32 expiryTime; + 4: string key; + 5: string payload; } diff --git a/cachelib/experimental/objcache/tests/ThriftCustomAllocator.thrift b/cachelib/experimental/objcache/tests/ThriftCustomAllocator.thrift index 97f82d466a..b1610b3aad 100644 --- a/cachelib/experimental/objcache/tests/ThriftCustomAllocator.thrift +++ b/cachelib/experimental/objcache/tests/ThriftCustomAllocator.thrift @@ -24,22 +24,26 @@ struct UseSimpleCustomAllocator { // A template type like map needs to use "cpp.template" to specify a replacement template 1: map< + // A concrete type like string needs to use "cpp.type" to specify a replacement type + string ( + cpp.use_allocator, + cpp.type = "facebook::cachelib::objcache::test::TestString", + ), + string ( + cpp.use_allocator, + cpp.type = "facebook::cachelib::objcache::test::TestString", + ) + > ( + cpp.use_allocator, + cpp.template = "facebook::cachelib::objcache::test::TestMap", + ) m; - // A concrete type like string needs to use "cpp.type" to specify a replacement type - string - (cpp.use_allocator, - cpp.type = "facebook::cachelib::objcache::test::TestString"), - - string - (cpp.use_allocator, - cpp.type = 
"facebook::cachelib::objcache::test::TestString") - - > (cpp.use_allocator, - cpp.template = "facebook::cachelib::objcache::test::TestMap") m; - - // Native types or types that do not allocate memory do NOT need custom allocator - 2: i32 m2; -} (cpp.allocator="facebook::cachelib::objcache::test::ScopedTestAllocator", cpp.allocator_via="m") + // Native types or types that do not allocate memory do NOT need custom allocator + 2: i32 m2; +} ( + cpp.allocator = "facebook::cachelib::objcache::test::ScopedTestAllocator", + cpp.allocator_via = "m", +) // TODO: thrift allocator propagation behavior is broken. Right now, for the following // myObj1 = myObj2; // even if the allocator copy-assignment propagation is false, myObj2's @@ -50,17 +54,21 @@ struct UseSimpleCustomAllocator { union UnionWithCustomAllocator { 1: map< - i32, - string - (cpp.use_allocator, - cpp.type = "facebook::cachelib::objcache::test::TestString") - > (cpp.use_allocator, - cpp.template = "facebook::cachelib::objcache::test::TestMap")m1; - 2: string - (cpp.use_allocator, - cpp.type = "facebook::cachelib::objcache::test::TestString") m2; + i32, + string ( + cpp.use_allocator, + cpp.type = "facebook::cachelib::objcache::test::TestString", + ) + > ( + cpp.use_allocator, + cpp.template = "facebook::cachelib::objcache::test::TestMap", + ) m1; + 2: string ( + cpp.use_allocator, + cpp.type = "facebook::cachelib::objcache::test::TestString", + ) m2; 3: i32 m3; -} (cpp.allocator="facebook::cachelib::objcache::test::ScopedTestAllocator") +} (cpp.allocator = "facebook::cachelib::objcache::test::ScopedTestAllocator") // TODO: even though thrift union does not support allocator. We still need to // annotate it with allocator so it has a `get_allocator()` method so // that when deserializing it will be able to pass an allocator an inner @@ -89,11 +97,14 @@ union UnionWithCustomAllocator { // } struct UseTwoF14Maps { - 1: map - (cpp.use_allocator, - cpp.template = "facebook::cachelib::objcache::test::TestFollyF14FastMap") m1; - 2: map - (cpp.use_allocator, - cpp.template = "facebook::cachelib::objcache::test::TestFollyF14FastMap") m2; -} (cpp.allocator= - "facebook::cachelib::objcache::test::TestF14TemplateAllocator>") + 1: map ( + cpp.use_allocator, + cpp.template = "facebook::cachelib::objcache::test::TestFollyF14FastMap", + ) m1; + 2: map ( + cpp.use_allocator, + cpp.template = "facebook::cachelib::objcache::test::TestFollyF14FastMap", + ) m2; +} ( + cpp.allocator = "facebook::cachelib::objcache::test::TestF14TemplateAllocator>", +) diff --git a/cachelib/experimental/objcache2/ObjectCache-inl.h b/cachelib/experimental/objcache2/ObjectCache-inl.h index 345a27d528..18aae657c9 100644 --- a/cachelib/experimental/objcache2/ObjectCache-inl.h +++ b/cachelib/experimental/objcache2/ObjectCache-inl.h @@ -128,8 +128,8 @@ std::shared_ptr ObjectCache::find(folly::StringPiece key) { succL1Lookups_.inc(); auto ptr = found->template getMemoryAs()->objectPtr; - // Just release the handle. Cache destorys object when all handles released. - auto deleter = [h = std::move(found)](const T*) {}; + // Use custom deleter + auto deleter = Deleter(std::move(found)); return std::shared_ptr(reinterpret_cast(ptr), std::move(deleter)); } @@ -146,19 +146,20 @@ std::shared_ptr ObjectCache::findToWrite( succL1Lookups_.inc(); auto ptr = found->template getMemoryAs()->objectPtr; - // Just release the handle. Cache destorys object when all handles released. 
- auto deleter = [h = std::move(found)](T*) {}; + // Use custom deleter + auto deleter = Deleter(std::move(found)); return std::shared_ptr(reinterpret_cast(ptr), std::move(deleter)); } template template -std::pair::AllocStatus, std::shared_ptr> +std::tuple::AllocStatus, + std::shared_ptr, + std::shared_ptr> ObjectCache::insertOrReplace(folly::StringPiece key, std::unique_ptr object, size_t objectSize, - uint32_t ttlSecs, - std::shared_ptr* replacedPtr) { + uint32_t ttlSecs) { if (config_.objectSizeTrackingEnabled && objectSize == 0) { throw std::invalid_argument( "Object size tracking is enabled but object size is set to be 0."); @@ -176,7 +177,8 @@ ObjectCache::insertOrReplace(folly::StringPiece key, allocateFromL1(key, ttlSecs, 0 /* use current time as creationTime */); if (!handle) { insertErrors_.inc(); - return {AllocStatus::kAllocError, std::shared_ptr(std::move(object))}; + return {AllocStatus::kAllocError, std::shared_ptr(std::move(object)), + nullptr}; } // We don't release the object here because insertOrReplace could throw when // the replaced item is out of refcount; in this case, the object isn't @@ -187,21 +189,17 @@ ObjectCache::insertOrReplace(folly::StringPiece key, auto replaced = this->l1Cache_->insertOrReplace(handle); + std::shared_ptr replacedPtr = nullptr; if (replaced) { replaces_.inc(); - if (replacedPtr) { - auto itemPtr = reinterpret_cast(replaced->getMemory()); - // Just release the handle. Cache destorys object when all handles - // released. - auto deleter = [h = std::move(replaced)](T*) {}; - *replacedPtr = std::shared_ptr( - reinterpret_cast(itemPtr->objectPtr), std::move(deleter)); - } + auto itemPtr = reinterpret_cast(replaced->getMemory()); + // Just release the handle. Cache destorys object when all handles + // released. + auto deleter = [h = std::move(replaced)](T*) {}; + replacedPtr = std::shared_ptr(reinterpret_cast(itemPtr->objectPtr), + std::move(deleter)); } - // Just release the handle. Cache destorys object when all handles released. - auto deleter = [h = std::move(handle)](T*) {}; - // update total object size if (config_.objectSizeTrackingEnabled) { totalObjectSizeBytes_.fetch_add(objectSize, std::memory_order_relaxed); @@ -209,7 +207,11 @@ ObjectCache::insertOrReplace(folly::StringPiece key, // Release the object as it has been successfully inserted to the cache. object.release(); - return {AllocStatus::kSuccess, std::shared_ptr(ptr, std::move(deleter))}; + + // Use custom deleter + auto deleter = Deleter(std::move(handle)); + return {AllocStatus::kSuccess, std::shared_ptr(ptr, std::move(deleter)), + replacedPtr}; } template @@ -254,8 +256,8 @@ ObjectCache::insert(folly::StringPiece key, object.release(); } - // Just release the handle. Cache destorys object when all handles released. - auto deleter = [h = std::move(handle)](T*) {}; + // Use custom deleter + auto deleter = Deleter(std::move(handle)); return {success ? 
AllocStatus::kSuccess : AllocStatus::kKeyAlreadyExists, std::shared_ptr(ptr, std::move(deleter))}; } @@ -413,6 +415,49 @@ bool ObjectCache::recover() { return restorer.run(); } +template +template +void ObjectCache::mutateObject(const std::shared_ptr& object, + std::function mutateCb, + const std::string& mutateCtx) { + if (!object) { + return; + } + + cachelib::objcache2::ThreadMemoryTracker tMemTracker; + size_t memUsageBefore = tMemTracker.getMemUsageBytes(); + mutateCb(); + size_t memUsageAfter = tMemTracker.getMemUsageBytes(); + + auto& hdl = getWriteHandleRefInternal(object); + size_t memUsageDiff = 0; + size_t oldObjectSize = 0; + if (memUsageAfter > memUsageBefore) { // updated to a larger value + memUsageDiff = memUsageAfter - memUsageBefore; + // do atomic update on objectSize + oldObjectSize = __sync_fetch_and_add( + &(reinterpret_cast(hdl->getMemory())->objectSize), + memUsageDiff); + totalObjectSizeBytes_.fetch_add(memUsageDiff, std::memory_order_relaxed); + } else if (memUsageAfter < memUsageBefore) { // updated to a smaller value + memUsageDiff = memUsageBefore - memUsageAfter; + // do atomic update on objectSize + oldObjectSize = __sync_fetch_and_sub( + &(reinterpret_cast(hdl->getMemory())->objectSize), + memUsageDiff); + totalObjectSizeBytes_.fetch_sub(memUsageDiff, std::memory_order_relaxed); + } + + // TODO T149177357: for debugging purpose, remove the log later + XLOGF_EVERY_MS( + INFO, 60'000, + "[Object-Cache mutate][{}] type: {}, memUsageBefore: {}, memUsageAfter: " + "{}, memUsageDiff:{}, oldObjectSize: {}, curObjectSize: {}, " + "curTotalObjectSize: {}", + mutateCtx, typeid(T).name(), memUsageBefore, memUsageAfter, memUsageDiff, + oldObjectSize, getObjectSize(object), getTotalObjectSize()); +} + } // namespace objcache2 } // namespace cachelib } // namespace facebook diff --git a/cachelib/experimental/objcache2/ObjectCache.h b/cachelib/experimental/objcache2/ObjectCache.h index 85abac068e..934d4c92d1 100644 --- a/cachelib/experimental/objcache2/ObjectCache.h +++ b/cachelib/experimental/objcache2/ObjectCache.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "cachelib/allocator/CacheAllocator.h" @@ -38,6 +39,7 @@ #include "cachelib/experimental/objcache2/ObjectCacheSizeController.h" #include "cachelib/experimental/objcache2/persistence/Persistence.h" #include "cachelib/experimental/objcache2/persistence/gen-cpp2/persistent_data_types.h" +#include "cachelib/experimental/objcache2/util/ThreadMemoryTracker.h" namespace facebook { namespace cachelib { @@ -94,6 +96,43 @@ class ObjectCache : public ObjectCacheBase { // make constructor private, but constructable by std::make_unique struct InternalConstructor {}; + template + class Deleter { + public: + using ReadHandle = typename AllocatorT::ReadHandle; + using WriteHandle = typename AllocatorT::WriteHandle; + using Handle = std::variant; + + explicit Deleter(typename AllocatorT::ReadHandle&& hdl) + : hdl_(std::move(hdl)) {} + explicit Deleter(typename AllocatorT::WriteHandle&& hdl) + : hdl_(std::move(hdl)) {} + + void operator()(T*) { + // Just release the handle. + // Cache destorys object when all handles released. + std::holds_alternative(hdl_) + ? std::get(hdl_).reset() + : std::get(hdl_).reset(); + } + + WriteHandle& getWriteHandleRef() { + if (std::holds_alternative(hdl_)) { + hdl_ = std::move(std::get(hdl_)).toWriteHandle(); + } + return std::get(hdl_); + } + + ReadHandle& getReadHandleRef() { + return std::holds_alternative(hdl_) + ? 
std::get(hdl_) + : std::get(hdl_); + } + + private: + Handle hdl_; + }; + public: using ItemDestructor = std::function; using Key = KAllocation::Key; @@ -146,22 +185,20 @@ class ObjectCache : public ObjectCacheBase { // if objectSizeTracking is enabled, a non-zero value must // be passed. // @param ttlSecs object expiring seconds. - // @param replacedPtr a pointer to a shared_ptr, if it is not nullptr it will - // be assigned to the replaced object. // // @throw cachelib::exception::RefcountOverflow if the item we are replacing // is already out of refcounts. // @throw std::invalid_argument if objectSizeTracking is enabled but // objectSize is 0. - // @return a pair of allocation status and shared_ptr of newly inserted - // object. + // @return a tuple of allocation status, shared_ptr of newly inserted + // object and shared_ptr of old object that has been replaced (nullptr + // if no replacement happened) template - std::pair> insertOrReplace( - folly::StringPiece key, - std::unique_ptr object, - size_t objectSize = 0, - uint32_t ttlSecs = 0, - std::shared_ptr* replacedPtr = nullptr); + std::tuple, std::shared_ptr> + insertOrReplace(folly::StringPiece key, + std::unique_ptr object, + size_t objectSize = 0, + uint32_t ttlSecs = 0); // Insert the object into the cache with given key. If the key exists in the // cache, the new object won't be inserted. @@ -232,6 +269,93 @@ class ObjectCache : public ObjectCacheBase { : sizeController_->getCurrentEntriesLimit(); } + // Get the expiry timestamp of the object + // @param object object shared pointer returned from ObjectCache APIs + // + // @return the expiry timestamp in seconds of the object + // 0 if object is nullptr + template + uint32_t getExpiryTimeSec(const std::shared_ptr& object) const { + if (object == nullptr) { + return 0; + } + return getReadHandleRefInternal(object)->getExpiryTime(); + } + + // Get the configured TTL of the object + // @param object object shared pointer returned from ObjectCache APIs + // + // @return the configured TTL in seconds of the object + // 0 if object is nullptr + template + std::chrono::seconds getConfiguredTtl( + const std::shared_ptr& object) const { + if (object == nullptr) { + return std::chrono::seconds{0}; + } + return getReadHandleRefInternal(object)->getConfiguredTTL(); + } + + // Update the expiry timestamp of an object + // + // @param object object shared pointer returned from ObjectCache APIs + // @param expiryTimeSecs the expiryTime in seconds to update + // + // @return boolean indicating whether expiry time was successfully updated + template + bool updateExpiryTimeSec(std::shared_ptr& object, + uint32_t expiryTimeSecs) { + if (object == nullptr) { + return false; + } + return getWriteHandleRefInternal(object)->updateExpiryTime( + expiryTimeSecs); + } + + // Update expiry time to @ttl seconds from now. + // + // @param object object shared pointer returned from ObjectCache APIs + // @param ttl TTL in seconds (from now) + // + // @return boolean indicating whether TTL was successfully extended + template + bool extendTtl(std::shared_ptr& object, std::chrono::seconds ttl) { + if (object == nullptr) { + return false; + } + return getWriteHandleRefInternal(object)->extendTTL(ttl); + } + + // Mutate object and update the object size + // When size-awareness is enabled, users must call this API to mutate the + // object. 
Otherwise, we won't be able to track the updated object size + // + // @param object shared pointer of the object to be mutated (must be + // fetched from ObjectCache APIs) + // @param mutateCb callback containing the mutation logic + // @param mutateCtx context string of this mutation operation, for + // logging purpose + template + void mutateObject(const std::shared_ptr& object, + std::function mutateCb, + const std::string& mutateCtx = ""); + + // Get the size of the object + // + // @param object object shared pointer returned from ObjectCache APIs + // + // @return the object size if size-awareness is enabled + // 0 otherwise + template + size_t getObjectSize(const std::shared_ptr& object) const { + if (!object) { + return 0; + } + return reinterpret_cast( + getReadHandleRefInternal(object)->getMemory()) + ->objectSize; + } + protected: // Serialize cache allocator config for exporting to Scuba std::map serializeConfigParams() const override; @@ -272,6 +396,28 @@ class ObjectCache : public ObjectCacheBase { bool stopSizeController(std::chrono::seconds timeout = std::chrono::seconds{ 0}); + // Get a ReadHandle reference from the object shared_ptr + template + typename AllocatorT::ReadHandle& getReadHandleRefInternal( + const std::shared_ptr& object) const { + auto* deleter = std::get_deleter>(object); + XDCHECK(deleter != nullptr); + auto& hdl = deleter->getReadHandleRef(); + XDCHECK(hdl != nullptr); + return hdl; + } + + // Get a WriteHandle reference from the object shared_ptr + template + typename AllocatorT::WriteHandle& getWriteHandleRefInternal( + const std::shared_ptr& object) { + auto* deleter = std::get_deleter>(object); + XDCHECK(deleter != nullptr); + auto& hdl = deleter->getWriteHandleRef(); + XDCHECK(hdl != nullptr); + return hdl; + } + // Config passed to the cache. Config config_{}; diff --git a/cachelib/experimental/objcache2/ObjectCacheSizeController-inl.h b/cachelib/experimental/objcache2/ObjectCacheSizeController-inl.h index 2bf5de4452..66bb5309b8 100644 --- a/cachelib/experimental/objcache2/ObjectCacheSizeController-inl.h +++ b/cachelib/experimental/objcache2/ObjectCacheSizeController-inl.h @@ -33,6 +33,9 @@ void ObjectCacheSizeController::work() { objCache_.config_.l1EntriesLimit / 100) { auto averageObjSize = totalObjSize / currentNumEntries; auto newEntriesLimit = objCache_.config_.cacheSizeLimit / averageObjSize; + // entriesLimit should never exceed the configured entries limit + newEntriesLimit = + std::min(newEntriesLimit, objCache_.config_.l1EntriesLimit); if (newEntriesLimit < currentEntriesLimit_ && currentNumEntries >= newEntriesLimit) { // shrink cache when getting a lower new limit and current entries num @@ -46,7 +49,7 @@ void ObjectCacheSizeController::work() { } XLOGF_EVERY_MS(INFO, 60'000, - "CacheLib object-cache: total object size = {}, current " + "CacheLib size-controller: total object size = {}, current " "entries = {}, average object size = " "{}, new entries limit = {}, current entries limit = {}", totalObjSize, currentNumEntries, averageObjSize, @@ -73,7 +76,7 @@ void ObjectCacheSizeController::shrinkCacheByEntriesNum( XLOGF_EVERY_MS( INFO, 60'000, - "CacheLib object-cache: request to shrink cache by {} entries. " + "CacheLib size-controller: request to shrink cache by {} entries. " "Placeholders num before: {}, after: {}. 
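Since the comment above makes mutateObject mandatory whenever size-awareness is enabled, here is a hedged usage sketch; `objcache` is again assumed to be a size-tracking-enabled instance, and the key and value are purely illustrative.

```cpp
auto found = objcache->findToWrite<std::string>("cacheKey");
if (found) {
  // Route the mutation through mutateObject so the size delta is recorded;
  // the context string only labels the periodic log line.
  objcache->mutateObject(
      found, [&found]() { *found = "a noticeably longer replacement value"; },
      "example-mutation");

  size_t perObject = objcache->getObjectSize(found);  // updated tracked size
  size_t total = objcache->getTotalObjectSize();      // cache-wide aggregate
}
```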
currentEntriesLimit: {}", entries, size, objCache_.placeholders_.size(), currentEntriesLimit_); } @@ -92,7 +95,7 @@ void ObjectCacheSizeController::expandCacheByEntriesNum( XLOGF_EVERY_MS( INFO, 60'000, - "CacheLib object-cache: request to expand cache by {} entries. " + "CacheLib size-controller: request to expand cache by {} entries. " "Placeholders num before: {}, after: {}. currentEntriesLimit: {}", entries, size, objCache_.placeholders_.size(), currentEntriesLimit_); } diff --git a/cachelib/experimental/objcache2/persistence/Serialization.h b/cachelib/experimental/objcache2/persistence/Serialization.h index cccb414b45..4edad88e4b 100644 --- a/cachelib/experimental/objcache2/persistence/Serialization.h +++ b/cachelib/experimental/objcache2/persistence/Serialization.h @@ -68,9 +68,9 @@ struct ObjectDeserializer { Deserializer deserializer{reinterpret_cast(payload.begin()), reinterpret_cast(payload.end())}; auto ptr = std::make_unique(deserializer.deserialize()); - auto [allocStatus, _] = + auto res = objCache_.insertOrReplace(key, std::move(ptr), objectSize, ttlSecs); - return allocStatus == ObjectCache::AllocStatus::kSuccess; + return std::get<0>(res) == ObjectCache::AllocStatus::kSuccess; } // cache key of the object to be deserialized diff --git a/cachelib/experimental/objcache2/persistence/persistent_data.thrift b/cachelib/experimental/objcache2/persistence/persistent_data.thrift index 486e130a16..1d310693f5 100644 --- a/cachelib/experimental/objcache2/persistence/persistent_data.thrift +++ b/cachelib/experimental/objcache2/persistence/persistent_data.thrift @@ -17,12 +17,12 @@ namespace cpp2 facebook.cachelib.objcache2.persistence struct Item { - 1: string key, - 2: string payload, - 3: i32 objectSize, - 4: i32 expiryTime, + 1: string key; + 2: string payload; + 3: i32 objectSize; + 4: i32 expiryTime; } struct Metadata { - 1: i32 threadCount, + 1: i32 threadCount; } diff --git a/cachelib/experimental/objcache2/tests/ObjectCacheTest.cpp b/cachelib/experimental/objcache2/tests/ObjectCacheTest.cpp index 0bcc120de9..dbfcd5b6a3 100644 --- a/cachelib/experimental/objcache2/tests/ObjectCacheTest.cpp +++ b/cachelib/experimental/objcache2/tests/ObjectCacheTest.cpp @@ -16,6 +16,9 @@ #include +#include +#include + #include "cachelib/allocator/CacheAllocator.h" #include "cachelib/experimental/objcache2/ObjectCache.h" #include "cachelib/experimental/objcache2/persistence/gen-cpp2/persistent_data_types.h" @@ -206,12 +209,12 @@ class ObjectCacheTest : public ::testing::Test { foo->a = 1; foo->b = 2; foo->c = 3; - auto res = objcache->insertOrReplace("Foo", std::move(foo)); - EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, res.first); - ASSERT_NE(nullptr, res.second); - EXPECT_EQ(1, res.second->a); - EXPECT_EQ(2, res.second->b); - EXPECT_EQ(3, res.second->c); + auto [allocRes, ptr, _] = objcache->insertOrReplace("Foo", std::move(foo)); + EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, allocRes); + ASSERT_NE(nullptr, ptr); + EXPECT_EQ(1, ptr->a); + EXPECT_EQ(2, ptr->b); + EXPECT_EQ(3, ptr->c); auto found2 = objcache->template find("Foo"); ASSERT_NE(nullptr, found2); @@ -238,7 +241,7 @@ class ObjectCacheTest : public ::testing::Test { foo->b = 2; foo->c = 3; auto res1 = objcache->insertOrReplace("Foo", std::move(foo)); - EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, res1.first); + EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, std::get<0>(res1)); auto found1 = objcache->template find("Foo"); ASSERT_NE(nullptr, found1); @@ -251,7 +254,7 @@ class ObjectCacheTest : public ::testing::Test { foo2->e = 5; 
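The Serialization.h change above is the minimal migration for call sites that only need the status now that insertOrReplace returns a tuple instead of a pair. Both forms below are equivalent; this is a sketch that reuses the names from the surrounding code, with `key2`/`ptr2` purely illustrative.

```cpp
// Status only, as in ObjectDeserializer above:
auto res = objCache_.insertOrReplace(key, std::move(ptr), objectSize, ttlSecs);
bool ok = std::get<0>(res) == ObjectCache::AllocStatus::kSuccess;

// Status plus the new and displaced objects, via structured bindings:
auto [status, newObj, oldObj] =
    objCache_.insertOrReplace(key2, std::move(ptr2), objectSize, ttlSecs);
```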
foo2->f = 6; auto res2 = objcache->insertOrReplace("Foo2", std::move(foo2)); - EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, res2.first); + EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, std::get<0>(res2)); auto found2 = objcache->template find("Foo2"); ASSERT_NE(nullptr, found2); @@ -272,7 +275,7 @@ class ObjectCacheTest : public ::testing::Test { foo4->b = 2; foo4->c = 3; auto res1 = objcache->insertOrReplace("Foo4", std::move(foo4)); - EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, res1.first); + EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, std::get<0>(res1)); auto found1 = objcache->template find("Foo4"); ASSERT_NE(nullptr, found1); @@ -285,7 +288,7 @@ class ObjectCacheTest : public ::testing::Test { foo5->e = 5; foo5->f = 6; auto res2 = objcache->insertOrReplace("Foo5", std::move(foo5)); - EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, res2.first); + EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, std::get<0>(res2)); auto found2 = objcache->template find("Foo5"); ASSERT_NE(nullptr, found2); @@ -385,11 +388,14 @@ class ObjectCacheTest : public ::testing::Test { foo1->a = 1; foo1->b = 2; foo1->c = 3; - std::shared_ptr replaced; - auto res = - objcache->insertOrReplace("Foo", std::move(foo1), 0, 0, &replaced); - EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, res.first); - EXPECT_EQ(nullptr, replaced); + + auto [res1, ptr1, replaced1] = + objcache->insertOrReplace("Foo", std::move(foo1)); + EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, res1); + EXPECT_EQ(1, ptr1->a); + EXPECT_EQ(2, ptr1->b); + EXPECT_EQ(3, ptr1->c); + EXPECT_EQ(nullptr, replaced1); auto found1 = objcache->template find("Foo"); ASSERT_NE(nullptr, found1); @@ -401,12 +407,16 @@ class ObjectCacheTest : public ::testing::Test { foo2->a = 10; foo2->b = 20; foo2->c = 30; - res = objcache->insertOrReplace("Foo", std::move(foo2), 0, 0, &replaced); - EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, res.first); - ASSERT_NE(nullptr, replaced); - EXPECT_EQ(1, replaced->a); - EXPECT_EQ(2, replaced->b); - EXPECT_EQ(3, replaced->c); + auto [res2, ptr2, replaced2] = + objcache->insertOrReplace("Foo", std::move(foo2)); + EXPECT_EQ(ObjectCache::AllocStatus::kSuccess, res2); + EXPECT_EQ(10, ptr2->a); + EXPECT_EQ(20, ptr2->b); + EXPECT_EQ(30, ptr2->c); + ASSERT_NE(nullptr, replaced2); + EXPECT_EQ(1, replaced2->a); + EXPECT_EQ(2, replaced2->b); + EXPECT_EQ(3, replaced2->c); auto found2 = objcache->template find("Foo"); ASSERT_NE(nullptr, found2); @@ -497,7 +507,7 @@ class ObjectCacheTest : public ::testing::Test { // replace foo1 with foo2 { auto res = objcache->insertOrReplace("Foo", std::move(foo2), foo2Size); - ASSERT_EQ(ObjectCache::AllocStatus::kSuccess, res.first); + ASSERT_EQ(ObjectCache::AllocStatus::kSuccess, std::get<0>(res)); auto found = objcache->template find("Foo"); ASSERT_NE(nullptr, found); @@ -562,6 +572,235 @@ class ObjectCacheTest : public ::testing::Test { EXPECT_EQ(3, found2->c); } + template + void checkObjectSizeTracking(ObjectCache& objcache, + const std::shared_ptr& object, + std::function mutateCb) { + objcache.mutateObject(object, std::move(mutateCb)); + + ThreadMemoryTracker tMemTracker; + auto memUsage1 = tMemTracker.getMemUsageBytes(); + auto objectCopy = std::make_unique(*object); + auto memUsage2 = tMemTracker.getMemUsageBytes(); + + EXPECT_EQ(memUsage2 - memUsage1, objcache.template getObjectSize(object)); + } + + void checkTotalObjectSize(ObjectCache& objcache) { + size_t totalObjectSize = 0; + for (auto itr = objcache.getL1Cache().begin(); + itr != objcache.getL1Cache().end(); + ++itr) { + totalObjectSize += + 
reinterpret_cast(itr.asHandle()->getMemory()) + ->objectSize; + } + EXPECT_EQ(totalObjectSize, objcache.getTotalObjectSize()); + } + + void checkObjectSizeTrackingUnorderedMap() { + using ObjectType = std::unordered_map; + ObjectCacheConfig config; + config.setCacheName("test") + .setCacheCapacity(10'000 /* l1EntriesLimit*/) + .setItemDestructor([&](ObjectCacheDestructorData data) { + data.deleteObject(); + }); + config.objectSizeTrackingEnabled = true; + auto objcache = ObjectCache::create(config); + + // create an empty map + ThreadMemoryTracker tMemTracker; + auto memUsage1 = tMemTracker.getMemUsageBytes(); + auto map = std::make_unique(); + auto memUsage2 = tMemTracker.getMemUsageBytes(); + + auto [_, ptr, __] = objcache->insertOrReplace("cacheKey", std::move(map), + memUsage2 - memUsage1); + EXPECT_EQ(memUsage2 - memUsage1, objcache->template getObjectSize(ptr)); + EXPECT_EQ(memUsage2 - memUsage1, objcache->getTotalObjectSize()); + + auto found = objcache->template findToWrite("cacheKey"); + ASSERT_NE(nullptr, found); + + // add an entry + auto cb1 = [&found]() { (*found)["key"] = "tiny"; }; + // replace the entry with a longer string + auto cb2 = [&found]() { + (*found)["key"] = "longgggggggggggggggggggggggggggstringgggggggggggg"; + }; + // replace the entry with a shorter string + auto cb3 = [&found]() { + auto tmp = std::make_unique("short"); + using std::swap; + swap((*found)["key"], *tmp); + }; + // remove the entry + auto cb4 = [&found]() { found->erase("key"); }; + + checkObjectSizeTracking(*objcache, found, std::move(cb1)); + checkTotalObjectSize(*objcache); + + checkObjectSizeTracking(*objcache, found, std::move(cb2)); + checkTotalObjectSize(*objcache); + + checkObjectSizeTracking(*objcache, found, std::move(cb3)); + checkTotalObjectSize(*objcache); + + checkObjectSizeTracking(*objcache, found, std::move(cb4)); + checkTotalObjectSize(*objcache); + } + + void checkObjectSizeTrackingVector() { + using ObjectType = std::vector; + ObjectCacheConfig config; + config.setCacheName("test") + .setCacheCapacity(10'000 /* l1EntriesLimit*/) + .setItemDestructor([&](ObjectCacheDestructorData data) { + data.deleteObject(); + }); + config.objectSizeTrackingEnabled = true; + auto objcache = ObjectCache::create(config); + + // create an empty vector + ThreadMemoryTracker tMemTracker; + auto memUsage1 = tMemTracker.getMemUsageBytes(); + auto vec = std::make_unique(); + auto memUsage2 = tMemTracker.getMemUsageBytes(); + + auto [_, ptr, __] = objcache->insertOrReplace("cacheKey", std::move(vec), + memUsage2 - memUsage1); + EXPECT_EQ(memUsage2 - memUsage1, objcache->template getObjectSize(ptr)); + EXPECT_EQ(memUsage2 - memUsage1, objcache->getTotalObjectSize()); + + auto found = objcache->template findToWrite("cacheKey"); + ASSERT_NE(nullptr, found); + + // add an entry using emplace_back + auto cb1 = [&found]() { found->emplace_back(Foo{1, 2, 3}); }; + + // add another entry using push_back + auto cb2 = [&found]() { found->push_back(Foo{4, 5, 6}); }; + + // remove the entry from the end using pop_back + auto cb3 = [&found]() { + found->pop_back(); + found->shrink_to_fit(); + }; + + checkObjectSizeTracking(*objcache, found, std::move(cb1)); + checkTotalObjectSize(*objcache); + + checkObjectSizeTracking(*objcache, found, std::move(cb2)); + checkTotalObjectSize(*objcache); + + checkObjectSizeTracking(*objcache, found, std::move(cb3)); + checkTotalObjectSize(*objcache); + } + + void checkObjectSizeTrackingString() { + using ObjectType = std::string; + ObjectCacheConfig config; + 
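One detail these helpers rely on: removing elements alone may not return heap memory to the allocator, so the shrink callbacks force a reallocation (shrink_to_fit, or a swap with a freshly built temporary) to make the negative delta visible to the tracker. A hedged illustration, with `vecHandle` standing for a findToWrite result on a cached std::vector:

```cpp
// Without shrink_to_fit() the vector keeps its capacity, nothing is freed,
// and the tracked object size would likely not go down.
objcache->mutateObject(vecHandle, [&vecHandle]() {
  vecHandle->pop_back();
  vecHandle->shrink_to_fit();
});
```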
config.setCacheName("test") + .setCacheCapacity(10'000 /* l1EntriesLimit*/) + .setItemDestructor([&](ObjectCacheDestructorData data) { + data.deleteObject(); + }); + config.objectSizeTrackingEnabled = true; + auto objcache = ObjectCache::create(config); + + // create an empty string + ThreadMemoryTracker tMemTracker; + auto memUsage1 = tMemTracker.getMemUsageBytes(); + auto str = std::make_unique(); + auto memUsage2 = tMemTracker.getMemUsageBytes(); + + auto [_, ptr, __] = objcache->insertOrReplace("cacheKey", std::move(str), + memUsage2 - memUsage1); + EXPECT_EQ(memUsage2 - memUsage1, objcache->template getObjectSize(ptr)); + EXPECT_EQ(memUsage2 - memUsage1, objcache->getTotalObjectSize()); + + auto found = objcache->template findToWrite("cacheKey"); + ASSERT_NE(nullptr, found); + + // set a value + auto cb1 = [&found]() { *found = "tiny"; }; + // replace the value with a longer string + auto cb2 = [&found]() { + *found = "longgggggggggggggggggggggggggggstringgggggggggggg"; + }; + // replace the value with a shorter string + auto cb3 = [&found]() { + *found = "short"; + (*found).shrink_to_fit(); + }; + + checkObjectSizeTracking(*objcache, found, std::move(cb1)); + checkTotalObjectSize(*objcache); + + checkObjectSizeTracking(*objcache, found, std::move(cb2)); + checkTotalObjectSize(*objcache); + + checkObjectSizeTracking(*objcache, found, std::move(cb3)); + checkTotalObjectSize(*objcache); + } + + void testObjectSizeTrackingWithMutation() { + if (!folly::usingJEMalloc()) { + return; + } + + checkObjectSizeTrackingUnorderedMap(); + checkObjectSizeTrackingVector(); + checkObjectSizeTrackingString(); + } + + void testMultithreadObjectSizeTrackingWithMutation() { + if (!folly::usingJEMalloc()) { + return; + } + + using ObjectType = std::unordered_map; + + ObjectCacheConfig config; + config.setCacheName("test") + .setCacheCapacity(10'000 /* l1EntriesLimit*/) + .setItemDestructor([&](ObjectCacheDestructorData data) { + data.deleteObject(); + }); + config.objectSizeTrackingEnabled = true; + auto objcache = ObjectCache::create(config); + + // create an empty map + ThreadMemoryTracker tMemTracker; + auto memUsage1 = tMemTracker.getMemUsageBytes(); + auto map = std::make_unique(); + auto memUsage2 = tMemTracker.getMemUsageBytes(); + + objcache->insertOrReplace("cacheKey", std::move(map), + memUsage2 - memUsage1); + + auto runMutateObjectOps = [&](int i) { + auto found = objcache->template findToWrite("cacheKey"); + ASSERT_NE(nullptr, found); + objcache->mutateObject(found, [&found, i]() { + (*found)[folly::sformat("key_{}", i)] = folly::sformat("value_{}", i); + }); + }; + + std::vector rs; + for (int i = 0; i < 10; i++) { + rs.push_back(std::thread{runMutateObjectOps, i + 1}); + } + for (int i = 0; i < 10; i++) { + rs[i].join(); + } + + auto found = objcache->template find("cacheKey"); + EXPECT_EQ(objcache->template getObjectSize(found), + objcache->getTotalObjectSize()); + } + void testPersistence() { auto persistBaseFilePath = std::tmpnam(nullptr); ThriftFoo foo1; @@ -879,6 +1118,69 @@ class ObjectCacheTest : public ::testing::Test { } } + void testGetTtl() { + const uint32_t ttlSecs = 600; + + ObjectCacheConfig config; + config.setCacheName("test").setCacheCapacity(10'000).setItemDestructor( + [&](ObjectCacheDestructorData data) { data.deleteObject(); }); + auto objcache = ObjectCache::create(config); + + auto before = util::getCurrentTimeSec(); + std::this_thread::sleep_for(std::chrono::seconds{3}); + objcache->insertOrReplace("Foo", std::make_unique(), 0 /*object size*/, + ttlSecs); + + // 
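Both mutation-tracking tests bail out unless the binary runs under jemalloc, presumably because ThreadMemoryTracker draws on per-thread allocator statistics. A caller could apply the same guard before depending on mutation-time size tracking; `config` here is the same ObjectCacheConfig as in the tests.

```cpp
// Only rely on mutation-time size tracking when per-thread allocation stats
// are available (the tests use folly::usingJEMalloc() as that signal).
if (folly::usingJEMalloc()) {
  config.objectSizeTrackingEnabled = true;
}
```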
lookup via find API + auto found1 = objcache->template find("Foo"); + ASSERT_NE(nullptr, found1); + + // get TTL info + EXPECT_EQ(ttlSecs, objcache->getConfiguredTtl(found1).count()); + EXPECT_LE(before + ttlSecs, objcache->getExpiryTimeSec(found1)); + + // lookup via findToWrite API + auto found2 = objcache->template findToWrite("Foo"); + ASSERT_NE(nullptr, found2); + + // get TTL info + EXPECT_EQ(ttlSecs, objcache->getConfiguredTtl(found2).count()); + EXPECT_LE(before + ttlSecs, objcache->getExpiryTimeSec(found2)); + } + + void testUpdateTtl() { + const uint32_t ttlSecs = 600; + + ObjectCacheConfig config; + config.setCacheName("test").setCacheCapacity(10'000).setItemDestructor( + [&](ObjectCacheDestructorData data) { data.deleteObject(); }); + auto objcache = ObjectCache::create(config); + + auto insertionTime = util::getCurrentTimeSec(); + objcache->insertOrReplace("Foo", std::make_unique(), 0 /*object size*/, + ttlSecs); + + auto found = objcache->template find("Foo"); + ASSERT_NE(nullptr, found); + + // get TTL info + EXPECT_EQ(ttlSecs, objcache->getConfiguredTtl(found).count()); + EXPECT_LE(insertionTime + ttlSecs, objcache->getExpiryTimeSec(found)); + + // update expiry time + auto currExpTime = objcache->getExpiryTimeSec(found); + EXPECT_TRUE(objcache->updateExpiryTimeSec(found, currExpTime + ttlSecs)); + EXPECT_EQ(2 * ttlSecs, objcache->getConfiguredTtl(found).count()); + EXPECT_EQ(currExpTime + ttlSecs, objcache->getExpiryTimeSec(found)); + + // extend TTL + auto now = util::getCurrentTimeSec(); + std::this_thread::sleep_for(std::chrono::seconds{3}); + EXPECT_TRUE(objcache->extendTtl(found, std::chrono::seconds(3 * ttlSecs))); + EXPECT_LE(now + ttlSecs, objcache->getExpiryTimeSec(found)); + EXPECT_LE(3 * ttlSecs, objcache->getConfiguredTtl(found).count()); + } + void testMultithreadReplace() { // Sanity test to see if insertOrReplace across multiple // threads are safe. @@ -1072,6 +1374,32 @@ class ObjectCacheTest : public ::testing::Test { fs[i].join(); } } + + void testMultithreadUpdateTtl() { + // Sanity test to see if update TTL across multiple + // threads is safe. 
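testUpdateTtl above exercises the two flavors of TTL updates: updateExpiryTimeSec takes an absolute timestamp, while extendTtl re-bases the expiry relative to the current time. A compact sketch of the difference, reusing the assumed `objcache` and `found` from the earlier sketches:

```cpp
// Absolute: push the current expiry out by another 600 seconds.
uint32_t expiresAt = objcache->getExpiryTimeSec(found);
objcache->updateExpiryTimeSec(found, expiresAt + 600);

// Relative: expire 1800 seconds from now, regardless of the previous expiry.
objcache->extendTtl(found, std::chrono::seconds{1800});
```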
+ ObjectCacheConfig config; + config.setCacheName("test").setCacheCapacity(10'000).setItemDestructor( + [&](ObjectCacheDestructorData data) { data.deleteObject(); }); + auto objcache = ObjectCache::create(config); + objcache->insertOrReplace("key", std::make_unique(), 0, 60); + + auto runUpdateTtlOps = [&] { + for (int i = 0; i < 2000; i++) { + auto found = objcache->template find("key"); + auto configuredTtlSecs = objcache->getConfiguredTtl(found).count(); + objcache->extendTtl(found, std::chrono::seconds{configuredTtlSecs}); + } + }; + + std::vector ts; + for (int i = 0; i < 10; i++) { + ts.push_back(std::thread{runUpdateTtlOps}); + } + for (int i = 0; i < 10; i++) { + ts[i].join(); + } + } }; using AllocatorTypes = ::testing::TypestestObjectSizeTrackingUniqueInsert(); } +TYPED_TEST(ObjectCacheTest, ObjectSizeTrackingWithMutation) { + this->testObjectSizeTrackingWithMutation(); +} +TYPED_TEST(ObjectCacheTest, MultithreadObjectSizeTrackingWithMutation) { + this->testMultithreadObjectSizeTrackingWithMutation(); +} + TYPED_TEST(ObjectCacheTest, Persistence) { this->testPersistence(); } TYPED_TEST(ObjectCacheTest, PersistenceMultiType) { this->testPersistenceMultiType(); @@ -1110,6 +1445,9 @@ TYPED_TEST(ObjectCacheTest, PersistenceHighLoad) { this->testPersistenceHighLoad(); } +TYPED_TEST(ObjectCacheTest, GetTtl) { this->testGetTtl(); } +TYPED_TEST(ObjectCacheTest, UpdateTtl) { this->testUpdateTtl(); } + TYPED_TEST(ObjectCacheTest, MultithreadReplace) { this->testMultithreadReplace(); } @@ -1128,6 +1466,9 @@ TYPED_TEST(ObjectCacheTest, MultithreadFindAndEviction) { TYPED_TEST(ObjectCacheTest, MultithreadFindAndReplaceWith10Shards) { this->testMultithreadFindAndReplaceWith10Shards(); } +TYPED_TEST(ObjectCacheTest, MultithreadUpdateTtl) { + this->testMultithreadUpdateTtl(); +} using ObjectCache = ObjectCache; TEST(ObjectCacheTest, LruEviction) { diff --git a/cachelib/experimental/objcache2/tests/test_object.thrift b/cachelib/experimental/objcache2/tests/test_object.thrift index bf6ad07432..a448014cd9 100644 --- a/cachelib/experimental/objcache2/tests/test_object.thrift +++ b/cachelib/experimental/objcache2/tests/test_object.thrift @@ -17,13 +17,13 @@ namespace cpp2 facebook.cachelib.objcache2.test struct ThriftFoo { - 1: i32 a; - 2: i32 b; - 3: i32 c; + 1: i32 a; + 2: i32 b; + 3: i32 c; } struct ThriftFoo2 { - 1: i32 d; - 2: i32 e; - 3: i32 f; + 1: i32 d; + 2: i32 e; + 3: i32 f; } diff --git a/cachelib/external/fbthrift b/cachelib/external/fbthrift index 33a9fbc258..62c333519d 160000 --- a/cachelib/external/fbthrift +++ b/cachelib/external/fbthrift @@ -1 +1 @@ -Subproject commit 33a9fbc258f21818f20ea03a55c979014882e84d +Subproject commit 62c333519d7ee3a47f2d89daf5ae3b164b10560a diff --git a/cachelib/external/fizz b/cachelib/external/fizz index 9198ca6e7d..459b560771 160000 --- a/cachelib/external/fizz +++ b/cachelib/external/fizz @@ -1 +1 @@ -Subproject commit 9198ca6e7daa50fae6b8413d745b4faaf97dfd10 +Subproject commit 459b560771a0d67ae33e2a97dc670ec64a074f45 diff --git a/cachelib/external/folly b/cachelib/external/folly index 128cfac6ac..b8b3ed56ec 160000 --- a/cachelib/external/folly +++ b/cachelib/external/folly @@ -1 +1 @@ -Subproject commit 128cfac6ac3d69825bad2af852fced3f63d87411 +Subproject commit b8b3ed56ecd1aff05abe7c2e5085da40e9ffad5f diff --git a/cachelib/external/wangle b/cachelib/external/wangle index 6bc77c8d46..a153b33438 160000 --- a/cachelib/external/wangle +++ b/cachelib/external/wangle @@ -1 +1 @@ -Subproject commit 6bc77c8d46b5ef68d77e921bb1e3d1e576adb8fe +Subproject commit 
a153b33438e47b9fb1a8967d923ff72af24582f9 diff --git a/cachelib/navy/block_cache/BlockCache.cpp b/cachelib/navy/block_cache/BlockCache.cpp index f34605b68b..84dadd13e7 100644 --- a/cachelib/navy/block_cache/BlockCache.cpp +++ b/cachelib/navy/block_cache/BlockCache.cpp @@ -723,9 +723,11 @@ void BlockCache::getCounters(const CounterVisitor& visitor) const { reclaimValueChecksumErrorCount_.get(), CounterVisitor::CounterType::RATE); visitor("navy_bc_cleanup_entry_header_checksum_errors", - cleanupEntryHeaderChecksumErrorCount_.get()); + cleanupEntryHeaderChecksumErrorCount_.get(), + CounterVisitor::CounterType::RATE); visitor("navy_bc_cleanup_value_checksum_errors", - cleanupValueChecksumErrorCount_.get()); + cleanupValueChecksumErrorCount_.get(), + CounterVisitor::CounterType::RATE); visitor("navy_bc_succ_lookups", succLookupCount_.get(), CounterVisitor::CounterType::RATE); visitor("navy_bc_removes", removeCount_.get(), @@ -750,7 +752,8 @@ void BlockCache::getCounters(const CounterVisitor& visitor) const { visitor("navy_bc_reinsertion_errors", reinsertionErrorCount_.get(), CounterVisitor::CounterType::RATE); visitor("navy_bc_lookup_for_item_destructor_errors", - lookupForItemDestructorErrorCount_.get()); + lookupForItemDestructorErrorCount_.get(), + CounterVisitor::CounterType::RATE); visitor("navy_bc_remove_attempt_collisions", removeAttemptCollisions_.get(), CounterVisitor::CounterType::RATE); // Allocator visits region manager diff --git a/cachelib/navy/driver/Driver.cpp b/cachelib/navy/driver/Driver.cpp index 1615d1cc48..29215cc161 100644 --- a/cachelib/navy/driver/Driver.cpp +++ b/cachelib/navy/driver/Driver.cpp @@ -273,8 +273,10 @@ void Driver::getCounters(const CounterVisitor& visitor) const { CounterVisitor::CounterType::RATE); visitor("navy_rejected_bytes", rejectedBytes_.get(), CounterVisitor::CounterType::RATE); - visitor("navy_accepted_bytes", acceptedBytes_.get()); - visitor("navy_accepted", acceptedCount_.get()); + visitor("navy_accepted_bytes", acceptedBytes_.get(), + CounterVisitor::CounterType::RATE); + visitor("navy_accepted", acceptedCount_.get(), + CounterVisitor::CounterType::RATE); visitor("navy_parcel_memory", parcelMemory_.get()); visitor("navy_concurrent_inserts", concurrentInserts_.get()); diff --git a/cachelib/navy/serialization/objects.thrift b/cachelib/navy/serialization/objects.thrift index 32be4bd17e..887c90c6ba 100644 --- a/cachelib/navy/serialization/objects.thrift +++ b/cachelib/navy/serialization/objects.thrift @@ -17,78 +17,78 @@ namespace cpp2 facebook.cachelib.navy.serialization struct IndexEntry { - 1: required i32 key = 0, - 2: required i32 address = 0, - 3: i16 sizeHint = 0, - 4: byte totalHits = 0, - 5: byte currentHits = 0, + 1: required i32 key = 0; + 2: required i32 address = 0; + 3: i16 sizeHint = 0; + 4: byte totalHits = 0; + 5: byte currentHits = 0; } struct IndexBucket { - 1: required i32 bucketId = 0, - 2: required list entries, + 1: required i32 bucketId = 0; + 2: required list entries; } struct Region { - 1: required i32 regionId = 0, - 2: required i32 lastEntryEndOffset = 0, - 3: required i32 classId = 0, - 4: required i32 numItems = 0, - 5: required bool pinned = false, - 6: i32 priority = 0, + 1: required i32 regionId = 0; + 2: required i32 lastEntryEndOffset = 0; + 3: required i32 classId = 0; + 4: required i32 numItems = 0; + 5: required bool pinned = false; + 6: i32 priority = 0; } struct RegionData { - 1: required list regions, - 2: required i32 regionSize = 0, + 1: required list regions; + 2: required i32 regionSize = 0; } struct 
FifoPolicyNodeData { - 1: required i32 idx, - 2: required i64 trackTime, + 1: required i32 idx; + 2: required i64 trackTime; } -struct FifoPolicyData{ - 1: required list queue, +struct FifoPolicyData { + 1: required list queue; } struct AccessStats { - 1: byte totalHits = 0, - 2: byte currHits = 0, - 3: byte numReinsertions = 0, + 1: byte totalHits = 0; + 2: byte currHits = 0; + 3: byte numReinsertions = 0; } struct AccessStatsPair { - 1: i64 key, - 2: AccessStats stats, + 1: i64 key; + 2: AccessStats stats; } struct AccessTracker { - 1: map deprecated_data, - 2: list data, + 1: map deprecated_data; + 2: list data; } struct BlockCacheConfig { - 1: required i64 version = 0, - 2: required i64 cacheBaseOffset = 0, - 3: required i64 cacheSize = 0, - 4: required i32 allocAlignSize = 0, - 5: required set deprecated_sizeClasses, - 6: required bool checksum = false, - 7: map deprecated_sizeDist, - 8: i64 holeCount = 0, - 9: i64 holeSizeTotal = 0, - 10: bool reinsertionPolicyEnabled = false, - 11: i64 usedSizeBytes = 0, + 1: required i64 version = 0; + 2: required i64 cacheBaseOffset = 0; + 3: required i64 cacheSize = 0; + 4: required i32 allocAlignSize = 0; + 5: required set deprecated_sizeClasses; + 6: required bool checksum = false; + 7: map deprecated_sizeDist; + 8: i64 holeCount = 0; + 9: i64 holeSizeTotal = 0; + 10: bool reinsertionPolicyEnabled = false; + 11: i64 usedSizeBytes = 0; } struct BigHashPersistentData { - 1: required i32 version = 0, - 2: required i64 generationTime = 0, - 3: required i64 itemCount = 0, - 4: required i64 bucketSize = 0, - 5: required i64 cacheBaseOffset = 0, - 6: required i64 numBuckets = 0, - 7: map deprecated_sizeDist, - 8: i64 usedSizeBytes = 0, + 1: required i32 version = 0; + 2: required i64 generationTime = 0; + 3: required i64 itemCount = 0; + 4: required i64 bucketSize = 0; + 5: required i64 cacheBaseOffset = 0; + 6: required i64 numBuckets = 0; + 7: map deprecated_sizeDist; + 8: i64 usedSizeBytes = 0; } diff --git a/cachelib/persistence/tests/PersistenceCache.h b/cachelib/persistence/tests/PersistenceCache.h index 5400b4d4ea..1db5b5fc8a 100644 --- a/cachelib/persistence/tests/PersistenceCache.h +++ b/cachelib/persistence/tests/PersistenceCache.h @@ -213,7 +213,7 @@ class PersistenceCache { public: const uint32_t kNumKeys = 1024 * 1024; // 1 million const size_t kCacheSize = 100 * kNumKeys; // 100MB - const size_t kCapacity = 4 * kCacheSize; // 400MB + const size_t kCapacity = 5 * kCacheSize; // 500MB std::unique_ptr buffer_; std::string cacheDir_; diff --git a/cachelib/rust/readonly/readonly.cpp b/cachelib/rust/readonly/readonly.cpp deleted file mode 100644 index 53ed7c6bd7..0000000000 --- a/cachelib/rust/readonly/readonly.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "cachelib/rust/readonly/readonly.h" - -namespace facebook { -namespace rust { -namespace cachelib { - -std::unique_ptr -ro_cache_view_attach(const std::string& cache_dir) { - return std::make_unique( - cache_dir, false); -} - -std::unique_ptr -ro_cache_view_attach_at_address(const std::string& cache_dir, size_t addr) { - return std::make_unique( - cache_dir, false, (void*)addr); -} - -uintptr_t ro_cache_view_get_shm_mapping_address( - const facebook::cachelib::ReadOnlySharedCacheView& cacheView) { - return cacheView.getShmMappingAddress(); -} - -const uint8_t* ro_cache_view_get_item_ptr_from_offset( - const facebook::cachelib::ReadOnlySharedCacheView& cacheView, - size_t offset) { - return static_cast( - const_cast(cacheView) - .getItemPtrFromOffset(offset)); -} - -} // namespace cachelib -} // namespace rust -} // namespace facebook diff --git a/cachelib/rust/readonly/readonly.h b/cachelib/rust/readonly/readonly.h deleted file mode 100644 index 673593ec38..0000000000 --- a/cachelib/rust/readonly/readonly.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -#include "cachelib/allocator/ReadOnlySharedCacheView.h" - -namespace facebook { -namespace rust { -namespace cachelib { - -std::unique_ptr -ro_cache_view_attach(const std::string& cache_dir); - -std::unique_ptr -ro_cache_view_attach_at_address(const std::string& cache_dir, size_t); - -uintptr_t ro_cache_view_get_shm_mapping_address( - const facebook::cachelib::ReadOnlySharedCacheView& cache); - -const uint8_t* ro_cache_view_get_item_ptr_from_offset( - const facebook::cachelib::ReadOnlySharedCacheView& cacheView, - size_t offset); -} // namespace cachelib -} // namespace rust -} // namespace facebook diff --git a/cachelib/rust/readonly/readonly.rs b/cachelib/rust/readonly/readonly.rs deleted file mode 100644 index 371b9a1596..0000000000 --- a/cachelib/rust/readonly/readonly.rs +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use std::os::unix::ffi::OsStrExt; -use std::path::Path; - -use cxx::let_cxx_string; -use thiserror::Error; - -#[derive(Debug, Error)] -#[error("Failed to attach ReadOnlySharedCacheView: {cxx_exn}")] -pub struct FailedToAttachError { - #[from] - cxx_exn: cxx::Exception, -} - -#[derive(Debug, Error)] -#[error("Invalid remote item handle: {cxx_exn}")] -pub struct InvalidHandleError { - #[from] - cxx_exn: cxx::Exception, -} - -#[cxx::bridge(namespace = "facebook::rust::cachelib")] -mod ffi { - unsafe extern "C++" { - include!("cachelib/rust/readonly/readonly.h"); - - #[namespace = "facebook::cachelib"] - type ReadOnlySharedCacheView; - - fn ro_cache_view_attach( - cache_dir: &CxxString, - ) -> Result>; - fn ro_cache_view_attach_at_address( - cache_dir: &CxxString, - addr: usize, - ) -> Result>; - fn ro_cache_view_get_shm_mapping_address(cache: &ReadOnlySharedCacheView) -> usize; - fn ro_cache_view_get_item_ptr_from_offset( - cache: &ReadOnlySharedCacheView, - offset: usize, - ) -> Result<*const u8>; - } -} - -pub struct ReadOnlySharedCacheView { - cache_view: cxx::UniquePtr, -} - -impl ReadOnlySharedCacheView { - pub fn new(cache_dir: impl AsRef) -> Result { - let_cxx_string!(cache_dir = cache_dir.as_ref().as_os_str().as_bytes()); - let cache_view = ffi::ro_cache_view_attach(&cache_dir)?; - Ok(Self { cache_view }) - } - - pub fn new_at_address( - cache_dir: impl AsRef, - addr: *mut std::ffi::c_void, - ) -> Result { - let_cxx_string!(cache_dir = cache_dir.as_ref().as_os_str().as_bytes()); - let cache_view = ffi::ro_cache_view_attach_at_address(&cache_dir, addr as usize)?; - Ok(Self { cache_view }) - } - - /// Return a byte slice from a (offset, len) pair within the cache. - /// (offset, len) must be retrieved using [get_remote_handle] from an LruCacheHandle. - pub fn get_bytes_from_offset<'a>( - &'a self, - offset: usize, - len: usize, - ) -> Result<&'a [u8], InvalidHandleError> { - let item_ptr = ffi::ro_cache_view_get_item_ptr_from_offset(&*self.cache_view, offset)?; - Ok(unsafe { std::slice::from_raw_parts(item_ptr, len) }) - } - - pub fn shm_mapping_address(&self) -> usize { - let addr = ffi::ro_cache_view_get_shm_mapping_address(&*self.cache_view); - if addr == 0 { - // This shouldn't happen--the whole point of ReadOnlySharedCacheView - // is that it's a view into a cache's shared memory, and - // getShmMappingAddress should return nullptr only when the cache is - // not using shared memory. 
- panic!("ReadOnlySharedCacheView returned null shm_mapping_address") - } else { - addr - } - } -} - -#[cfg(test)] -mod test { - use std::path::PathBuf; - use std::time::Duration; - - use anyhow::Result; - use bytes::Bytes; - use cachelib::*; - use fbinit::FacebookInit; - use tempdir::TempDir; - - use super::*; - - fn create_temp_dir(dir_prefix: &str) -> TempDir { - TempDir::new(dir_prefix).expect("failed to create temp dir") - } - - fn create_shared_cache(fb: FacebookInit, cache_directory: PathBuf) { - let config = LruCacheConfig::new(128 * 1024 * 1024) - .set_shrinker(ShrinkMonitor { - shrinker_type: ShrinkMonitorType::ResidentSize { - max_process_size_gib: 16, - min_process_size_gib: 1, - }, - interval: Duration::new(1, 0), - max_resize_per_iteration_percent: 10, - max_removed_percent: 90, - strategy: RebalanceStrategy::LruTailAge { - age_difference_ratio: 0.1, - min_retained_slabs: 1, - }, - }) - .set_pool_resizer(PoolResizeConfig { - interval: Duration::new(1, 0), - slabs_per_iteration: 100, - strategy: RebalanceStrategy::LruTailAge { - age_difference_ratio: 0.1, - min_retained_slabs: 1, - }, - }) - .set_cache_dir(cache_directory) - .set_pool_rebalance(PoolRebalanceConfig { - interval: Duration::new(1, 0), - strategy: RebalanceStrategy::LruTailAge { - age_difference_ratio: 0.1, - min_retained_slabs: 1, - }, - }); - - if let Err(e) = init_cache(fb, config) { - panic!("{}", e); - } - } - - #[fbinit::test] - fn test_readonly_shared_cache(fb: FacebookInit) -> Result<()> { - let temp_dir = create_temp_dir("test_shared_cache"); - create_shared_cache(fb, temp_dir.path().into()); - - // Set value in original cache - let pool = get_or_create_pool("find_pool_by_name", 4 * 1024 * 1024)?; - let value = b"I am a fish"; - pool.set(b"test", Bytes::from(value.as_ref()))?; - - let test_handle = pool.get_handle(b"test")?.unwrap(); - let remote_handle = test_handle.get_remote_handle()?; - - // Get value from read-only cache - let ro_cache_view = ReadOnlySharedCacheView::new(&temp_dir.path())?; - let slice = ro_cache_view - .get_bytes_from_offset(remote_handle.get_offset(), remote_handle.get_length())?; - let reader_bytes = Bytes::copy_from_slice(slice); - - // Verify that value is the same - assert_eq!( - reader_bytes, - Bytes::from(b"I am a fish".as_ref()), - "Data does not match!" - ); - - Ok(()) - } - - #[test] - fn test_non_existent_cache_dir() { - let temp_dir = create_temp_dir("test_non_existent_cache_dir"); - - let mut path = temp_dir.path().to_owned(); - path.push("this_dir_does_not_exist"); - - match ReadOnlySharedCacheView::new(&path) { - Ok(_) => panic!("ReadOnlySharedCacheView::new returned Ok for non-existent dir"), - Err(FailedToAttachError { .. 
}) => {} - } - } -} diff --git a/cachelib/rust/src/cachelib.cpp b/cachelib/rust/src/cachelib.cpp index 6a48cbdc3f..4e50508e50 100644 --- a/cachelib/rust/src/cachelib.cpp +++ b/cachelib/rust/src/cachelib.cpp @@ -29,20 +29,20 @@ namespace facebook { namespace rust { namespace cachelib { std::unique_ptr make_cacheadmin( - facebook::cachelib::LruAllocator& cache, const std::string& oncall) { + LruAllocator& cache, const std::string& oncall) { facebook::cachelib::CacheAdmin::Config adminConfig; adminConfig.oncall = oncall; return std::make_unique(cache, adminConfig); } -std::unique_ptr make_lru_allocator( +std::unique_ptr make_lru_allocator( std::unique_ptr config) { - return std::make_unique(*config); + return std::make_unique(*config); } -std::unique_ptr make_shm_lru_allocator( +std::unique_ptr make_shm_lru_allocator( std::unique_ptr config) { - return std::make_unique( - facebook::cachelib::LruAllocator::SharedMemNewT::SharedMemNew, *config); + return std::make_unique( + LruAllocator::SharedMemNewT::SharedMemNew, *config); } std::unique_ptr make_lru_allocator_config() { return std::make_unique(); @@ -130,14 +130,13 @@ void set_base_address(LruAllocatorConfig& config, size_t addr) { config.slabMemoryBaseAddr = (void*)addr; } -int8_t add_pool(const facebook::cachelib::LruAllocator& cache, +int8_t add_pool(const LruAllocator& cache, folly::StringPiece name, size_t size) { - return const_cast(cache).addPool(name, - size); + return const_cast(cache).addPool(name, size); } -size_t get_unreserved_size(const facebook::cachelib::LruAllocator& cache) { +size_t get_unreserved_size(const LruAllocator& cache) { return cache.getCacheMemoryStats().unReservedSize; } @@ -148,20 +147,16 @@ const uint8_t* get_memory(const LruItemHandle& handle) { uint8_t* get_writable_memory(LruItemHandle& handle) { return static_cast(handle->getMemory()); } -size_t get_item_ptr_as_offset(const facebook::cachelib::LruAllocator& cache, - const uint8_t* ptr) { - return const_cast(cache) - .getItemPtrAsOffset(ptr); +size_t get_item_ptr_as_offset(const LruAllocator& cache, const uint8_t* ptr) { + return const_cast(cache).getItemPtrAsOffset(ptr); } -std::unique_ptr allocate_item( - const facebook::cachelib::LruAllocator& cache, - facebook::cachelib::PoolId id, - folly::StringPiece key, - size_t size, - uint32_t ttlSecs) { - auto item = const_cast(cache).allocate( - id, key, size, ttlSecs); +std::unique_ptr allocate_item(const LruAllocator& cache, + facebook::cachelib::PoolId id, + folly::StringPiece key, + uint32_t size, + uint32_t ttlSecs) { + auto item = const_cast(cache).allocate(id, key, size, ttlSecs); if (item) { return std::make_unique(std::move(item)); } else { @@ -169,22 +164,20 @@ std::unique_ptr allocate_item( } } -bool insert_handle(const facebook::cachelib::LruAllocator& cache, - LruItemHandle& handle) { - return const_cast(cache).insert(handle); +bool insert_handle(const LruAllocator& cache, LruItemHandle& handle) { + return const_cast(cache).insert(handle); } -void insert_or_replace_handle(const facebook::cachelib::LruAllocator& cache, +void insert_or_replace_handle(const LruAllocator& cache, LruItemHandle& handle) { - const_cast(cache).insertOrReplace(handle); + const_cast(cache).insertOrReplace(handle); } -void remove_item(const facebook::cachelib::LruAllocator& cache, - folly::StringPiece key) { - const_cast(cache).remove(key); +void remove_item(const LruAllocator& cache, folly::StringPiece key) { + const_cast(cache).remove(key); } -std::unique_ptr find_item( - const facebook::cachelib::LruAllocator& cache, 
folly::StringPiece key) { - auto item = const_cast(cache).find(key); +std::unique_ptr find_item(const LruAllocator& cache, + folly::StringPiece key) { + auto item = const_cast(cache).find(key); if (item) { // TODO(jiayueb) remove toWriteHandle() after finishing R/W handle migration return std::make_unique(std::move(item).toWriteHandle()); @@ -192,28 +185,24 @@ std::unique_ptr find_item( return std::unique_ptr(); } } -size_t get_pool_size(const facebook::cachelib::LruAllocator& cache, - facebook::cachelib::PoolId id) { +size_t get_pool_size(const LruAllocator& cache, facebook::cachelib::PoolId id) { return cache.getPool(id).getPoolSize(); } -bool grow_pool(const facebook::cachelib::LruAllocator& cache, +bool grow_pool(const LruAllocator& cache, facebook::cachelib::PoolId id, size_t size) { - return const_cast(cache).growPool(id, - size); + return const_cast(cache).growPool(id, size); } -bool shrink_pool(const facebook::cachelib::LruAllocator& cache, +bool shrink_pool(const LruAllocator& cache, facebook::cachelib::PoolId id, size_t size) { - return const_cast(cache).shrinkPool(id, - size); + return const_cast(cache).shrinkPool(id, size); } -bool resize_pools(const facebook::cachelib::LruAllocator& cache, +bool resize_pools(const LruAllocator& cache, facebook::cachelib::PoolId src, facebook::cachelib::PoolId dst, size_t size) { - return const_cast(cache).resizePools( - src, dst, size); + return const_cast(cache).resizePools(src, dst, size); } } // namespace cachelib } // namespace rust diff --git a/cachelib/rust/src/cachelib.h b/cachelib/rust/src/cachelib.h index f7695299d6..a51de7e043 100644 --- a/cachelib/rust/src/cachelib.h +++ b/cachelib/rust/src/cachelib.h @@ -26,14 +26,15 @@ namespace facebook { namespace rust { namespace cachelib { -using LruAllocatorConfig = facebook::cachelib::LruAllocator::Config; -using LruItemHandle = facebook::cachelib::LruAllocator::WriteHandle; +using LruAllocator = facebook::cachelib::LruAllocator; +using LruAllocatorConfig = LruAllocator::Config; +using LruItemHandle = LruAllocator::WriteHandle; std::unique_ptr make_cacheadmin( - facebook::cachelib::LruAllocator& cache, const std::string& oncall); -std::unique_ptr make_lru_allocator( + LruAllocator& cache, const std::string& oncall); +std::unique_ptr make_lru_allocator( std::unique_ptr config); -std::unique_ptr make_shm_lru_allocator( +std::unique_ptr make_shm_lru_allocator( std::unique_ptr config); std::unique_ptr make_lru_allocator_config(); @@ -83,42 +84,36 @@ void enable_cache_persistence(LruAllocatorConfig& config, void set_base_address(LruAllocatorConfig& config, size_t addr); -int8_t add_pool(const facebook::cachelib::LruAllocator& cache, +int8_t add_pool(const LruAllocator& cache, folly::StringPiece name, size_t size); -size_t get_unreserved_size(const facebook::cachelib::LruAllocator& cache); +size_t get_unreserved_size(const LruAllocator& cache); size_t get_size(const LruItemHandle& handle); const uint8_t* get_memory(const LruItemHandle& handle); uint8_t* get_writable_memory(LruItemHandle& handle); -size_t get_item_ptr_as_offset(const facebook::cachelib::LruAllocator& cache, - const uint8_t* ptr); - -std::unique_ptr allocate_item( - const facebook::cachelib::LruAllocator& cache, - facebook::cachelib::PoolId id, - folly::StringPiece key, - size_t size, - uint32_t ttlSecs); - -bool insert_handle(const facebook::cachelib::LruAllocator& cache, - LruItemHandle& handle); -void insert_or_replace_handle(const facebook::cachelib::LruAllocator& cache, - LruItemHandle& handle); - -void remove_item(const 
facebook::cachelib::LruAllocator& cache, - folly::StringPiece key); -std::unique_ptr find_item( - const facebook::cachelib::LruAllocator& cache, folly::StringPiece key); -size_t get_pool_size(const facebook::cachelib::LruAllocator& cache, - facebook::cachelib::PoolId id); -bool grow_pool(const facebook::cachelib::LruAllocator& cache, +size_t get_item_ptr_as_offset(const LruAllocator& cache, const uint8_t* ptr); + +std::unique_ptr allocate_item(const LruAllocator& cache, + facebook::cachelib::PoolId id, + folly::StringPiece key, + uint32_t size, + uint32_t ttlSecs); + +bool insert_handle(const LruAllocator& cache, LruItemHandle& handle); +void insert_or_replace_handle(const LruAllocator& cache, LruItemHandle& handle); + +void remove_item(const LruAllocator& cache, folly::StringPiece key); +std::unique_ptr find_item(const LruAllocator& cache, + folly::StringPiece key); +size_t get_pool_size(const LruAllocator& cache, facebook::cachelib::PoolId id); +bool grow_pool(const LruAllocator& cache, facebook::cachelib::PoolId id, size_t size); -bool shrink_pool(const facebook::cachelib::LruAllocator& cache, +bool shrink_pool(const LruAllocator& cache, facebook::cachelib::PoolId id, size_t size); -bool resize_pools(const facebook::cachelib::LruAllocator& cache, +bool resize_pools(const LruAllocator& cache, facebook::cachelib::PoolId src, facebook::cachelib::PoolId dst, size_t size); diff --git a/cachelib/rust/src/lib.rs b/cachelib/rust/src/lib.rs index 2eaca32d84..172765367b 100644 --- a/cachelib/rust/src/lib.rs +++ b/cachelib/rust/src/lib.rs @@ -47,7 +47,6 @@ mod ffi { oncall: &CxxString, ) -> Result>; - #[namespace = "facebook::cachelib"] type LruAllocator; fn make_lru_allocator( config: UniquePtr, @@ -137,7 +136,7 @@ mod ffi { cache: &LruAllocator, id: i8, key: StringPiece<'_>, - size: usize, + size: u32, ttl_secs: u32, ) -> Result>; diff --git a/cachelib/rust/src/lrucache.rs b/cachelib/rust/src/lrucache.rs index bf7ddfcb49..110d5bebf9 100644 --- a/cachelib/rust/src/lrucache.rs +++ b/cachelib/rust/src/lrucache.rs @@ -28,6 +28,7 @@ use std::sync::Mutex; use std::sync::RwLock; use std::time::Duration; +use anyhow::Context; use anyhow::Error; use anyhow::Result; use bytes::buf::UninitSlice; @@ -720,6 +721,7 @@ impl LruCachePool { .map_or(0, |d| std::cmp::min(d.as_secs(), 1)) .try_into() .unwrap_or(u32::MAX); + let size = size.try_into().context("Cache allocation too large")?; let handle = ffi::allocate_item(cache, self.pool, key, size, ttl_secs)?; if handle.is_null() { Ok(None) diff --git a/cachelib/shm/ShmCommon.cpp b/cachelib/shm/ShmCommon.cpp index 898d43bf37..f7163f26d8 100644 --- a/cachelib/shm/ShmCommon.cpp +++ b/cachelib/shm/ShmCommon.cpp @@ -63,7 +63,7 @@ size_t pageAligned(size_t size, PageSizeT p) { namespace { std::vector getSmapLines(const std::string& smapContent) { std::vector lines; - folly::split("\n", smapContent, lines, true); + folly::split('\n', smapContent, lines, true); XDCHECK(!lines.empty()); return lines; } @@ -81,14 +81,14 @@ bool lineAddressMatches(folly::StringPiece line, uintptr_t addr) { std::vector tokens; // split into tokens by space - folly::split(" ", line, tokens, /* ignore empty */ true); + folly::split(' ', line, tokens, /* ignore empty */ true); XDCHECK(!tokens.empty()); folly::StringPiece startAddr; folly::StringPiece endAddr; // split the first token using the '-' separator - if (!folly::split("-", tokens[0], startAddr, endAddr)) { + if (!folly::split('-', tokens[0], startAddr, endAddr)) { throw std::invalid_argument( folly::sformat("Invalid address field {}", 
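The Rust wrapper above now narrows the allocation size to the uint32_t accepted by allocate_item and reports "Cache allocation too large" when it does not fit. The same guard expressed as a standalone C++ sketch; the function name is illustrative only.

```cpp
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>

// Reject sizes that cannot be represented in the uint32_t parameter of
// allocate_item(), instead of silently truncating them.
uint32_t checkedAllocSize(size_t requested) {
  if (requested > std::numeric_limits<uint32_t>::max()) {
    throw std::invalid_argument("cache allocation too large for uint32_t");
  }
  return static_cast<uint32_t>(requested);
}
```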
tokens[0])); } @@ -103,7 +103,7 @@ bool isAddressLine(folly::StringPiece line) { // address lines contain lots of fields before the first : // 006de000-01397000 rw-p 00000000 00:00 0 [heap] folly::StringPiece first, second; - folly::split(":", line, first, second); + folly::split(':', line, first, second); return first.find(' ') != std::string::npos; } @@ -133,7 +133,7 @@ PageSizeT getPageSizeInSMap(void* addr) { // Format is the following // KernelPageSize: 4 kB folly::StringPiece fieldName, value; - folly::split(":", line, fieldName, value); + folly::split(':', line, fieldName, value); if (fieldName != "MMUPageSize") { continue; } @@ -142,7 +142,7 @@ PageSizeT getPageSizeInSMap(void* addr) { folly::StringPiece sizeVal; folly::StringPiece unitVal; - folly::split(" ", value, sizeVal, unitVal); + folly::split(' ', value, sizeVal, unitVal); XDCHECK_EQ(unitVal, "kB"); size_t size = folly::to(sizeVal) * 1024; if (size == getPageSize(PageSizeT::TWO_MB)) { diff --git a/cachelib/shm/shm.thrift b/cachelib/shm/shm.thrift index 0372d7c8f7..7022947224 100644 --- a/cachelib/shm/shm.thrift +++ b/cachelib/shm/shm.thrift @@ -17,6 +17,6 @@ namespace cpp2 facebook.cachelib.serialization struct ShmManagerObject { - 1: required byte shmVal, - 3: required map nameToKeyMap, + 1: required byte shmVal; + 3: required map nameToKeyMap; } diff --git a/contrib/build-package.sh b/contrib/build-package.sh index ff487967cb..755933bd44 100755 --- a/contrib/build-package.sh +++ b/contrib/build-package.sh @@ -102,11 +102,12 @@ test "$#" -eq 0 \ && die "missing dependancy name to build. See -h for help" ###################################### -## Check which dependecy was requested +## Check which dependency was requested ###################################### external_git_clone= external_git_branch= +# external_git_tag can also be used for commit hashes external_git_tag= update_submodules= cmake_custom_params= @@ -160,6 +161,7 @@ case "$1" in REPODIR=cachelib/external/$NAME SRCDIR=$REPODIR external_git_clone=yes + external_git_tag="8.0.1" cmake_custom_params="-DBUILD_SHARED_LIBS=ON" if test "$build_tests" = "yes" ; then cmake_custom_params="$cmake_custom_params -DFMT_TEST=YES" @@ -174,7 +176,10 @@ case "$1" in REPODIR=cachelib/external/$NAME SRCDIR=$REPODIR/build/cmake external_git_clone=yes - external_git_branch=release + # Previously, we pinned to release branch. v1.5.4 needed + # CMake >= 3.18, later reverted. 
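The ShmCommon changes above switch every single-character delimiter from a string literal to a char literal, which picks folly::split's character-delimiter overload. A small self-contained sketch of the same call shape; the sample input is made up.

```cpp
#include <folly/String.h>

#include <string>
#include <vector>

int main() {
  const std::string smaps =
      "7f00-7f10 rw-p 00000000 00:00 0\nMMUPageSize: 4 kB\n";
  std::vector<folly::StringPiece> lines;
  // Char delimiter, as in the patch, instead of the "\n" string form.
  folly::split('\n', smaps, lines, /* ignoreEmpty */ true);
  return lines.size() == 2 ? 0 : 1;
}
```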
While waiting for v1.5.5, + # pin to the fix: https://github.com/facebook/zstd/pull/3510 + external_git_tag=8420502e if test "$build_tests" = "yes" ; then cmake_custom_params="-DZSTD_BUILD_TESTS=ON" else diff --git a/contrib/prerequisites-arch.sh b/contrib/prerequisites-arch.sh index 85a8656f7b..249f6c8082 100755 --- a/contrib/prerequisites-arch.sh +++ b/contrib/prerequisites-arch.sh @@ -19,4 +19,5 @@ sudo pacman -S --needed --noconfirm cmake \ boost \ double-conversion \ libdwarf \ + numactl \ libsodium diff --git a/contrib/prerequisites-fedora32.sh b/contrib/prerequisites-fedora32.sh index 235d6c1a8a..942cac0470 100755 --- a/contrib/prerequisites-fedora32.sh +++ b/contrib/prerequisites-fedora32.sh @@ -21,6 +21,7 @@ sudo dnf -y install bison flex patch bzip2 cmake \ zlib-devel lz4-devel xz-devel bzip2-devel \ jemalloc-devel snappy-devel libsodium-devel libdwarf-devel libaio-devel \ gmock-devel gflags-devel gtest gtest-devel \ + numactl-devel \ fmt fmt-devel # DO NOT INSTALL glog-devel - need to build from source for the glog-*.cmake files diff --git a/contrib/prerequisites-fedora34.sh b/contrib/prerequisites-fedora34.sh index 7e45c8740d..c7182cc513 100755 --- a/contrib/prerequisites-fedora34.sh +++ b/contrib/prerequisites-fedora34.sh @@ -19,4 +19,5 @@ sudo dnf -y install bison flex patch bzip2 cmake \ double-conversion double-conversion-devel make g++ \ boost-devel libevent-devel openssl-devel libunwind-devel \ zlib-devel lz4-devel xz-devel bzip2-devel \ - jemalloc-devel snappy-devel libsodium-devel libdwarf-devel libaio-devel + jemalloc-devel snappy-devel libsodium-devel libdwarf-devel libaio-devel \ + numactl-devel diff --git a/contrib/prerequisites-rocky9.sh b/contrib/prerequisites-rocky9.sh index bec5b82011..06720aba2e 100755 --- a/contrib/prerequisites-rocky9.sh +++ b/contrib/prerequisites-rocky9.sh @@ -38,7 +38,8 @@ sudo dnf install -y \ jemalloc-devel \ libsodium-devel \ libaio-devel \ - binutils-devel + binutils-devel \ + numactl-devel sudo dnf install -y \ diff --git a/website/docs/Cache_Library_Architecture_Guide/Overview_A_random_walk_down_the_Cache_Library.md b/website/docs/Cache_Library_Architecture_Guide/Overview_A_random_walk_down_the_Cache_Library.md index f13fc13222..cf4e750c45 100644 --- a/website/docs/Cache_Library_Architecture_Guide/Overview_A_random_walk_down_the_Cache_Library.md +++ b/website/docs/Cache_Library_Architecture_Guide/Overview_A_random_walk_down_the_Cache_Library.md @@ -107,7 +107,7 @@ There will be a section discussing each of the bullets below. * For regular cache: Find the item in the chained hash map. From the item, get the slab it lives on and form the slab, identify the allocation class and promote the item on that particular LRU queue. Increment the refcount and return the item handle. ## Flash overview -Flash is organized in a similar way: there is a cache for smaller items (BigHash) and for larger item (Block Cache). Unlike DRAM, the client does not get to choose where the item goes. It's done automatically thresholding the size. Together, this constitutes [Navy](/docs/Cache_Library_Architecture_Guide/Navy_Architecture_Overview ) -- the flash cache engine of CacheLib. +Flash is organized in a similar way: there is a cache for smaller items (BigHash) and for larger item (Block Cache). Unlike DRAM, the client does not get to choose where the item goes. It's done automatically thresholding the size. Together, this constitutes [Navy](/docs/Cache_Library_Architecture_Guide/Navy_Overview ) -- the flash cache engine of CacheLib. 
* "block device" refers to devices that's read/write happen with a fixed size block (if it helps, substitute the word "page" here). It means you can't write with precision of bytes but have to incur overhead if you don't write an entire block. diff --git a/website/docs/Cache_Library_User_Guides/Cachebench_FB_HW_eval.md b/website/docs/Cache_Library_User_Guides/Cachebench_FB_HW_eval.md index ce99649133..d17b7ac522 100644 --- a/website/docs/Cache_Library_User_Guides/Cachebench_FB_HW_eval.md +++ b/website/docs/Cache_Library_User_Guides/Cachebench_FB_HW_eval.md @@ -17,10 +17,11 @@ sufficient free memory (50+GB) and SSD capacity (1TB). * SSD Capacity: 100GB or more available capacity * Internet connection capable of accessing github.com and installing packages -## Set up the SSD devices using mdraid +## Set up the SSD devices -To gather SSD performance metrics, the SSD must be setup first. An example -below sets up a raid device to handle two ssds being used by CacheBench. +To gather SSD performance metrics, the SSD must be setup first. Cachebench (and CacheLib) supports using various types of devices for NVM cache including a raw block device or a regular file. When one wants to use multiple SSDs as NVM cache, the CacheLib also provides a native support for RAID0 (i.e., striping). + +Optionally, as an example, an user can setup and use md devices as follows. In this example, the md device is created from two ssd devices to be used as a raw block device in CacheBench. ```sh mdadm --create /dev/md0 --force --raid-devices=2 --level=0 --chunk=256 /dev/nvme1n1 /dev/nvme2n1 @@ -142,7 +143,7 @@ mdadm --create /dev/md0 --force --raid-devices=2 --level=0 --chunk=256 /dev/nvme make install ``` -See [build and installation](/docs/installation/installation) for further details. +See [build and installation](/docs/installation) for further details. ## Running the benchmark for SSD perf testing @@ -196,7 +197,6 @@ For a full list of options that can be configured, see [configuring cachebench]( using the `--progress` and specifying a duration in seconds. If `--progress-stats-file` is also specified, on every progress interval, `cachebench` would log the internal stats to the specified file. - ## Running cachebench with the trace workload Meta is sharing anonymized traces captured from large scale production cache services. These traces are licensed under the same license as CacheLib. They are meant to help academic and industry researchers to optimize for our caching workloads. One can freely download it from our AWS S3 bucket and run the CacheBench to replay the trace with varying configuration as follows. diff --git a/website/docs/Cache_Library_User_Guides/Cachebench_Overview.md b/website/docs/Cache_Library_User_Guides/Cachebench_Overview.md index eb72646542..8c878e1be6 100644 --- a/website/docs/Cache_Library_User_Guides/Cachebench_Overview.md +++ b/website/docs/Cache_Library_User_Guides/Cachebench_Overview.md @@ -53,6 +53,6 @@ developer's need. The following are few examples. ## Building cachebench -Follow instructions in [Installation](/docs/installation/installation) to build +Follow instructions in [Installation](/docs/installation) to build cachebench. 
This should install cachebench in your local machine under ```opt/cachelib/bin/cachebench``` diff --git a/website/docs/Cache_Library_User_Guides/eviction_policy.md b/website/docs/Cache_Library_User_Guides/eviction_policy.md index 356a3cb198..cce947619a 100644 --- a/website/docs/Cache_Library_User_Guides/eviction_policy.md +++ b/website/docs/Cache_Library_User_Guides/eviction_policy.md @@ -23,7 +23,7 @@ The second modification is promotion delay. Normally every access item is moved How often does cachelib refresh a previously accessed item. By default this is 60 seconds. * `updateOnWrite`/`updateOnRead` -Specifies if a LRU promotion happens on read or write or both. As a rule of thumb, for most services that care primarily about read performance, turn on `updateOnRead`. However, if your service cares a lot about retention time of items that are recently written, then turn on `updateOnWrite` as well. +Specifies whether an LRU promotion happens on read, write, or both. As a rule of thumb, for most services that care primarily about read performance, turn on `updateOnRead`. However, if your service cares a lot about retention time of items that are recently written, then turn on `updateOnWrite` as well. By default, `updateOnRead = true` and `updateOnWrite = false`. * `ipSpec` This essentially turns the LRU into a two-segmented LRU. Setting this to `1` means every new insertion will be inserted 1/2 from the end of the LRU, `2` means 1/4 from the end of the LRU, and so on. diff --git a/website/docs/installation/testing.md b/website/docs/installation/testing.md index 02b2cb747c..d8730127b4 100644 --- a/website/docs/installation/testing.md +++ b/website/docs/installation/testing.md @@ -11,7 +11,7 @@ of the cache infrastructure. ## Building CacheLib Unit Tests To build the cachelib unit tests, use one of the following commands -(see [installation](docs/installation/installation) instructions for more details): +(see [installation](/docs/installation) instructions for more details): 1. Use `./contrib/build.sh` script with the `-T` option. 2. Use `./contrib/build-package.sh -t cachelib` (with the `-t` option) @@ -42,7 +42,7 @@ Running a single unit test binary: ```sh $ cd opt/cachelib/tests -$ ./allocator-test-ItemTest +$ ./allocator-test-ItemTest [==========] Running 6 tests from 1 test suite. [----------] Global test environment set-up.
[----------] 6 tests from ItemTest diff --git a/website/package.json b/website/package.json index 8c58fda9a2..ac9801eeeb 100644 --- a/website/package.json +++ b/website/package.json @@ -43,7 +43,8 @@ "ansi-html": "0.0.8", "ua-parser-js": "^1.0.33", "eta": "^2.0.0", - "http-cache-semantics": "^4.1.1" + "http-cache-semantics": "^4.1.1", + "@braintree/sanitize-url": "^6.0.1" }, "browserslist": { "production": [ diff --git a/website/src/pages/index.js b/website/src/pages/index.js index 151ec3f3fb..5886c079f9 100644 --- a/website/src/pages/index.js +++ b/website/src/pages/index.js @@ -117,7 +117,7 @@ function Home() { 'button button--secondary button--lg', styles.getStarted, )} - to={ useBaseUrl('docs/installation/installation') }> + to={ useBaseUrl('docs/installation') }> Get Started diff --git a/website/yarn.lock b/website/yarn.lock index 19f12eb0d5..51e55da8c7 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -1809,10 +1809,10 @@ "@babel/helper-validator-identifier" "^7.18.6" to-fast-properties "^2.0.0" -"@braintree/sanitize-url@^6.0.0": - version "6.0.0" - resolved "https://registry.yarnpkg.com/@braintree/sanitize-url/-/sanitize-url-6.0.0.tgz#fe364f025ba74f6de6c837a84ef44bdb1d61e68f" - integrity sha512-mgmE7XBYY/21erpzhexk4Cj1cyTQ9LzvnTxtzM17BJ7ERMNE6W72mQRo0I1Ud8eFJ+RVVIcBNhLFZ3GX4XFz5w== +"@braintree/sanitize-url@^6.0.0", "@braintree/sanitize-url@^6.0.1": + version "6.0.2" + resolved "https://registry.yarnpkg.com/@braintree/sanitize-url/-/sanitize-url-6.0.2.tgz#6110f918d273fe2af8ea1c4398a88774bb9fc12f" + integrity sha512-Tbsj02wXCbqGmzdnXNk0SOF19ChhRU70BsroIi4Pm6Ehp56in6vch94mfbdQ17DozxkL3BAVjbZ4Qc1a0HFRAg== "@colors/colors@1.5.0": version "1.5.0"
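
Returning to the `testing.md` hunk above: the corrected command runs one unit-test binary end to end. When only a subset of cases is of interest, the standard googletest flags can narrow the run; the filter pattern below is just an example based on the `ItemTest` suite shown in the sample output, not part of the patch.

```sh
# Illustrative use of standard googletest flags on a CacheLib test binary.
cd opt/cachelib/tests
./allocator-test-ItemTest --gtest_list_tests            # list the available tests
./allocator-test-ItemTest --gtest_filter='ItemTest.*'   # run only the ItemTest suite
```
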
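The `package.json` and `yarn.lock` hunks above pin `@braintree/sanitize-url` to `^6.0.1` and refresh its lockfile entry. Anyone reproducing this locally would normally let yarn regenerate the lockfile rather than editing it by hand; a minimal sketch, assuming yarn is already set up for the website directory:

```sh
# Illustrative only: add the dependency and let yarn update yarn.lock.
cd website
yarn add "@braintree/sanitize-url@^6.0.1"
# or, if package.json was already edited by hand, just refresh the lockfile:
yarn install
```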