Call experimental new clock cache HyperClockCache #10684

Closed
wants to merge 4 commits into from
1 change: 1 addition & 0 deletions HISTORY.md
@@ -32,6 +32,7 @@
* RocksDB does internal auto prefetching if it notices 2 sequential reads and readahead_size is not specified. New option `num_file_reads_for_auto_readahead` is added in BlockBasedTableOptions, which indicates after how many sequential reads internal auto prefetching should start (default is 2).
* Added new perf context counters `block_cache_standalone_handle_count`, `block_cache_real_handle_count`, `compressed_sec_cache_insert_real_count`, `compressed_sec_cache_insert_dummy_count`, `compressed_sec_cache_uncompressed_bytes`, and `compressed_sec_cache_compressed_bytes`.
* Memory for blobs which are to be inserted into the blob cache is now allocated using the cache's allocator (see #10628 and #10647).
* HyperClockCache is an experimental, lock-free Cache alternative for block cache that offers much improved CPU efficiency under high parallel load or high contention, with some caveats. As much as 4.5x higher ops/sec vs. LRUCache has been seen in db_bench under high parallel load.

### Performance Improvements
* Iterator performance is improved for `DeleteRange()` users. Internally, the iterator will skip to the end of a range tombstone when possible, instead of looping through each key and checking individually whether it is range deleted.
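For readers of the HISTORY.md entry above: the new cache is built through the public HyperClockCacheOptions API rather than a factory function. The following is a minimal usage sketch, not part of this diff; the capacity, entry-charge, and function name are illustrative assumptions, and the BlockBasedTableOptions plumbing is ordinary RocksDB configuration rather than anything introduced here.

#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

using ROCKSDB_NAMESPACE::BlockBasedTableOptions;
using ROCKSDB_NAMESPACE::Cache;
using ROCKSDB_NAMESPACE::HyperClockCacheOptions;
using ROCKSDB_NAMESPACE::NewBlockBasedTableFactory;
using ROCKSDB_NAMESPACE::Options;

// Hypothetical helper: plug a HyperClockCache in as the block cache.
void UseHyperClockCacheAsBlockCache(Options& options) {
  // estimated_entry_charge should approximate the typical cached block size;
  // 32 KiB here is an illustrative guess, as is the 1 GiB capacity.
  HyperClockCacheOptions cache_opts(/*capacity=*/1 << 30,
                                    /*estimated_entry_charge=*/32 * 1024);
  std::shared_ptr<Cache> cache = cache_opts.MakeSharedCache();

  BlockBasedTableOptions table_opts;
  table_opts.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_opts));
}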
14 changes: 6 additions & 8 deletions cache/cache_bench_tool.cc
@@ -13,7 +13,6 @@
#include <set>
#include <sstream>

#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "db/db_impl/db_impl.h"
#include "monitoring/histogram.h"
@@ -292,13 +291,12 @@ class CacheBench {
}

if (FLAGS_cache_type == "clock_cache") {
cache_ = ExperimentalNewClockCache(
FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits,
false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
if (!cache_) {
fprintf(stderr, "Clock cache not supported.\n");
exit(1);
}
fprintf(stderr, "Old clock cache implementation has been removed.\n");
exit(1);
} else if (FLAGS_cache_type == "hyper_clock_cache") {
cache_ = HyperClockCacheOptions(FLAGS_cache_size, FLAGS_value_bytes,
FLAGS_num_shard_bits)
.MakeSharedCache();
} else if (FLAGS_cache_type == "fast_lru_cache") {
cache_ = NewFastLRUCache(
FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits,
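Usage note, not part of the diff: with this change cache_bench selects the new implementation through its existing cache_type flag. Assuming the gflags corresponding to the FLAGS_* variables in the hunk above (cache_type, cache_size, value_bytes, num_shard_bits), an invocation would look roughly like:

./cache_bench -cache_type=hyper_clock_cache -cache_size=1073741824 \
    -value_bytes=8192 -num_shard_bits=6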
65 changes: 31 additions & 34 deletions cache/cache_test.cc
@@ -15,15 +15,14 @@
#include <string>
#include <vector>

#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "cache/lru_cache.h"
#include "port/stack_trace.h"
#include "test_util/testharness.h"
#include "util/coding.h"
#include "util/string_util.h"

// FastLRUCache and ClockCache only support 16-byte keys, so some of
// FastLRUCache and HyperClockCache only support 16-byte keys, so some of
// the tests originally written for LRUCache do not work on the other caches.
// Those tests were adapted to use 16-byte keys. We kept the original ones.
// TODO: Remove the original tests if they ever become unused.
Expand Down Expand Up @@ -76,7 +75,7 @@ void EraseDeleter2(const Slice& /*key*/, void* value) {
}

const std::string kLRU = "lru";
const std::string kClock = "clock";
const std::string kHyperClock = "hyper_clock";
const std::string kFast = "fast";

} // anonymous namespace
@@ -87,7 +86,7 @@ class CacheTest : public testing::TestWithParam<std::string> {
static std::string type_;

static void Deleter(const Slice& key, void* v) {
if (type_ == kFast || type_ == kClock) {
if (type_ == kFast || type_ == kHyperClock) {
current_->deleted_keys_.push_back(DecodeKey16Bytes(key));
} else {
current_->deleted_keys_.push_back(DecodeKey32Bits(key));
@@ -122,10 +121,10 @@ class CacheTest : public testing::TestWithParam<std::string> {
if (type == kLRU) {
return NewLRUCache(capacity);
}
if (type == kClock) {
return ExperimentalNewClockCache(
capacity, estimated_value_size_, -1 /*num_shard_bits*/,
false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
if (type == kHyperClock) {
return HyperClockCacheOptions(
capacity, estimated_value_size_ /*estimated_value_size*/)
.MakeSharedCache();
}
if (type == kFast) {
return NewFastLRUCache(
@@ -148,10 +147,11 @@ class CacheTest : public testing::TestWithParam<std::string> {
co.metadata_charge_policy = charge_policy;
return NewLRUCache(co);
}
if (type == kClock) {
return ExperimentalNewClockCache(capacity, 1 /*estimated_value_size*/,
num_shard_bits, strict_capacity_limit,
charge_policy);
if (type == kHyperClock) {
return HyperClockCacheOptions(capacity, 1 /*estimated_value_size*/,
num_shard_bits, strict_capacity_limit,
nullptr /*allocator*/, charge_policy)
.MakeSharedCache();
}
if (type == kFast) {
return NewFastLRUCache(capacity, 1 /*estimated_value_size*/,
Expand All @@ -163,12 +163,11 @@ class CacheTest : public testing::TestWithParam<std::string> {

// These functions encode/decode keys in test cases that use
// int keys.
// Currently, FastLRUCache requires keys to be 16B long, whereas
// LRUCache and ClockCache don't, so the encoding depends on
// the cache type.
// Currently, HyperClockCache requires keys to be 16B long, whereas
// LRUCache doesn't, so the encoding depends on the cache type.
std::string EncodeKey(int k) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast || type == kHyperClock) {
return EncodeKey16Bytes(k);
} else {
return EncodeKey32Bits(k);
@@ -177,7 +176,7 @@

int DecodeKey(const Slice& k) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast || type == kHyperClock) {
return DecodeKey16Bytes(k);
} else {
return DecodeKey32Bits(k);
@@ -242,7 +241,7 @@ TEST_P(CacheTest, UsageTest) {
auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata);
ASSERT_EQ(0, cache->GetUsage());
size_t baseline_meta_usage = precise_cache->GetUsage();
if (type != kClock) {
if (type != kHyperClock) {
ASSERT_EQ(0, baseline_meta_usage);
}

@@ -263,7 +262,7 @@
kv_size, DumbDeleter));
usage += kv_size;
ASSERT_EQ(usage, cache->GetUsage());
if (type == kClock) {
if (type == kHyperClock) {
ASSERT_EQ(baseline_meta_usage + usage, precise_cache->GetUsage());
} else {
ASSERT_LT(usage, precise_cache->GetUsage());
@@ -293,7 +292,7 @@ TEST_P(CacheTest, UsageTest) {
ASSERT_GT(kCapacity, cache->GetUsage());
ASSERT_GT(kCapacity, precise_cache->GetUsage());
ASSERT_LT(kCapacity * 0.95, cache->GetUsage());
if (type != kClock) {
if (type != kHyperClock) {
ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage());
} else {
// estimated value size of 1 is weird for clock cache, because
@@ -319,7 +318,7 @@ TEST_P(CacheTest, PinnedUsageTest) {
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata);
size_t baseline_meta_usage = precise_cache->GetUsage();
if (type != kClock) {
if (type != kHyperClock) {
ASSERT_EQ(0, baseline_meta_usage);
}

@@ -428,7 +427,7 @@ TEST_P(CacheTest, HitAndMiss) {
ASSERT_EQ(-1, Lookup(300));

Insert(100, 102);
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
// ClockCache usually doesn't overwrite on Insert
ASSERT_EQ(101, Lookup(100));
} else {
@@ -439,15 +438,15 @@

ASSERT_EQ(1U, deleted_keys_.size());
ASSERT_EQ(100, deleted_keys_[0]);
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
ASSERT_EQ(102, deleted_values_[0]);
} else {
ASSERT_EQ(101, deleted_values_[0]);
}
}

TEST_P(CacheTest, InsertSameKey) {
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
ROCKSDB_GTEST_BYPASS(
"ClockCache doesn't guarantee Insert overwrite same key.");
return;
@@ -477,7 +476,7 @@ TEST_P(CacheTest, Erase) {
}

TEST_P(CacheTest, EntriesArePinned) {
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
ROCKSDB_GTEST_BYPASS(
"ClockCache doesn't guarantee Insert overwrite same key.");
return;
@@ -543,7 +542,7 @@ TEST_P(CacheTest, ExternalRefPinsEntries) {
Insert(1000 + j, 2000 + j);
}
// Clock cache is even more stateful and needs more churn to evict
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
for (int j = 0; j < kCacheSize; j++) {
Insert(11000 + j, 11000 + j);
}
@@ -742,9 +741,9 @@ TEST_P(CacheTest, ReleaseWithoutErase) {

TEST_P(CacheTest, SetCapacity) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast || type == kHyperClock) {
ROCKSDB_GTEST_BYPASS(
"FastLRUCache and ClockCache don't support arbitrary capacity "
"FastLRUCache and HyperClockCache don't support arbitrary capacity "
"adjustments.");
return;
}
@@ -883,7 +882,7 @@ TEST_P(CacheTest, OverCapacity) {
cache->Release(handles[i]);
}

if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
// Make sure eviction is triggered.
ASSERT_OK(cache->Insert(EncodeKey(-1), nullptr, 1, &deleter, &handles[0]));

@@ -1020,7 +1019,8 @@ TEST_P(CacheTest, DefaultShardBits) {
// Prevent excessive allocation (to save time & space)
estimated_value_size_ = 100000;
// Implementations use different minimum shard sizes
size_t min_shard_size = (GetParam() == kClock ? 32U * 1024U : 512U) * 1024U;
size_t min_shard_size =
(GetParam() == kHyperClock ? 32U * 1024U : 512U) * 1024U;

std::shared_ptr<Cache> cache = NewCache(32U * min_shard_size);
ShardedCache* sc = dynamic_cast<ShardedCache*>(cache.get());
@@ -1052,11 +1052,8 @@ TEST_P(CacheTest, GetChargeAndDeleter) {
cache_->Release(h1);
}

std::shared_ptr<Cache> (*new_clock_cache_func)(size_t, size_t, int, bool,
CacheMetadataChargePolicy) =
ExperimentalNewClockCache;
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
testing::Values(kLRU, kClock, kFast));
testing::Values(kLRU, kHyperClock, kFast));
INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest,
testing::Values(kLRU, kFast));

46 changes: 23 additions & 23 deletions cache/clock_cache.cc
@@ -21,7 +21,7 @@

namespace ROCKSDB_NAMESPACE {

namespace clock_cache {
namespace hyper_clock_cache {

static_assert(sizeof(ClockHandle) == 64U,
"Expecting size / alignment with common cache line size");
@@ -1126,9 +1126,10 @@ size_t ClockCacheShard::GetTableAddressCount() const {
return table_.GetTableSize();
}

ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,
int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
HyperClockCache::HyperClockCache(
size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit),
num_shards_(1 << num_shard_bits) {
assert(estimated_value_size > 0 ||
@@ -1145,7 +1146,7 @@ ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,
}
}

ClockCache::~ClockCache() {
HyperClockCache::~HyperClockCache() {
if (shards_ != nullptr) {
assert(num_shards_ > 0);
for (int i = 0; i < num_shards_; i++) {
@@ -1155,41 +1156,42 @@ ClockCache::~ClockCache() {
}
}

CacheShard* ClockCache::GetShard(uint32_t shard) {
CacheShard* HyperClockCache::GetShard(uint32_t shard) {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

const CacheShard* ClockCache::GetShard(uint32_t shard) const {
const CacheShard* HyperClockCache::GetShard(uint32_t shard) const {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

void* ClockCache::Value(Handle* handle) {
void* HyperClockCache::Value(Handle* handle) {
return reinterpret_cast<const ClockHandle*>(handle)->value;
}

size_t ClockCache::GetCharge(Handle* handle) const {
size_t HyperClockCache::GetCharge(Handle* handle) const {
return reinterpret_cast<const ClockHandle*>(handle)->total_charge;
}

Cache::DeleterFn ClockCache::GetDeleter(Handle* handle) const {
Cache::DeleterFn HyperClockCache::GetDeleter(Handle* handle) const {
auto h = reinterpret_cast<const ClockHandle*>(handle);
return h->deleter;
}

uint32_t ClockCache::GetHash(Handle* handle) const {
uint32_t HyperClockCache::GetHash(Handle* handle) const {
return reinterpret_cast<const ClockHandle*>(handle)->hash;
}

void ClockCache::DisownData() {
void HyperClockCache::DisownData() {
// Leak data only if that won't generate an ASAN/valgrind warning.
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
num_shards_ = 0;
}
}

} // namespace clock_cache
} // namespace hyper_clock_cache

// DEPRECATED (see public API)
std::shared_ptr<Cache> NewClockCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
@@ -1199,22 +1201,20 @@ std::shared_ptr<Cache> NewClockCache(
/* low_pri_pool_ratio */ 0.0);
}

std::shared_ptr<Cache> ExperimentalNewClockCache(
size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
if (num_shard_bits >= 20) {
std::shared_ptr<Cache> HyperClockCacheOptions::MakeSharedCache() const {
auto my_num_shard_bits = num_shard_bits;
if (my_num_shard_bits >= 20) {
return nullptr; // The cache cannot be sharded into too many fine pieces.
}
if (num_shard_bits < 0) {
if (my_num_shard_bits < 0) {
// Use larger shard size to reduce risk of large entries clustering
// or skewing individual shards.
constexpr size_t min_shard_size = 32U * 1024U * 1024U;
num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size);
my_num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size);
}
return std::make_shared<clock_cache::ClockCache>(
capacity, estimated_value_size, num_shard_bits, strict_capacity_limit,
metadata_charge_policy);
return std::make_shared<hyper_clock_cache::HyperClockCache>(
capacity, estimated_entry_charge, my_num_shard_bits,
strict_capacity_limit, metadata_charge_policy);
}

} // namespace ROCKSDB_NAMESPACE
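For reference, a migration sketch (not part of the diff) for callers of the removed ExperimentalNewClockCache: the same arguments go through HyperClockCacheOptions, as cache_test.cc above already does, with estimated_value_size mapping to estimated_entry_charge and the extra parameter being the optional memory allocator. The wrapper name below is hypothetical.

#include <memory>

#include "rocksdb/cache.h"

using ROCKSDB_NAMESPACE::Cache;
using ROCKSDB_NAMESPACE::CacheMetadataChargePolicy;
using ROCKSDB_NAMESPACE::HyperClockCacheOptions;

// Hypothetical drop-in replacement for the removed ExperimentalNewClockCache().
std::shared_ptr<Cache> NewHyperClockCacheCompat(
    size_t capacity, size_t estimated_value_size, int num_shard_bits,
    bool strict_capacity_limit,
    CacheMetadataChargePolicy metadata_charge_policy) {
  // estimated_value_size maps to estimated_entry_charge; nullptr is the
  // optional memory allocator introduced by the options-based API.
  return HyperClockCacheOptions(capacity, estimated_value_size, num_shard_bits,
                                strict_capacity_limit, nullptr /*allocator*/,
                                metadata_charge_policy)
      .MakeSharedCache();
}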