MB-51956: Remove the QueueDirtyWithManyClosedUnrefCheckpoints bench

The test was designed to measure the contention between frontend writes and the (old) CheckpointRemoverTask performing checkpoint removal. Since Neo the latter doesn't exist anymore; it has been replaced by eager checkpoint removal. Actually there is a code path that can potentially do a bulk checkpoint removal (also of many checkpoints, once MB-50984 is done): that is checkpoint removal triggered by CursorDrop. But note that that code path calls down into CM::extractClosedUnrefCheckpoints(), which is the old O(N) bit, now O(1), and already covered by the ExtractClosedUnrefCheckpoints bench. Change-Id: I42c004b7d1c755ff5db45fffd1301526090180fe Reviewed-on: https://review.couchbase.org/c/kv_engine/+/174312 Tested-by: Build Bot <build@couchbase.com> Reviewed-by: Dave Rigby <daver@couchbase.com>
couchbase · May 11, 2022 · 24d2160 · 24d2160
1 parent 4492280
commit 24d2160
Showing 1 changed file with 0 additions and 135 deletions.
diff --git a/engines/ep/benchmarks/vbucket_bench.cc b/engines/ep/benchmarks/vbucket_bench.cc
@@ -316,136 +316,6 @@ BENCHMARK_DEFINE_F(VBucketBench, CreateDeleteStoredValue)
     }
 }
 
-/*
- * MB-31834: Load throughput degradation when the number of checkpoints
- * eligible for removing is high.
- * At both checkpoint-removing and CM:queueDirty we acquire the CM::queueLock.
- * If the number of checkpoints eligible for removing is high, then any slow
- * operation under lock in CheckpointRemover delays frontend operations.
- * Note that the CheckpointRemover is O(N) in the size of the
- * CM::checkpointList. The regression is caused by a change in MB-30916 where we
- * started with deallocating checkpoint memory under lock.
- *
- * This benchmark measures resource contention between a mc:worker (frontend
- * thread) executing CM::queueDirty and the CheckpointMemRecoveryTask
- * when the number of checkpoint eligible for removing is high.
- */
-BENCHMARK_DEFINE_F(CheckpointBench, QueueDirtyWithManyClosedUnrefCheckpoints)
-(benchmark::State& state) {
-    // Test approach:
-    // - Fix the number of checkpoints to be removed and run the
-    //     CheckpointRemover in a background thread.
-    // - Fix the number of checkpoint to be removed at each CheckpointRemover
-    //     run (must be in the order of 10^3 to catch the regression scenario).
-    // - Enqueue items into the CheckpointManager in the frontend thread.
-    //     Break when the CheckpointRemover has done. Measure (A) the number
-    //     of items enqueued and (B) the runtime of the frontend thread.
-    // - Output the average runtime of frontend operations (B/A), which is the
-    //     measured metric for this benchmark
-
-    ASSERT_EQ(1, state.max_iterations);
-
-    const size_t numCheckpoints = state.range(0);
-    const size_t numCkptToRemovePerIteration = state.range(1);
-
-    auto* vb = engine->getKVBucket()->getVBucket(vbid).get();
-    auto* ckptMgr = vb->checkpointManager.get();
-
-    // Same queued_item used for both checkpointList pre-filling and
-    // front-end queueDirty().
-    // Note that we will generate many 1-item checkpoints even if we enqueue
-    // always the same identical item. That is because we have
-    // checkpoint_max_size=1 in configuration, which leads to the following
-    // order of steps at every call to CM::queueDirty:
-    // 1) close the open checkpoint
-    // 2) create a new open checkpoint
-    // 3) enqueue the new mutation (note that de-duplication happens here).
-    //     The new mutation will be inserted into the /new/ (empty) open
-    //     checkpoint. So, there will be no de-duplication.
-    queued_item qi{
-            new Item(StoredDocKey(std::string("key"), CollectionID::Default),
-                     vbid,
-                     queue_op::mutation,
-                     /*revSeq*/ 0,
-                     /*bySeq*/ 0)};
-
-    // Pre-fill CM with the defined number of checkpoints
-    for (size_t i = 0; i < numCheckpoints; ++i) {
-        ckptMgr->queueDirty(qi,
-                            GenerateBySeqno::Yes,
-                            GenerateCas::Yes,
-                            /*preLinkDocCtx*/ nullptr);
-    }
-
-    ThreadGate tg(2);
-
-    // Note: numUnrefItems is also the number of removed checkpoints as
-    //     we have 1 item per checkpoint.
-    size_t numUnrefItems = 0;
-    size_t numCkptRemoverRuns = 0;
-    std::atomic<bool> bgDone{false};
-    auto removeCkpt = [&tg,
-                       ckptMgr,
-                       vb,
-                       numCkptToRemovePerIteration,
-                       &numUnrefItems,
-                       &numCheckpoints,
-                       &numCkptRemoverRuns,
-                       &bgDone]() {
-        tg.threadUp();
-        while (true) {
-            // Simulate the Flusher, this makes the per-iteration num of
-            // checkpoints eligible for removal (last/open checkpoint excluded).
-            std::vector<queued_item> items;
-            ckptMgr->getItemsForPersistence(items, numCkptToRemovePerIteration);
-
-            numUnrefItems += ckptMgr->removeClosedUnrefCheckpoints().count;
-            numCkptRemoverRuns++;
-
-            // Break when all but the last item (in last checkpoint) is removed
-            if (numUnrefItems >= numCheckpoints - 1) {
-                break;
-            }
-        }
-        // Done, exit frontend thread
-        bgDone = true;
-    };
-
-    // Note: thread started but still blocked on ThreadGate
-    std::thread bgThread(removeCkpt);
-
-    size_t itemsQueuedTotal = 0;
-    size_t runtime = 0;
-    while (state.KeepRunning()) {
-        tg.threadUp();
-        auto begin = std::chrono::steady_clock::now();
-        while (!bgDone) {
-            ckptMgr->queueDirty(qi,
-                                GenerateBySeqno::Yes,
-                                GenerateCas::Yes,
-                                /*preLinkDocCtx*/ nullptr);
-            itemsQueuedTotal++;
-        }
-
-        runtime = std::chrono::duration_cast<std::chrono::nanoseconds>(
-                          std::chrono::steady_clock::now() - begin)
-                          .count();
-    }
-    ASSERT_TRUE(itemsQueuedTotal);
-
-    state.counters["NumCheckpointsRemoverRuns"] = numCkptRemoverRuns;
-    state.counters["NumCheckpointsRemovedPerIteration"] =
-            numUnrefItems / numCkptRemoverRuns;
-    state.counters["ItemsEnqueued"] = itemsQueuedTotal;
-    // Clang-scan-build complains about a possible division on 0.. guess
-    // it doesn't know that the ASSERT_TRUE above would terminate the method
-    if (itemsQueuedTotal > 0) {
-        state.counters["AvgQueueDirtyRuntime"] = runtime / itemsQueuedTotal;
-    }
-
-    bgThread.join();
-}
-
 CheckpointList CheckpointBench::extractClosedUnrefCheckpoints(
         CheckpointManager& manager) {
     std::lock_guard<std::mutex> lh(manager.queueLock);
@@ -684,11 +554,6 @@ static void FlushArguments(benchmark::internal::Benchmark* b) {
 BENCHMARK_REGISTER_F(MemTrackingVBucketBench, FlushVBucket)
         ->Apply(FlushArguments);
 
-// Arguments: numCheckpoints, numCkptToRemovePerIteration
-BENCHMARK_REGISTER_F(CheckpointBench, QueueDirtyWithManyClosedUnrefCheckpoints)
-        ->Args({1000000, 1000})
-        ->Iterations(1);
-
 // The following benchs aim to show the asymptotic behaviour of the specific
 // function under test. In particular, we want to show that functions are
 // constant-complexity and don't degrade when the number of checkpoints in CM