Add local thread pool to CCheckQueue

Backport of bitcoin/bitcoin#18710.

PR message:

This PR:
- gets rid of boost::thread_group in the CCheckQueue class
- allows thread safety annotation usage in the CCheckQueue class
- is an alternative to #14464 (#18710 (comment), #18710 (comment))

Also, with this PR (I hope) it could be easier to resurrect a bunch of brilliant ideas from #9938.

Related: #17307

Squashed commits:
- 0ef938685b5c079a6f5a98daf0e3865d718d817b refactor: Use member initializers in CCheckQueue
- 01511776acb0c7ec216dc9c8112531067763f1cb Add local thread pool to CCheckQueue
- dba30695fc42f45828db008e7e5b81cb2b5d8551 test: Use CCheckQueue local thread pool
- 6784ac471bb32b6bb8e2de60986f123eb4990706 bench: Use CCheckQueue local thread pool
- bb6fcc75d1ec94b733d1477c816351c50be5faf9 refactor: Drop boost::thread stuff in CCheckQueue

Also in this commit: Slight modification to our custom CCheckQueue_32MB benchmark to use the new API.

Signed-off-by: Calin Culianu <calin.culianu@gmail.com>
bitcoin-cash-node · Jun 7, 2021 · 0e94c77 · 0e94c77
1 parent 6d5e21a
commit 0e94c77
Show file tree

Hide file tree

Showing 9 changed files with 100 additions and 107 deletions.
diff --git a/src/bench/checkqueue.cpp b/src/bench/checkqueue.cpp
@@ -16,8 +16,6 @@
 #include <util/system.h>
 #include <validation.h>
 
-#include <boost/thread/thread.hpp>
-
 #include <limits>
 #include <utility>
 #include <vector>
@@ -43,10 +41,7 @@ static void CCheckQueueSpeedPrevectorJob(benchmark::State &state) {
         void swap(PrevectorJob &x) { p.swap(x.p); };
     };
     CCheckQueue<PrevectorJob> queue{QUEUE_BATCH_SIZE};
-    boost::thread_group tg;
-    for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) {
-        tg.create_thread([&] { queue.Thread(); });
-    }
+    queue.StartWorkerThreads(std::max(MIN_CORES, GetNumCores()));
     while (state.KeepRunning()) {
         // Make insecure_rand here so that each iteration is identical.
         FastRandomContext insecure_rand(true);
@@ -63,8 +58,7 @@ static void CCheckQueueSpeedPrevectorJob(benchmark::State &state) {
         // for clarity
         control.Wait();
     }
-    tg.interrupt_all();
-    tg.join_all();
+    queue.StopWorkerThreads();
 }
 
 static void CCheckQueue_RealData32MB(bool cacheSigs, benchmark::State &state) {
@@ -132,19 +126,15 @@ static void CCheckQueue_RealData32MB(bool cacheSigs, benchmark::State &state) {
 
     // Step 3: Setup threads for our CCheckQueue
     CCheckQueue<CScriptCheck> queue{QUEUE_BATCH_SIZE};
-    boost::thread_group tg;
     int nThreads = gArgs.GetArg("-par", DEFAULT_SCRIPTCHECK_THREADS);
     const int nCores = std::max(GetNumCores(), 1);
     if (!nThreads) nThreads = nCores;
-    else if (nThreads < 0) nThreads = std::max(0, nCores + nThreads); // negative means leave n cores free
-    LogPrintf("%s: Using %d threads for signature verification\n", __func__, nThreads);
+    else if (nThreads < 0) nThreads = std::max(1, nCores + nThreads); // negative means leave n cores free
+    LogPrintf("%s: Using %d thread%s for signature verification\n", __func__, nThreads, nThreads != 1 ? "s" : "");
     --nThreads; // account for the fact that this main thread also does processing in .Wait() below
-    for (int i = 0; i < nThreads; ++i) {
-        tg.create_thread([&] { queue.Thread(); });
-    }
-    Defer d([&tg]{
-        tg.interrupt_all();
-        tg.join_all();
+    queue.StartWorkerThreads(nThreads);
+    Defer d([&queue]{
+        queue.StopWorkerThreads();
     });
 
     // And finally: Run the benchmark

diff --git a/src/checkqueue.h b/src/checkqueue.h
@@ -1,18 +1,17 @@
-// Copyright (c) 2012-2018 The Bitcoin Core developers
+// Copyright (c) 2012-2021 The Bitcoin Core developers
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
 #ifndef BITCOIN_CHECKQUEUE_H
 #define BITCOIN_CHECKQUEUE_H
 
 #include <sync.h>
+#include <tinyformat.h>
+#include <util/threadnames.h>
 
 #include <algorithm>
 #include <vector>
 
-#include <boost/thread/condition_variable.hpp>
-#include <boost/thread/mutex.hpp>
-
 template <typename T> class CCheckQueueControl;
 
 /**
@@ -28,47 +27,50 @@ template <typename T> class CCheckQueueControl;
 template <typename T> class CCheckQueue {
 private:
     //! Mutex to protect the inner state
-    boost::mutex mutex;
+    Mutex m_mutex;
 
     //! Worker threads block on this when out of work
-    boost::condition_variable condWorker;
+    std::condition_variable m_worker_cv;
 
     //! Master thread blocks on this when out of work
-    boost::condition_variable condMaster;
+    std::condition_variable m_master_cv;
 
     //! The queue of elements to be processed.
     //! As the order of booleans doesn't matter, it is used as a LIFO (stack)
-    std::vector<T> queue;
+    std::vector<T> queue GUARDED_BY(m_mutex);
 
     //! The number of workers (including the master) that are idle.
-    int nIdle;
+    int nIdle GUARDED_BY(m_mutex){0};
 
     //! The total number of workers (including the master).
-    int nTotal;
+    int nTotal GUARDED_BY(m_mutex){0};
 
     //! The temporary evaluation result.
-    bool fAllOk;
+    bool fAllOk GUARDED_BY(m_mutex){true};
 
     /**
      * Number of verifications that haven't completed yet.
      * This includes elements that are no longer queued, but still in the
      * worker's own batches.
      */
-    unsigned int nTodo;
+    unsigned int nTodo GUARDED_BY(m_mutex){0};
 
     //! The maximum number of elements to be processed in one batch
-    unsigned int nBatchSize;
+    const unsigned int nBatchSize;
+
+    std::vector<std::thread> m_worker_threads;
+    bool m_request_stop GUARDED_BY(m_mutex){false};
 
     /** Internal function that does bulk of the verification work. */
-    bool Loop(bool fMaster = false) {
-        boost::condition_variable &cond = fMaster ? condMaster : condWorker;
+    bool Loop(bool fMaster) {
+        std::condition_variable& cond = fMaster ? m_master_cv : m_worker_cv;
         std::vector<T> vChecks;
         vChecks.reserve(nBatchSize);
         unsigned int nNow = 0;
         bool fOk = true;
         do {
             {
-                boost::unique_lock<boost::mutex> lock(mutex);
+                WAIT_LOCK(m_mutex, lock);
                 // first do the clean-up of the previous loop run (allowing us
                 // to do it in the same critsect)
                 if (nNow) {
@@ -77,14 +79,14 @@ template <typename T> class CCheckQueue {
                     if (nTodo == 0 && !fMaster) {
                         // We processed the last element; inform the master it
                         // can exit and return the result
-                        condMaster.notify_one();
+                        m_master_cv.notify_one();
                     }
                 } else {
                     // first iteration
                     nTotal++;
                 }
                 // logically, the do loop starts here
-                while (queue.empty()) {
+                while (queue.empty() && !m_request_stop) {
                     if (fMaster && nTodo == 0) {
                         nTotal--;
                         bool fRet = fAllOk;
@@ -97,6 +99,10 @@ template <typename T> class CCheckQueue {
                     cond.wait(lock); // wait
                     nIdle--;
                 }
+                if (m_request_stop) {
+                    return false;
+                }
+
                 // Decide how many work units to process now.
                 // * Do not try to do everything at once, but aim for
                 // increasingly smaller batches so all workers finish
@@ -131,36 +137,62 @@ template <typename T> class CCheckQueue {
 
 public:
     //! Mutex to ensure only one concurrent CCheckQueueControl
-    boost::mutex ControlMutex;
+    Mutex m_control_mutex;
 
     //! Create a new check queue
     explicit CCheckQueue(unsigned int nBatchSizeIn)
-        : nIdle(0), nTotal(0), fAllOk(true), nTodo(0),
-          nBatchSize(nBatchSizeIn) {}
-
-    //! Worker thread
-    void Thread() { Loop(); }
+        : nBatchSize(nBatchSizeIn) {}
+
+    //! Create a pool of threads_num new worker threads: resets nIdle, nTotal and fAllOk under m_mutex, asserts that m_worker_threads is empty, then starts threads renamed "scriptch.<n>" that each run Loop(false).
+    void StartWorkerThreads(const int threads_num)
+    {
+        {
+             LOCK(m_mutex);
+             nIdle = 0;
+             nTotal = 0;
+             fAllOk = true;
+         }
+         assert(m_worker_threads.empty());
+         for (int n = 0; n < threads_num; ++n) {
+             m_worker_threads.emplace_back([this, n]() {
+                 util::ThreadRename(strprintf("scriptch.%i", n));
+                 Loop(false /* worker thread */);
+             });
+         }
+    }
 
     //! Wait until execution finishes, and return whether all evaluations were
     //! successful.
-    bool Wait() { return Loop(true); }
+    bool Wait() { return Loop(true /* master thread */); }
 
     //! Add a batch of checks to the queue
     void Add(std::vector<T> &vChecks) {
-        boost::unique_lock<boost::mutex> lock(mutex);
+        LOCK(m_mutex);
         for (T &check : vChecks) {
             queue.push_back(T());
             check.swap(queue.back());
         }
         nTodo += vChecks.size();
         if (vChecks.size() == 1) {
-            condWorker.notify_one();
+            m_worker_cv.notify_one();
         } else if (vChecks.size() > 1) {
-            condWorker.notify_all();
+            m_worker_cv.notify_all();
+        }
+    }
+
+    //! Stop all of the worker threads: sets m_request_stop under m_mutex, wakes every thread waiting on m_worker_cv, joins and discards the threads, then clears the flag so that later Loop() calls do not return early.
+    void StopWorkerThreads()
+    {
+        WITH_LOCK(m_mutex, m_request_stop = true);
+        m_worker_cv.notify_all();
+        for (std::thread& t : m_worker_threads) {
+            t.join();
         }
+        m_worker_threads.clear();
+        WITH_LOCK(m_mutex, m_request_stop = false);
     }
 
-    ~CCheckQueue() {}
+    ~CCheckQueue() { assert(m_worker_threads.empty()); }
 };
 
 /**
@@ -180,7 +212,7 @@ template <typename T> class CCheckQueueControl {
         : pqueue(pqueueIn), fDone(false) {
         // passed queue is supposed to be unused, or nullptr
         if (pqueue != nullptr) {
-            ENTER_CRITICAL_SECTION(pqueue->ControlMutex);
+            ENTER_CRITICAL_SECTION(pqueue->m_control_mutex);
         }
     }
 
@@ -204,7 +236,7 @@ template <typename T> class CCheckQueueControl {
             Wait();
         }
         if (pqueue != nullptr) {
-            LEAVE_CRITICAL_SECTION(pqueue->ControlMutex);
+            LEAVE_CRITICAL_SECTION(pqueue->m_control_mutex);
         }
     }
 };

diff --git a/src/init.cpp b/src/init.cpp
@@ -247,6 +247,7 @@ void Shutdown(NodeContext &node) {
     // the CScheduler/checkqueue threadGroup
     threadGroup.interrupt_all();
     threadGroup.join_all();
+    StopScriptCheckWorkerThreads();
 
     // After the threads that potentially access these pointers have been
     // stopped, destruct and reset all to nullptr.
@@ -2105,9 +2106,7 @@ bool AppInitMain(Config &config, RPCServer &rpcServer,
     LogPrintf("Script verification uses %d additional threads\n", script_threads);
     if (script_threads >= 1) {
         g_parallel_script_checks = true;
-        for (int i = 0; i < script_threads; ++i) {
-            threadGroup.create_thread([i]() { return ThreadScriptCheck(i); });
-        }
+        StartScriptCheckWorkerThreads(script_threads);
     }
 
     // Start the lightweight task scheduler thread