Skip to content

Commit

Permalink
Add local thread pool to CCheckQueue
Browse files Browse the repository at this point in the history
Backport of bitcoin/bitcoin#18710

PR message:

    This PR:

    - gets rid of boost::thread_group in the CCheckQueue class
    - allows thread safety annotation usage in the CCheckQueue class
    - is an alternative to #14464 (#18710 (comment), #18710 (comment))
    Also, with this PR (I hope) it could be easier to resurrect a bunch of brilliant ideas from #9938.

    Related: #17307

Squashed commits:

0ef938685b5c079a6f5a98daf0e3865d718d817b refactor: Use member initializers in CCheckQueue
01511776acb0c7ec216dc9c8112531067763f1cb Add local thread pool to CCheckQueue
dba30695fc42f45828db008e7e5b81cb2b5d8551 test: Use CCheckQueue local thread pool
6784ac471bb32b6bb8e2de60986f123eb4990706 bench: Use CCheckQueue local thread pool
bb6fcc75d1ec94b733d1477c816351c50be5faf9 refactor: Drop boost::thread stuff in CCheckQueue

Also in this commit:

Slight modification to our custom CCheckQueue_32MB benchmark to use the
new API.

Signed-off-by: Calin Culianu <calin.culianu@gmail.com>
  • Loading branch information
hebasto authored and cculianu committed Jun 7, 2021
1 parent 6d5e21a commit 0e94c77
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 107 deletions.
24 changes: 7 additions & 17 deletions src/bench/checkqueue.cpp
Expand Up @@ -16,8 +16,6 @@
#include <util/system.h>
#include <validation.h>

#include <boost/thread/thread.hpp>

#include <limits>
#include <utility>
#include <vector>
Expand All @@ -43,10 +41,7 @@ static void CCheckQueueSpeedPrevectorJob(benchmark::State &state) {
void swap(PrevectorJob &x) { p.swap(x.p); };
};
CCheckQueue<PrevectorJob> queue{QUEUE_BATCH_SIZE};
boost::thread_group tg;
for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) {
tg.create_thread([&] { queue.Thread(); });
}
queue.StartWorkerThreads(std::max(MIN_CORES, GetNumCores()));
while (state.KeepRunning()) {
// Make insecure_rand here so that each iteration is identical.
FastRandomContext insecure_rand(true);
Expand All @@ -63,8 +58,7 @@ static void CCheckQueueSpeedPrevectorJob(benchmark::State &state) {
// for clarity
control.Wait();
}
tg.interrupt_all();
tg.join_all();
queue.StopWorkerThreads();
}

static void CCheckQueue_RealData32MB(bool cacheSigs, benchmark::State &state) {
Expand Down Expand Up @@ -132,19 +126,15 @@ static void CCheckQueue_RealData32MB(bool cacheSigs, benchmark::State &state) {

// Step 3: Setup threads for our CCheckQueue
CCheckQueue<CScriptCheck> queue{QUEUE_BATCH_SIZE};
boost::thread_group tg;
int nThreads = gArgs.GetArg("-par", DEFAULT_SCRIPTCHECK_THREADS);
const int nCores = std::max(GetNumCores(), 1);
if (!nThreads) nThreads = nCores;
else if (nThreads < 0) nThreads = std::max(0, nCores + nThreads); // negative means leave n cores free
LogPrintf("%s: Using %d threads for signature verification\n", __func__, nThreads);
else if (nThreads < 0) nThreads = std::max(1, nCores + nThreads); // negative means leave n cores free
LogPrintf("%s: Using %d thread%s for signature verification\n", __func__, nThreads, nThreads != 1 ? "s" : "");
--nThreads; // account for the fact that this main thread also does processing in .Wait() below
for (int i = 0; i < nThreads; ++i) {
tg.create_thread([&] { queue.Thread(); });
}
Defer d([&tg]{
tg.interrupt_all();
tg.join_all();
queue.StartWorkerThreads(nThreads);
Defer d([&queue]{
queue.StopWorkerThreads();
});

// And finally: Run the benchmark
Expand Down
94 changes: 63 additions & 31 deletions src/checkqueue.h
@@ -1,18 +1,17 @@
// Copyright (c) 2012-2018 The Bitcoin Core developers
// Copyright (c) 2012-2021 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#ifndef BITCOIN_CHECKQUEUE_H
#define BITCOIN_CHECKQUEUE_H

#include <sync.h>
#include <tinyformat.h>
#include <util/threadnames.h>

#include <algorithm>
#include <vector>

#include <boost/thread/condition_variable.hpp>
#include <boost/thread/mutex.hpp>

template <typename T> class CCheckQueueControl;

/**
Expand All @@ -28,47 +27,50 @@ template <typename T> class CCheckQueueControl;
template <typename T> class CCheckQueue {
private:
//! Mutex to protect the inner state
boost::mutex mutex;
Mutex m_mutex;

//! Worker threads block on this when out of work
boost::condition_variable condWorker;
std::condition_variable m_worker_cv;

//! Master thread blocks on this when out of work
boost::condition_variable condMaster;
std::condition_variable m_master_cv;

//! The queue of elements to be processed.
//! As the order of booleans doesn't matter, it is used as a LIFO (stack)
std::vector<T> queue;
std::vector<T> queue GUARDED_BY(m_mutex);

//! The number of workers (including the master) that are idle.
int nIdle;
int nIdle GUARDED_BY(m_mutex){0};

//! The total number of workers (including the master).
int nTotal;
int nTotal GUARDED_BY(m_mutex){0};

//! The temporary evaluation result.
bool fAllOk;
bool fAllOk GUARDED_BY(m_mutex){true};

/**
* Number of verifications that haven't completed yet.
* This includes elements that are no longer queued, but still in the
* worker's own batches.
*/
unsigned int nTodo;
unsigned int nTodo GUARDED_BY(m_mutex){0};

//! The maximum number of elements to be processed in one batch
unsigned int nBatchSize;
const unsigned int nBatchSize;

std::vector<std::thread> m_worker_threads;
bool m_request_stop GUARDED_BY(m_mutex){false};

/** Internal function that does bulk of the verification work. */
bool Loop(bool fMaster = false) {
boost::condition_variable &cond = fMaster ? condMaster : condWorker;
bool Loop(bool fMaster) {
std::condition_variable& cond = fMaster ? m_master_cv : m_worker_cv;
std::vector<T> vChecks;
vChecks.reserve(nBatchSize);
unsigned int nNow = 0;
bool fOk = true;
do {
{
boost::unique_lock<boost::mutex> lock(mutex);
WAIT_LOCK(m_mutex, lock);
// first do the clean-up of the previous loop run (allowing us
// to do it in the same critsect)
if (nNow) {
Expand All @@ -77,14 +79,14 @@ template <typename T> class CCheckQueue {
if (nTodo == 0 && !fMaster) {
// We processed the last element; inform the master it
// can exit and return the result
condMaster.notify_one();
m_master_cv.notify_one();
}
} else {
// first iteration
nTotal++;
}
// logically, the do loop starts here
while (queue.empty()) {
while (queue.empty() && !m_request_stop) {
if (fMaster && nTodo == 0) {
nTotal--;
bool fRet = fAllOk;
Expand All @@ -97,6 +99,10 @@ template <typename T> class CCheckQueue {
cond.wait(lock); // wait
nIdle--;
}
if (m_request_stop) {
return false;
}

// Decide how many work units to process now.
// * Do not try to do everything at once, but aim for
// increasingly smaller batches so all workers finish
Expand Down Expand Up @@ -131,36 +137,62 @@ template <typename T> class CCheckQueue {

public:
//! Mutex to ensure only one concurrent CCheckQueueControl
boost::mutex ControlMutex;
Mutex m_control_mutex;

//! Create a new check queue
explicit CCheckQueue(unsigned int nBatchSizeIn)
: nIdle(0), nTotal(0), fAllOk(true), nTodo(0),
nBatchSize(nBatchSizeIn) {}

//! Worker thread
void Thread() { Loop(); }
: nBatchSize(nBatchSizeIn) {}

//! Launch `threads_num` worker threads owned by this queue.
//!
//! The idle/total counters and the aggregate result flag are first reset
//! while holding m_mutex. The function asserts that no worker threads are
//! currently stored in m_worker_threads. Each spawned thread renames itself
//! to "scriptch.<index>" and then runs Loop() in worker (non-master) mode.
void StartWorkerThreads(const int threads_num)
{
    {
        LOCK(m_mutex);
        fAllOk = true;
        nTotal = 0;
        nIdle = 0;
    }
    assert(m_worker_threads.empty());
    int worker_index = 0;
    while (worker_index < threads_num) {
        // Capture the index by value so every thread keeps its own number.
        m_worker_threads.emplace_back([this, worker_index]() {
            util::ThreadRename(strprintf("scriptch.%i", worker_index));
            Loop(false /* worker thread */);
        });
        ++worker_index;
    }
}

//! Wait until execution finishes, and return whether all evaluations were
//! successful.
bool Wait() { return Loop(true); }
bool Wait() { return Loop(true /* master thread */); }

//! Add a batch of checks to the queue
void Add(std::vector<T> &vChecks) {
boost::unique_lock<boost::mutex> lock(mutex);
LOCK(m_mutex);
for (T &check : vChecks) {
queue.push_back(T());
check.swap(queue.back());
}
nTodo += vChecks.size();
if (vChecks.size() == 1) {
condWorker.notify_one();
m_worker_cv.notify_one();
} else if (vChecks.size() > 1) {
condWorker.notify_all();
m_worker_cv.notify_all();
}
}

//! Ask every worker thread to exit and wait for all of them to finish.
//!
//! Sets m_request_stop under m_mutex, wakes all waiters on m_worker_cv,
//! joins each thread in m_worker_threads, empties the container, and
//! finally clears m_request_stop again under m_mutex.
void StopWorkerThreads()
{
    WITH_LOCK(m_mutex, m_request_stop = true);
    // Wake workers blocked on the condition variable so they see the flag.
    m_worker_cv.notify_all();
    for (auto it = m_worker_threads.begin(); it != m_worker_threads.end(); ++it) {
        it->join();
    }
    m_worker_threads.clear();
    // Clear the flag only after every worker has been joined.
    WITH_LOCK(m_mutex, m_request_stop = false);
}

~CCheckQueue() {}
~CCheckQueue() { assert(m_worker_threads.empty()); }
};

/**
Expand All @@ -180,7 +212,7 @@ template <typename T> class CCheckQueueControl {
: pqueue(pqueueIn), fDone(false) {
// passed queue is supposed to be unused, or nullptr
if (pqueue != nullptr) {
ENTER_CRITICAL_SECTION(pqueue->ControlMutex);
ENTER_CRITICAL_SECTION(pqueue->m_control_mutex);
}
}

Expand All @@ -204,7 +236,7 @@ template <typename T> class CCheckQueueControl {
Wait();
}
if (pqueue != nullptr) {
LEAVE_CRITICAL_SECTION(pqueue->ControlMutex);
LEAVE_CRITICAL_SECTION(pqueue->m_control_mutex);
}
}
};
Expand Down
5 changes: 2 additions & 3 deletions src/init.cpp
Expand Up @@ -247,6 +247,7 @@ void Shutdown(NodeContext &node) {
// the CScheduler/checkqueue threadGroup
threadGroup.interrupt_all();
threadGroup.join_all();
StopScriptCheckWorkerThreads();

// After the threads that potentially access these pointers have been
// stopped, destruct and reset all to nullptr.
Expand Down Expand Up @@ -2105,9 +2106,7 @@ bool AppInitMain(Config &config, RPCServer &rpcServer,
LogPrintf("Script verification uses %d additional threads\n", script_threads);
if (script_threads >= 1) {
g_parallel_script_checks = true;
for (int i = 0; i < script_threads; ++i) {
threadGroup.create_thread([i]() { return ThreadScriptCheck(i); });
}
StartScriptCheckWorkerThreads(script_threads);
}

// Start the lightweight task scheduler thread
Expand Down

0 comments on commit 0e94c77

Please sign in to comment.