|
| 1 | +// Copyright (c) 2024-present The Bitcoin Core developers |
| 2 | +// Distributed under the MIT software license, see the accompanying |
| 3 | +// file COPYING or http://www.opensource.org/licenses/mit-license.php. |
| 4 | + |
| 5 | +#ifndef BITCOIN_INPUTFETCHER_H |
| 6 | +#define BITCOIN_INPUTFETCHER_H |
| 7 | + |
| 8 | +#include <coins.h> |
| 9 | +#include <sync.h> |
| 10 | +#include <tinyformat.h> |
| 11 | +#include <txdb.h> |
| 12 | +#include <util/hasher.h> |
| 13 | +#include <util/threadnames.h> |
| 14 | +#include <util/transaction_identifier.h> |
| 15 | + |
| 16 | +#include <cstdint> |
| 17 | +#include <stdexcept> |
| 18 | +#include <thread> |
| 19 | +#include <unordered_set> |
| 20 | +#include <vector> |
| 21 | + |
| 22 | +/** |
| 23 | + * Input fetcher for fetching inputs from the CoinsDB and inserting |
| 24 | + * into the CoinsTip. |
| 25 | + * |
| 26 | + * The main thread loops through the block and writes all input prevouts to a |
| 27 | + * global vector. It then wakes all workers and starts working as well. Each |
| 28 | + * thread assigns itself a range of outpoints from the shared vector, and |
| 29 | + * fetches the coins from disk. The outpoint and coin pairs are written to a |
| 30 | + * thread local vector of pairs. Once all outpoints are fetched, the main thread |
| 31 | + * loops through all thread local vectors and writes the pairs to the cache. |
| 32 | + */ |
| 33 | +class InputFetcher |
| 34 | +{ |
| 35 | +private: |
| 36 | + //! Mutex to protect the inner state |
| 37 | + Mutex m_mutex{}; |
| 38 | + //! Worker threads block on this when out of work |
| 39 | + std::condition_variable m_worker_cv{}; |
| 40 | + //! Main thread blocks on this when out of work |
| 41 | + std::condition_variable m_main_cv{}; |
| 42 | + |
| 43 | + /** |
| 44 | + * The outpoints to be fetched from disk. |
| 45 | + * This is written to on the main thread, then read from all worker |
| 46 | + * threads only after the main thread is done writing. Hence, it doesn't |
| 47 | + * need to be guarded by a lock. |
| 48 | + */ |
| 49 | + std::vector<COutPoint> m_outpoints{}; |
| 50 | + /** |
| 51 | + * The index of the last outpoint that is being fetched. Workers assign |
| 52 | + * themselves a range of outpoints to fetch from m_outpoints. They will use |
| 53 | + * this index as the end of their range, and then set this index to the |
| 54 | + * beginning of their range for the next worker. Once it is zero, the next |
| 55 | + * worker will wait on the condition variable. |
| 56 | + */ |
| 57 | + size_t m_last_outpoint_index GUARDED_BY(m_mutex){0}; |
| 58 | + |
| 59 | + //! The set of txids of the transactions in the current block being fetched. |
| 60 | + std::unordered_set<Txid, SaltedTxidHasher> m_txids{}; |
| 61 | + //! The vector of thread local vectors of pairs to be written to the cache. |
| 62 | + std::vector<std::vector<std::pair<COutPoint, Coin>>> m_pairs{}; |
| 63 | + |
| 64 | + /** |
| 65 | + * Number of outpoint fetches that haven't completed yet. |
| 66 | + * This includes outpoints that are no longer queued, but still in the |
| 67 | + * worker's own batches. |
| 68 | + */ |
| 69 | + int32_t m_in_flight_fetches_count GUARDED_BY(m_mutex){0}; |
| 70 | + //! The number of worker threads that are waiting on m_worker_cv |
| 71 | + int32_t m_idle_worker_count GUARDED_BY(m_mutex){0}; |
| 72 | + //! The maximum number of outpoints to be processed in one batch |
| 73 | + const int32_t m_batch_size; |
| 74 | + //! DB coins view to fetch from. |
| 75 | + const CCoinsView* m_db{nullptr}; |
| 76 | + //! The cache to check if |
| 77 | + const CCoinsViewCache* m_cache{nullptr}; |
| 78 | + |
| 79 | + std::vector<std::thread> m_worker_threads; |
| 80 | + bool m_request_stop GUARDED_BY(m_mutex){false}; |
| 81 | + |
| 82 | + //! Internal function that does the fetching from disk. |
| 83 | + void Loop(int32_t index, bool is_main_thread = false) noexcept EXCLUSIVE_LOCKS_REQUIRED(!m_mutex) |
| 84 | + { |
| 85 | + int32_t local_batch_size{0}; |
| 86 | + size_t end_index{0}; |
| 87 | + auto& cond{is_main_thread ? m_main_cv : m_worker_cv}; |
| 88 | + do { |
| 89 | + { |
| 90 | + WAIT_LOCK(m_mutex, lock); |
| 91 | + // first do the clean-up of the previous loop run (allowing us to do |
| 92 | + // it in the same critsect) local_batch_size will only be |
| 93 | + // truthy after first run. |
| 94 | + if (local_batch_size) { |
| 95 | + m_in_flight_fetches_count -= local_batch_size; |
| 96 | + if (m_in_flight_fetches_count == 0 && !is_main_thread) { |
| 97 | + m_main_cv.notify_one(); |
| 98 | + } |
| 99 | + } |
| 100 | + |
| 101 | + // logically, the do loop starts here |
| 102 | + while (m_last_outpoint_index == 0) { |
| 103 | + if ((is_main_thread && m_in_flight_fetches_count == 0) || m_request_stop) { |
| 104 | + return; |
| 105 | + } |
| 106 | + ++m_idle_worker_count; |
| 107 | + cond.wait(lock); |
| 108 | + --m_idle_worker_count; |
| 109 | + } |
| 110 | + |
| 111 | + // Assign a batch of outpoints to this thread |
| 112 | + local_batch_size = std::max(1, std::min(m_batch_size, |
| 113 | + static_cast<int32_t>(m_last_outpoint_index / |
| 114 | + (m_worker_threads.size() + 1 + m_idle_worker_count)))); |
| 115 | + end_index = m_last_outpoint_index; |
| 116 | + m_last_outpoint_index -= local_batch_size; |
| 117 | + } |
| 118 | + |
| 119 | + std::vector<std::pair<COutPoint, Coin>>& local_pairs{m_pairs[index]}; |
| 120 | + local_pairs.reserve(local_pairs.size() + local_batch_size); |
| 121 | + try { |
| 122 | + for (auto i{end_index - local_batch_size}; i < end_index; ++i) { |
| 123 | + const auto& outpoint{m_outpoints[i]}; |
| 124 | + // If an input spends an outpoint from earlier in the |
| 125 | + // block, it won't be in the cache yet but it also won't be |
| 126 | + // in the db either. |
| 127 | + if (m_txids.contains(outpoint.hash)) { |
| 128 | + continue; |
| 129 | + } |
| 130 | + if (m_cache->HaveCoinInCache(outpoint)) { |
| 131 | + continue; |
| 132 | + } |
| 133 | + if (auto coin{m_db->GetCoin(outpoint)}; coin) { |
| 134 | + local_pairs.emplace_back(outpoint, std::move(*coin)); |
| 135 | + } else { |
| 136 | + // Missing an input, just break. This block will fail |
| 137 | + // validation, so no point in continuing to get coins. |
| 138 | + break; |
| 139 | + } |
| 140 | + } |
| 141 | + } catch (const std::runtime_error& e) { |
| 142 | + // Database error |
| 143 | + // This will be handled later in validation. |
| 144 | + // Continue for now so the main thread can proceed. |
| 145 | + } |
| 146 | + } while (true); |
| 147 | + } |
| 148 | + |
| 149 | +public: |
| 150 | + |
| 151 | + //! Create a new input fetcher |
| 152 | + explicit InputFetcher(int32_t batch_size, int32_t worker_thread_count) noexcept |
| 153 | + : m_batch_size(batch_size) |
| 154 | + { |
| 155 | + if (worker_thread_count < 1) { |
| 156 | + // Don't do anything if there are no worker threads. |
| 157 | + return; |
| 158 | + } |
| 159 | + m_pairs.reserve(worker_thread_count + 1); |
| 160 | + for (auto n{0}; n < worker_thread_count + 1; ++n) { |
| 161 | + m_pairs.emplace_back(); |
| 162 | + } |
| 163 | + m_worker_threads.reserve(worker_thread_count); |
| 164 | + for (auto n{0}; n < worker_thread_count; ++n) { |
| 165 | + m_worker_threads.emplace_back([this, n]() { |
| 166 | + util::ThreadRename(strprintf("inputfetch.%i", n)); |
| 167 | + Loop(n); |
| 168 | + }); |
| 169 | + } |
| 170 | + } |
| 171 | + |
| 172 | + // Since this class manages its own resources, which is a thread |
| 173 | + // pool `m_worker_threads`, copy and move operations are not appropriate. |
| 174 | + InputFetcher(const InputFetcher&) = delete; |
| 175 | + InputFetcher& operator=(const InputFetcher&) = delete; |
| 176 | + InputFetcher(InputFetcher&&) = delete; |
| 177 | + InputFetcher& operator=(InputFetcher&&) = delete; |
| 178 | + |
| 179 | + //! Fetch all block inputs from db, and insert into cache. |
| 180 | + void FetchInputs(CCoinsViewCache& cache, |
| 181 | + const CCoinsView& db, |
| 182 | + const CBlock& block) noexcept |
| 183 | + EXCLUSIVE_LOCKS_REQUIRED(!m_mutex) |
| 184 | + { |
| 185 | + if (m_worker_threads.empty() || block.vtx.size() <= 1) { |
| 186 | + return; |
| 187 | + } |
| 188 | + |
| 189 | + // Set the db and cache to use for this block. |
| 190 | + m_db = &db; |
| 191 | + m_cache = &cache; |
| 192 | + |
| 193 | + // Loop through the inputs of the block and add them to the queue |
| 194 | + m_txids.reserve(block.vtx.size() - 1); |
| 195 | + for (const auto& tx : block.vtx) { |
| 196 | + if (tx->IsCoinBase()) { |
| 197 | + continue; |
| 198 | + } |
| 199 | + m_outpoints.reserve(m_outpoints.size() + tx->vin.size()); |
| 200 | + for (const auto& in : tx->vin) { |
| 201 | + m_outpoints.emplace_back(in.prevout); |
| 202 | + } |
| 203 | + m_txids.emplace(tx->GetHash()); |
| 204 | + } |
| 205 | + { |
| 206 | + LOCK(m_mutex); |
| 207 | + m_last_outpoint_index = m_outpoints.size(); |
| 208 | + m_in_flight_fetches_count = m_outpoints.size(); |
| 209 | + } |
| 210 | + m_worker_cv.notify_all(); |
| 211 | + |
| 212 | + // Have the main thread work too while we wait for other threads |
| 213 | + Loop(m_worker_threads.size(), /*is_main_thread=*/true); |
| 214 | + |
| 215 | + // At this point all threads are done writing to m_pairs, so we can |
| 216 | + // safely read from it and insert the fetched coins into the cache. |
| 217 | + for (auto& local_pairs : m_pairs) { |
| 218 | + for (auto&& [outpoint, coin] : local_pairs) { |
| 219 | + cache.EmplaceCoinInternalDANGER(std::move(outpoint), |
| 220 | + std::move(coin), |
| 221 | + /*set_dirty=*/false); |
| 222 | + } |
| 223 | + local_pairs.clear(); |
| 224 | + } |
| 225 | + m_txids.clear(); |
| 226 | + m_outpoints.clear(); |
| 227 | + } |
| 228 | + |
| 229 | + ~InputFetcher() |
| 230 | + { |
| 231 | + WITH_LOCK(m_mutex, m_request_stop = true); |
| 232 | + m_worker_cv.notify_all(); |
| 233 | + for (std::thread& t : m_worker_threads) { |
| 234 | + t.join(); |
| 235 | + } |
| 236 | + } |
| 237 | +}; |
| 238 | + |
| 239 | +#endif // BITCOIN_INPUTFETCHER_H |
0 commit comments