Merge 166dfe0 into 0dca141

andrewtoth · web-flow · commit cedc3ec29f2f · 2024-12-04T01:57:52.000Z
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -8,9 +8,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - network: signet
-            timeout: 20
-            utxo_path: /var/lib/bitcoin/utxo-signet-160000.dat
           - network: mainnet
             timeout: 600
             utxo_path: /var/lib/bitcoin/utxo-840000.dat
diff --git a/.github/workflows/publish-results.yml b/.github/workflows/publish-results.yml
@@ -12,7 +12,7 @@ jobs:
       contents: write
       checks: read
     env:
-      NETWORKS: "signet,mainnet"
+      NETWORKS: "mainnet"
     steps:
       - uses: actions/checkout@v4
         with:
diff --git a/src/bench/CMakeLists.txt b/src/bench/CMakeLists.txt
@@ -31,6 +31,7 @@ add_executable(bench_bitcoin
   gcs_filter.cpp
   hashpadding.cpp
   index_blockfilter.cpp
+  inputfetcher.cpp
   load_external.cpp
   lockedpool.cpp
   logging.cpp
diff --git a/src/bench/inputfetcher.cpp b/src/bench/inputfetcher.cpp
@@ -0,0 +1,57 @@
+// Copyright (c) 2024-present The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <bench/bench.h>
+#include <bench/data/block413567.raw.h>
+#include <coins.h>
+#include <common/system.h>
+#include <inputfetcher.h>
+#include <primitives/block.h>
+#include <serialize.h>
+#include <streams.h>
+#include <util/time.h>
+
+static constexpr auto QUEUE_BATCH_SIZE{128}; //!< Batch size passed to the InputFetcher constructor
+static constexpr auto DELAY{2ms}; //!< Sleep applied by DelayedCoinsView on every GetCoin call
+
+//! Simulates a slow DB: every GetCoin call sleeps for m_delay and then returns a default-constructed Coin
+class DelayedCoinsView : public CCoinsView
+{
+private:
+    std::chrono::milliseconds m_delay; //!< Time slept on every GetCoin call
+
+public:
+    DelayedCoinsView(std::chrono::milliseconds delay) : m_delay(delay) {}
+
+    std::optional<Coin> GetCoin(const COutPoint& outpoint) const override
+    {
+        UninterruptibleSleep(m_delay);
+        return Coin{};
+    }
+
+    bool BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashBlock) override { return true; } // Ignores its arguments and reports success
+};
+
+static void InputFetcherBenchmark(benchmark::Bench& bench)
+{
+    DataStream stream{benchmark::data::block413567};
+    CBlock block;
+    stream >> TX_WITH_WITNESS(block); // Deserialize the embedded block data into `block`
+
+    DelayedCoinsView db(DELAY);
+    CCoinsViewCache cache(&db);
+
+    // FetchInputs also does work on the calling thread, so that thread is counted
+    // to prevent oversubscription and to decrease the variance of benchmark results.
+    const auto worker_threads_num{GetNumCores() - 1}; // NOTE(review): if this is 0, FetchInputs returns without fetching anything
+    InputFetcher fetcher{QUEUE_BATCH_SIZE, worker_threads_num};
+
+    bench.run([&] {
+        const auto ok{cache.Flush()}; // presumably empties the cache so each run fetches again -- confirm against CCoinsViewCache::Flush
+        assert(ok);
+        fetcher.FetchInputs(cache, db, block);
+    });
+}
+
+BENCHMARK(InputFetcherBenchmark, benchmark::PriorityLevel::HIGH);
diff --git a/src/coins.cpp b/src/coins.cpp
@@ -109,14 +109,17 @@ void CCoinsViewCache::AddCoin(const COutPoint &outpoint, Coin&& coin, bool possi
            (bool)it->second.coin.IsCoinBase());
 }
 
-void CCoinsViewCache::EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin) {
-    cachedCoinsUsage += coin.DynamicMemoryUsage();
+void CCoinsViewCache::EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin, bool set_dirty) {
+    const auto mem_usage{coin.DynamicMemoryUsage()}; // Measured before `coin` is handed to emplace as an rvalue
     auto [it, inserted] = cacheCoins.emplace(
         std::piecewise_construct,
         std::forward_as_tuple(std::move(outpoint)),
         std::forward_as_tuple(std::move(coin)));
     if (inserted) {
-        it->second.AddFlags(CCoinsCacheEntry::DIRTY, *it, m_sentinel);
+        cachedCoinsUsage += mem_usage; // Only count the coin when it was actually inserted
+        if (set_dirty) {
+            it->second.AddFlags(CCoinsCacheEntry::DIRTY, *it, m_sentinel);
+        }
     }
 }
 
diff --git a/src/coins.h b/src/coins.h
@@ -417,12 +417,13 @@ class CCoinsViewCache : public CCoinsViewBacked
 
     /**
      * Emplace a coin into cacheCoins without performing any checks, marking
-     * the emplaced coin as dirty.
+     * the emplaced coin as dirty unless `set_dirty` is `false`.
      *
-     * NOT FOR GENERAL USE. Used only when loading coins from a UTXO snapshot.
+     * NOT FOR GENERAL USE. Used when loading coins from a UTXO snapshot, and
+     * in the InputFetcher.
      * @sa ChainstateManager::PopulateAndValidateSnapshot()
      */
-    void EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin);
+    void EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin, bool set_dirty = true);
 
     /**
      * Spend a coin. Pass moveto in order to get the deleted data.
diff --git a/src/inputfetcher.h b/src/inputfetcher.h
@@ -0,0 +1,239 @@
+// Copyright (c) 2024-present The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_INPUTFETCHER_H
+#define BITCOIN_INPUTFETCHER_H
+
+#include <coins.h>
+#include <sync.h>
+#include <tinyformat.h>
+#include <txdb.h>
+#include <util/hasher.h>
+#include <util/threadnames.h>
+#include <util/transaction_identifier.h>
+
+#include <cstdint>
+#include <stdexcept>
+#include <thread>
+#include <unordered_set>
+#include <vector>
+
+/**
+ * Input fetcher for fetching inputs from the CoinsDB and inserting
+ * into the CoinsTip.
+ *
+ * The main thread loops through the block and writes all input prevouts to a
+ * shared vector (m_outpoints). It then wakes all workers and starts working as
+ * well. Each thread assigns itself a range of outpoints from the shared vector
+ * and fetches the coins from the db. The outpoint and coin pairs are written to
+ * that thread's own vector in m_pairs. Once all outpoints are fetched, the main
+ * thread loops through all per-thread vectors and writes the pairs to the cache.
+ */
+class InputFetcher
+{
+private:
+    //! Mutex to protect the inner state
+    Mutex m_mutex{};
+    //! Worker threads block on this when out of work
+    std::condition_variable m_worker_cv{};
+    //! Main thread blocks on this when out of work
+    std::condition_variable m_main_cv{};
+
+    /**
+     * The outpoints to be fetched from disk.
+     * This is written to on the main thread, then read from all worker
+     * threads only after the main thread is done writing. Hence, it doesn't
+     * need to be guarded by a lock.
+     */
+    std::vector<COutPoint> m_outpoints{};
+    /**
+     * One past the index of the last outpoint not yet assigned to a thread.
+     * Threads assign themselves a range of outpoints to fetch from m_outpoints.
+     * They use this index as the (exclusive) end of their range, and then set
+     * it to the beginning of their range for the next thread. Once it is zero,
+     * the next worker will wait on the condition variable.
+     */
+    size_t m_last_outpoint_index GUARDED_BY(m_mutex){0};
+
+    //! The set of txids of the transactions in the current block being fetched.
+    std::unordered_set<Txid, SaltedTxidHasher> m_txids{};
+    //! The vector of thread local vectors of pairs to be written to the cache.
+    std::vector<std::vector<std::pair<COutPoint, Coin>>> m_pairs{};
+
+    /**
+     * Number of outpoint fetches that haven't completed yet.
+     * This includes outpoints that are no longer queued, but still in the
+     * worker's own batches.
+     */
+    int32_t m_in_flight_fetches_count GUARDED_BY(m_mutex){0};
+    //! The number of threads waiting in Loop (workers on m_worker_cv, or the main thread on m_main_cv)
+    int32_t m_idle_worker_count GUARDED_BY(m_mutex){0};
+    //! The maximum number of outpoints to be processed in one batch
+    const int32_t m_batch_size;
+    //! DB coins view to fetch from.
+    const CCoinsView* m_db{nullptr};
+    //! The cache checked for an outpoint before fetching its coin from the db.
+    const CCoinsViewCache* m_cache{nullptr};
+
+    std::vector<std::thread> m_worker_threads; //!< Threads running Loop; joined in the destructor
+    bool m_request_stop GUARDED_BY(m_mutex){false}; //!< Set by the destructor so that idle workers return from Loop
+
+    //! Internal function that does the fetching; `index` selects this thread's vector in m_pairs.
+    void Loop(int32_t index, bool is_main_thread = false) noexcept EXCLUSIVE_LOCKS_REQUIRED(!m_mutex)
+    {
+        int32_t local_batch_size{0};
+        size_t end_index{0};
+        auto& cond{is_main_thread ? m_main_cv : m_worker_cv};
+        do {
+            {
+                WAIT_LOCK(m_mutex, lock);
+                // first do the clean-up of the previous loop run (allowing us to do
+                // it in the same critsect). local_batch_size will only be
+                // non-zero after the first run.
+                if (local_batch_size) {
+                    m_in_flight_fetches_count -= local_batch_size;
+                    if (m_in_flight_fetches_count == 0 && !is_main_thread) {
+                        m_main_cv.notify_one();
+                    }
+                }
+
+                // logically, the do loop starts here
+                while (m_last_outpoint_index == 0) {
+                    if ((is_main_thread && m_in_flight_fetches_count == 0) || m_request_stop) {
+                        return;
+                    }
+                    ++m_idle_worker_count;
+                    cond.wait(lock);
+                    --m_idle_worker_count;
+                }
+
+                // Assign a batch of outpoints to this thread (at least 1, at most m_batch_size)
+                local_batch_size = std::max(1, std::min(m_batch_size,
+                            static_cast<int32_t>(m_last_outpoint_index /
+                            (m_worker_threads.size() + 1 + m_idle_worker_count))));
+                end_index = m_last_outpoint_index;
+                m_last_outpoint_index -= local_batch_size;
+            }
+
+            std::vector<std::pair<COutPoint, Coin>>& local_pairs{m_pairs[index]};
+            local_pairs.reserve(local_pairs.size() + local_batch_size);
+            try {
+                for (auto i{end_index - local_batch_size}; i < end_index; ++i) {
+                    const auto& outpoint{m_outpoints[i]};
+                    // If an input spends an outpoint from earlier in the
+                    // block, it won't be in the cache yet but it also won't be
+                    // in the db either.
+                    if (m_txids.contains(outpoint.hash)) {
+                        continue;
+                    }
+                    if (m_cache->HaveCoinInCache(outpoint)) {
+                        continue;
+                    }
+                    if (auto coin{m_db->GetCoin(outpoint)}; coin) {
+                        local_pairs.emplace_back(outpoint, std::move(*coin));
+                    } else {
+                        // Missing an input, so stop fetching this batch. This block
+                        // will fail validation, so no point in continuing to get coins.
+                        break;
+                    }
+                }
+            } catch (const std::runtime_error& e) {
+                // Database error
+                // This will be handled later in validation.
+                // Continue for now so the main thread can proceed.
+            }
+        } while (true);
+    }
+
+public:
+
+    //! Create a new input fetcher
+    explicit InputFetcher(int32_t batch_size, int32_t worker_thread_count) noexcept
+        : m_batch_size(batch_size)
+    {
+        if (worker_thread_count < 1) {
+            // Don't do anything if there are no worker threads.
+            return;
+        }
+        m_pairs.reserve(worker_thread_count + 1); // One vector per worker, plus one for the main thread
+        for (auto n{0}; n < worker_thread_count + 1; ++n) { 
+            m_pairs.emplace_back();
+        }
+        m_worker_threads.reserve(worker_thread_count);
+        for (auto n{0}; n < worker_thread_count; ++n) {
+            m_worker_threads.emplace_back([this, n]() {
+                util::ThreadRename(strprintf("inputfetch.%i", n));
+                Loop(n);
+            });
+        }
+    }
+
+    // Since this class manages its own resources, which is a thread
+    // pool `m_worker_threads`, copy and move operations are not appropriate.
+    InputFetcher(const InputFetcher&) = delete;
+    InputFetcher& operator=(const InputFetcher&) = delete;
+    InputFetcher(InputFetcher&&) = delete;
+    InputFetcher& operator=(InputFetcher&&) = delete;
+
+    //! Fetch all block inputs from db, and insert into cache.
+    void FetchInputs(CCoinsViewCache& cache,
+                     const CCoinsView& db,
+                     const CBlock& block) noexcept
+        EXCLUSIVE_LOCKS_REQUIRED(!m_mutex)
+    {
+        if (m_worker_threads.empty() || block.vtx.size() <= 1) {
+            return;
+        }
+
+        // Set the db and cache to use for this block.
+        m_db = &db;
+        m_cache = &cache;
+
+        // Loop through the inputs of the block and add them to the queue
+        m_txids.reserve(block.vtx.size() - 1);
+        for (const auto& tx : block.vtx) {
+            if (tx->IsCoinBase()) {
+                continue;
+            }
+            m_outpoints.reserve(m_outpoints.size() + tx->vin.size());
+            for (const auto& in : tx->vin) {
+                m_outpoints.emplace_back(in.prevout);
+            }
+            m_txids.emplace(tx->GetHash());
+        }
+        {
+            LOCK(m_mutex);
+            m_last_outpoint_index = m_outpoints.size();
+            m_in_flight_fetches_count = m_outpoints.size();
+        }
+        m_worker_cv.notify_all();
+
+        // Have the main thread work too while we wait for other threads
+        Loop(m_worker_threads.size(), /*is_main_thread=*/true);
+
+        // At this point all threads are done writing to m_pairs, so we can
+        // safely read from it and insert the fetched coins into the cache.
+        for (auto& local_pairs : m_pairs) {
+            for (auto&& [outpoint, coin] : local_pairs) {
+                cache.EmplaceCoinInternalDANGER(std::move(outpoint),
+                                                std::move(coin),
+                                                /*set_dirty=*/false);
+            }
+            local_pairs.clear();
+        }
+        m_txids.clear();
+        m_outpoints.clear();
+    }
+
+    ~InputFetcher()
+    {
+        WITH_LOCK(m_mutex, m_request_stop = true); // Lets workers waiting in Loop return
+        m_worker_cv.notify_all();
+        for (std::thread& t : m_worker_threads) {
+            t.join();
+        }
+    }
+};
+
+#endif // BITCOIN_INPUTFETCHER_H
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
@@ -70,6 +70,7 @@ add_executable(test_bitcoin
   headers_sync_chainwork_tests.cpp
   httpserver_tests.cpp
   i2p_tests.cpp
+  inputfetcher_tests.cpp
   interfaces_tests.cpp
   key_io_tests.cpp
   key_tests.cpp
diff --git a/src/test/fuzz/CMakeLists.txt b/src/test/fuzz/CMakeLists.txt
@@ -53,6 +53,7 @@ add_executable(fuzz
   hex.cpp
   http_request.cpp
   i2p.cpp
+  inputfetcher.cpp
   integer.cpp
   key.cpp
   key_io.cpp
diff --git a/src/test/fuzz/inputfetcher.cpp b/src/test/fuzz/inputfetcher.cpp
diff --git a/src/test/inputfetcher_tests.cpp b/src/test/inputfetcher_tests.cpp
diff --git a/src/validation.cpp b/src/validation.cpp
diff --git a/src/validation.h b/src/validation.h

Original file line number	Diff line number	Diff line change
`@@ -109,14 +109,17 @@ void CCoinsViewCache::AddCoin(const COutPoint &outpoint, Coin&& coin, bool possi`
`109`	`109`	`(bool)it->second.coin.IsCoinBase());`
`110`	`110`	`}`
`111`	`111`
`112`		`-void CCoinsViewCache::EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin) {`
`113`		`- cachedCoinsUsage += coin.DynamicMemoryUsage();`
	`112`	`+void CCoinsViewCache::EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin, bool set_dirty) {`
	`113`	`+ const auto mem_usage{coin.DynamicMemoryUsage()};`
`114`	`114`	`auto [it, inserted] = cacheCoins.emplace(`
`115`	`115`	`std::piecewise_construct,`
`116`	`116`	`std::forward_as_tuple(std::move(outpoint)),`
`117`	`117`	`std::forward_as_tuple(std::move(coin)));`
`118`	`118`	`if (inserted) {`
`119`		`- it->second.AddFlags(CCoinsCacheEntry::DIRTY, *it, m_sentinel);`
	`119`	`+ cachedCoinsUsage += mem_usage;`
	`120`	`+ if (set_dirty) {`
	`121`	`+ it->second.AddFlags(CCoinsCacheEntry::DIRTY, *it, m_sentinel);`
	`122`	`+ }`
`120`	`123`	`}`
`121`	`124`	`}`
`122`	`125`