Skip to content

Commit

Permalink
rpc: Optimize serialization disk space of dumptxoutset
Browse files Browse the repository at this point in the history
Co-authored-by: TheCharlatan <seb.kung@gmail.com>
  • Loading branch information
aureleoules and TheCharlatan committed Apr 30, 2023
1 parent 91ccb62 commit 7acfc2a
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 48 deletions.
31 changes: 27 additions & 4 deletions src/rpc/blockchain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2672,21 +2672,44 @@ UniValue CreateUTXOSnapshot(

afile << metadata;

std::map<uint256, std::vector<std::pair<uint32_t, Coin>>> mapCoins;
unsigned int iter{0};
COutPoint key;
uint256 last_hash;
Coin coin;
unsigned int iter{0};
std::vector<std::pair<uint32_t, Coin>> coins;

// Serializes all coins that share one txid as a single record:
//   <txid> <uint16 coin count> { <uint32 vout> <Coin> } * count
//
// @param afile      Snapshot file being written.
// @param last_hash  Txid common to every entry in `coins`.
// @param coins      (vout index, coin) pairs belonging to `last_hash`.
//
// The count field is only 16 bits wide, so a txid with more than 65535 unspent
// outputs cannot be described by one record. Instead of silently truncating
// the count (which would desynchronize any reader), such a group is emitted as
// several consecutive records that repeat the same txid. Groups of 65535 coins
// or fewer produce exactly the same bytes as before.
// NOTE(review): this relies on the snapshot loader treating each record
// independently (it reads a txid, a count, then that many coins) — confirm no
// consumer requires txids to be unique across records.
//
// No capture is needed: everything the lambda touches is passed in explicitly,
// which also avoids confusion with the identically named outer locals.
auto write_coins_to_file = [](AutoFile& afile, const uint256& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins) {
    // Largest number of coins a single record's uint16_t count can express.
    constexpr size_t MAX_RECORD_COINS{0xFFFF};

    size_t offset{0};
    // do/while so that an empty `coins` still writes one (txid, 0) record,
    // matching the previous behavior.
    do {
        const size_t remaining{coins.size() - offset};
        const size_t record_size{remaining < MAX_RECORD_COINS ? remaining : MAX_RECORD_COINS};

        afile << last_hash;
        afile << static_cast<uint16_t>(record_size);
        for (size_t i{offset}; i < offset + record_size; ++i) {
            // Bind by const reference: copying a Coin per iteration is wasted work.
            const auto& [vout, coin] = coins[i];
            afile << vout;
            afile << coin;
        }
        offset += record_size;
    } while (offset < coins.size());
};

pcursor->GetKey(key);
last_hash = key.hash;
while (pcursor->Valid()) {
if (iter % 5000 == 0) node.rpc_interruption_point();
++iter;
if (pcursor->GetKey(key) && pcursor->GetValue(coin)) {
afile << key;
afile << coin;
if (key.hash == last_hash) {
coins.emplace_back(key.n, coin);
} else {
write_coins_to_file(afile, last_hash, coins);
last_hash = key.hash;
coins.clear();
coins.emplace_back(key.n, coin);
}
}

pcursor->Next();
}

if (!coins.empty()) {
write_coins_to_file(afile, last_hash, coins);
}

afile.fclose();

UniValue result(UniValue::VOBJ);
Expand Down
99 changes: 56 additions & 43 deletions src/validation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5212,64 +5212,76 @@ bool ChainstateManager::PopulateAndValidateSnapshot(

const AssumeutxoData& au_data = *maybe_au_data;

COutPoint outpoint;
Coin coin;
const uint64_t coins_count = metadata.m_coins_count;
uint64_t coins_left = metadata.m_coins_count;

LogPrintf("[snapshot] loading coins from snapshot %s\n", base_blockhash.ToString());
LogPrintf("[snapshot] loading %d coins from snapshot %s\n", coins_left, base_blockhash.ToString());
int64_t coins_processed{0};

while (coins_left > 0) {
try {
coins_file >> outpoint;
coins_file >> coin;
} catch (const std::ios_base::failure&) {
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
if (coin.nHeight > base_height ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
uint256 txid;
coins_file >> txid;
uint16_t size{0};
coins_file >> size;

coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
if(size > coins_left) {
LogPrintf("[snapshot] mismatch in coins count in snapshot metadata and actual snapshot data\n");
return false;
}

--coins_left;
++coins_processed;
for (int i = 0; i < size; i++) {
COutPoint outpoint;
Coin coin;
coins_file >> outpoint.n;
coins_file >> coin;
outpoint.hash = txid;
if (coin.nHeight > base_height ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));

if (coins_processed % 1000000 == 0) {
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
coins_processed,
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
}
--coins_left;
++coins_processed;

// Batch write and flush (if we need to) every so often.
//
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
// means <5MB of memory imprecision.
if (coins_processed % 120000 == 0) {
if (ShutdownRequested()) {
return false;
}
if (coins_processed % 1000000 == 0) {
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
coins_processed,
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
}

// Batch write and flush (if we need to) every so often.
//
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
// means <5MB of memory imprecision.
if (coins_processed % 120000 == 0) {
if (ShutdownRequested()) {
return false;
}

const auto snapshot_cache_state = WITH_LOCK(::cs_main,
return snapshot_chainstate.GetCoinsCacheSizeState());
const auto snapshot_cache_state = WITH_LOCK(::cs_main,
return snapshot_chainstate.GetCoinsCacheSizeState());

if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
// This is a hack - we don't know what the actual best block is, but that
// doesn't matter for the purposes of flushing the cache here. We'll set this
// to its correct value (`base_blockhash`) below after the coins are loaded.
coins_cache.SetBestBlock(GetRandHash());
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
// This is a hack - we don't know what the actual best block is, but that
// doesn't matter for the purposes of flushing the cache here. We'll set this
// to its correct value (`base_blockhash`) below after the coins are loaded.
coins_cache.SetBestBlock(GetRandHash());

// No need to acquire cs_main since this chainstate isn't being used yet.
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
// No need to acquire cs_main since this chainstate isn't being used yet.
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
}
}
}
} catch (const std::ios_base::failure&) {
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
}

Expand All @@ -5282,6 +5294,7 @@ bool ChainstateManager::PopulateAndValidateSnapshot(

bool out_of_coins{false};
try {
COutPoint outpoint;
coins_file >> outpoint;
} catch (const std::ios_base::failure&) {
// We expect an exception since we should be out of coins.
Expand Down
2 changes: 1 addition & 1 deletion test/functional/rpc_dumptxoutset.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def run_test(self):
digest = hashlib.sha256(f.read()).hexdigest()
# UTXO snapshot hash should be deterministic based on mocked time.
assert_equal(
digest, 'b1bacb602eacf5fbc9a7c2ef6eeb0d229c04e98bdf0c2ea5929012cd0eae3830')
digest, '35aecd5263bf8c17b69ebd5ac9bb4317ebeee461a00fb61fc3e438f20544c142')

assert_equal(
out['txoutset_hash'], '1f7e3befd45dc13ae198dfbb22869a9c5c4196f8e9ef9735831af1288033f890')
Expand Down

0 comments on commit 7acfc2a

Please sign in to comment.