-
Notifications
You must be signed in to change notification settings - Fork 35.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
rpc: Optimize serialization disk space of dumptxoutset #26045
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2668,21 +2668,42 @@ UniValue CreateUTXOSnapshot( | |
|
||
afile << metadata; | ||
|
||
std::map<uint256, std::vector<std::pair<uint32_t, Coin>>> mapCoins; | ||
unsigned int iter{0}; | ||
COutPoint key; | ||
uint256 last_hash; | ||
Coin coin; | ||
unsigned int iter{0}; | ||
std::vector<std::pair<uint32_t, Coin>> coins; | ||
|
||
auto write_coins_to_file = [&](AutoFile& afile, const uint256& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins) { | ||
afile << last_hash; | ||
afile << static_cast<uint16_t>(coins.size()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 4e19464: In Also, There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I think the magic happens here: https://github.com/bitcoin/bitcoin/blob/master/src/serialize.h#L674 However, we can't use that because we are not looking at a full transaction but rather the outpoints that are still left in the UTXO set. But we basically mimic that behavior here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the 65,536: I guess the blocksize solves this for us for now. I think it makes sense to use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A transaction with 65,537 OP_RETURN outputs should fit in a block. If I start with P2TR outputs with this calculator, that's 2,818,159 vbyte. https://bitcoinops.org/en/tools/calc-size/ And then subtract 32 bytes per output: 2,818,159 - 65537 * 32 = 720,975 vbyte. cc @murchandamus can you add OP_RETURN to the dropdown? :-) In any case it seems unsafe to rely on the block size here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm, but OP_RETURNs are not included in the UTXO set and we are serializing the UTXO set here, so I think this could still not happen like this. But I think you are right there are non-standard cases imaginable that make this possible, like just sending to OP_TRUE for example. So we should still make this robust. Anyway, I am using CompactSize now in #29612 :) |
||
for (auto [vout, coin] : coins) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you should call There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should be addressed in #29612 |
||
afile << vout; | ||
afile << coin; | ||
} | ||
}; | ||
|
||
pcursor->GetKey(key); | ||
last_hash = key.hash; | ||
while (pcursor->Valid()) { | ||
if (iter % 5000 == 0) node.rpc_interruption_point(); | ||
++iter; | ||
if (pcursor->GetKey(key) && pcursor->GetValue(coin)) { | ||
afile << key; | ||
afile << coin; | ||
if (key.hash != last_hash) { | ||
write_coins_to_file(afile, last_hash, coins); | ||
last_hash = key.hash; | ||
coins.clear(); | ||
} | ||
coins.emplace_back(key.n, coin); | ||
} | ||
|
||
pcursor->Next(); | ||
} | ||
|
||
if (!coins.empty()) { | ||
write_coins_to_file(afile, last_hash, coins); | ||
} | ||
|
||
afile.fclose(); | ||
|
||
UniValue result(UniValue::VOBJ); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5393,69 +5393,81 @@ bool ChainstateManager::PopulateAndValidateSnapshot( | |
return false; | ||
} | ||
|
||
COutPoint outpoint; | ||
Coin coin; | ||
const uint64_t coins_count = metadata.m_coins_count; | ||
uint64_t coins_left = metadata.m_coins_count; | ||
|
||
LogPrintf("[snapshot] loading coins from snapshot %s\n", base_blockhash.ToString()); | ||
LogPrintf("[snapshot] loading %d coins from snapshot %s\n", coins_left, base_blockhash.ToString()); | ||
int64_t coins_processed{0}; | ||
|
||
while (coins_left > 0) { | ||
try { | ||
coins_file >> outpoint; | ||
coins_file >> coin; | ||
} catch (const std::ios_base::failure&) { | ||
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
if (coin.nHeight > base_height || | ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash | ||
) { | ||
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
if (!MoneyRange(coin.out.nValue)) { | ||
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
Txid txid; | ||
coins_file >> txid; | ||
uint16_t size{0}; | ||
coins_file >> size; | ||
|
||
if(size > coins_left) { | ||
LogPrintf("[snapshot] mismatch in coins count in snapshot metadata and actual snapshot data\n"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the context of my remark above about maybe not serializing There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nothing to do here, since we do have serialize |
||
return false; | ||
} | ||
|
||
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin)); | ||
for (int i = 0; i < size; i++) { | ||
COutPoint outpoint; | ||
Coin coin; | ||
coins_file >> outpoint.n; | ||
coins_file >> coin; | ||
outpoint.hash = txid; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 4e19464 nit: maybe move this above where you set There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should be addressed in #29612 |
||
if (coin.nHeight > base_height || | ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash | ||
) { | ||
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
if (!MoneyRange(coin.out.nValue)) { | ||
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin)); | ||
|
||
--coins_left; | ||
++coins_processed; | ||
--coins_left; | ||
++coins_processed; | ||
|
||
if (coins_processed % 1000000 == 0) { | ||
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n", | ||
coins_processed, | ||
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count), | ||
coins_cache.DynamicMemoryUsage() / (1000 * 1000)); | ||
} | ||
if (coins_processed % 1000000 == 0) { | ||
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n", | ||
coins_processed, | ||
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count), | ||
coins_cache.DynamicMemoryUsage() / (1000 * 1000)); | ||
} | ||
|
||
// Batch write and flush (if we need to) every so often. | ||
// | ||
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins | ||
// means <5MB of memory imprecision. | ||
if (coins_processed % 120000 == 0) { | ||
if (m_interrupt) { | ||
return false; | ||
} | ||
// Batch write and flush (if we need to) every so often. | ||
// | ||
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins | ||
// means <5MB of memory imprecision. | ||
if (coins_processed % 120000 == 0) { | ||
if (m_interrupt) { | ||
return false; | ||
} | ||
|
||
const auto snapshot_cache_state = WITH_LOCK(::cs_main, | ||
return snapshot_chainstate.GetCoinsCacheSizeState()); | ||
const auto snapshot_cache_state = WITH_LOCK(::cs_main, | ||
return snapshot_chainstate.GetCoinsCacheSizeState()); | ||
|
||
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) { | ||
// This is a hack - we don't know what the actual best block is, but that | ||
// doesn't matter for the purposes of flushing the cache here. We'll set this | ||
// to its correct value (`base_blockhash`) below after the coins are loaded. | ||
coins_cache.SetBestBlock(GetRandHash()); | ||
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) { | ||
// This is a hack - we don't know what the actual best block is, but that | ||
// doesn't matter for the purposes of flushing the cache here. We'll set this | ||
// to its correct value (`base_blockhash`) below after the coins are loaded. | ||
coins_cache.SetBestBlock(GetRandHash()); | ||
|
||
// No need to acquire cs_main since this chainstate isn't being used yet. | ||
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false); | ||
// No need to acquire cs_main since this chainstate isn't being used yet. | ||
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false); | ||
} | ||
} | ||
} | ||
} catch (const std::ios_base::failure&) { | ||
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unrelated, but why doesn't this use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should be addressed in #29612 |
||
coins_count - coins_left); | ||
return false; | ||
} | ||
} | ||
|
||
|
@@ -5468,6 +5480,7 @@ bool ChainstateManager::PopulateAndValidateSnapshot( | |
|
||
bool out_of_coins{false}; | ||
try { | ||
COutPoint outpoint; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be: Txid txid;
coins_file >> txid; The current code might accidentally work because a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should be addressed in #29612 |
||
coins_file >> outpoint; | ||
} catch (const std::ios_base::failure&) { | ||
// We expect an exception since we should be out of coins. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,6 +76,7 @@ def test_invalid_snapshot_scenarios(self, valid_snapshot_path): | |
bad_snapshot_path = valid_snapshot_path + '.mod' | ||
|
||
def expected_error(log_msg="", rpc_details=""): | ||
print(log_msg) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't forget to drop this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should be addressed in #29612 |
||
with self.nodes[1].assert_debug_log([log_msg]): | ||
assert_raises_rpc_error(-32603, f"Unable to load UTXO snapshot{rpc_details}", self.nodes[1].loadtxoutset, bad_snapshot_path) | ||
|
||
|
@@ -101,9 +102,9 @@ def expected_error(log_msg="", rpc_details=""): | |
self.log.info(" - snapshot file with alternated UTXO data") | ||
cases = [ | ||
[b"\xff" * 32, 0, "7d52155c9a9fdc4525b637ef6170568e5dad6fabd0b1fdbb9432010b8453095b"], # wrong outpoint hash | ||
[(1).to_bytes(4, "little"), 32, "9f4d897031ab8547665b4153317ae2fdbf0130c7840b66427ebc48b881cb80ad"], # wrong outpoint index | ||
[b"\x81", 36, "3da966ba9826fb6d2604260e01607b55ba44e1a5de298606b08704bc62570ea8"], # wrong coin code VARINT((coinbase ? 1 : 0) | (height << 1)) | ||
[b"\x80", 36, "091e893b3ccb4334378709578025356c8bcb0a623f37c7c4e493133c988648e5"], # another wrong coin code | ||
[(1).to_bytes(4, "little"), 34, "9f4d897031ab8547665b4153317ae2fdbf0130c7840b66427ebc48b881cb80ad"], # wrong outpoint index | ||
[b"\x81", 38, "3da966ba9826fb6d2604260e01607b55ba44e1a5de298606b08704bc62570ea8"], # wrong coin code VARINT((coinbase ? 1 : 0) | (height << 1)) | ||
[b"\x80", 38, "091e893b3ccb4334378709578025356c8bcb0a623f37c7c4e493133c988648e5"], # another wrong coin code | ||
] | ||
|
||
for content, offset, wrong_hash in cases: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This move doesn't seem necessary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be addressed in #29612