From e2b4620a020202043c7f5bf3bdd7c3cf22f6f12a Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Thu, 20 Nov 2025 22:23:59 -0800 Subject: [PATCH 01/28] feat: implement snapshot bootstrapper with cbor download + omnibus changes --- Cargo.lock | 7 + modules/snapshot_bootstrapper/Cargo.toml | 6 + .../src/snapshot_bootstrapper.rs | 214 +++++++++++++++--- processes/omnibus/Cargo.toml | 1 + processes/omnibus/omnibus.toml | 23 +- processes/omnibus/src/main.rs | 10 +- 6 files changed, 227 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 223f5bd9..3661e552 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -373,9 +373,15 @@ version = "0.1.0" dependencies = [ "acropolis_common", "anyhow", + "async-compression", "caryatid_sdk", "config", + "futures-util", + "reqwest 0.12.24", + "serde", + "serde_json", "tokio", + "tokio-util", "tracing", ] @@ -499,6 +505,7 @@ dependencies = [ "acropolis_module_parameters_state", "acropolis_module_peer_network_interface", "acropolis_module_rest_blockfrost", + "acropolis_module_snapshot_bootstrapper", "acropolis_module_spdd_state", "acropolis_module_spo_state", "acropolis_module_stake_delta_filter", diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml index 05a3d128..193ccb32 100644 --- a/modules/snapshot_bootstrapper/Cargo.toml +++ b/modules/snapshot_bootstrapper/Cargo.toml @@ -17,6 +17,12 @@ anyhow = { workspace = true } config = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } +serde = { version = "1.0.228", features = ["derive"] } +serde_json = "1.0.132" +async-compression = { version = "0.4.32", features = ["tokio", "gzip"] } +futures-util = "0.3" +reqwest = "0.12" +tokio-util = { version = "0.7", features = ["io"] } [lib] path = "src/snapshot_bootstrapper.rs" diff --git a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs index 459a222b..1e95341a 100644 --- 
a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs @@ -1,4 +1,4 @@ -use std::{str::FromStr, sync::Arc}; +use std::{path::Path, str::FromStr, sync::Arc}; use acropolis_common::{ genesis_values::GenesisValues, @@ -14,18 +14,40 @@ use acropolis_common::{ stake_addresses::AccountState, BlockHash, BlockInfo, BlockStatus, Era, GenesisDelegates, }; -use anyhow::Result; +use anyhow::{Context as AnyhowContext, Result}; +use async_compression::tokio::bufread::GzipDecoder; use caryatid_sdk::{module, Context, Module}; use config::Config; +use futures_util::TryStreamExt; +use serde::{Deserialize, Serialize}; +use std::fs; +use std::io; +use tokio::fs::File; +use tokio::io::BufReader; use tokio::time::Instant; +use tokio_util::io::StreamReader; use tracing::{error, info, info_span, Instrument}; const DEFAULT_SNAPSHOT_TOPIC: &str = "cardano.snapshot"; const DEFAULT_STARTUP_TOPIC: &str = "cardano.sequence.start"; const DEFAULT_COMPLETION_TOPIC: &str = "cardano.sequence.bootstrapped"; +/// Network configuration file (config.json) +#[derive(Debug, Deserialize, Serialize)] +struct NetworkConfig { + epochs: Vec, +} + +/// Snapshot metadata from snapshots.json +#[derive(Debug, Deserialize, Serialize, Clone)] +struct SnapshotFileMetadata { + epoch: u64, + point: String, + url: String, +} + /// Callback handler that accumulates snapshot data and builds state -struct SnapshotHandler { +pub struct SnapshotHandler { context: Arc>, snapshot_topic: String, @@ -223,20 +245,23 @@ impl SnapshotCallbacks for SnapshotHandler { impl SnapshotBootstrapper { pub async fn init(&self, context: Arc>, config: Arc) -> Result<()> { // TODO: read a config file path, not the snapshot-path; implement TODOs below. 
- let file_path = config - .get_string("snapshot-path") - .inspect_err(|e| error!("failed to find snapshot-path config: {e}"))?; + let network = config.get_string("network").unwrap_or_else(|_| "mainnet".to_string()); + + let data_dir = config.get_string("data-dir").unwrap_or_else(|_| "./data".to_string()); let startup_topic = config.get_string("startup-topic").unwrap_or(DEFAULT_STARTUP_TOPIC.to_string()); let snapshot_topic = config.get_string("snapshot-topic").unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string()); - info!("Publishing snapshots on '{snapshot_topic}'"); let completion_topic = config.get_string("completion-topic").unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string()); - info!("Completing with '{completion_topic}'"); + + info!("Snapshot bootstrapper initializing"); + info!(" Network: {}", network); + info!(" Data directory: {}", data_dir); + info!(" Publishing on '{}'", snapshot_topic); let mut subscription = context.subscribe(&startup_topic).await?; @@ -244,21 +269,88 @@ impl SnapshotBootstrapper { let Ok(_) = subscription.read().await else { return; }; - info!("Received startup message"); + info!("Received startup signal"); // TODO: // Read config file per docs in NOTES.md // read nonces // read headers // read and process ALL of the snapshot files, not just one. 
- let span = info_span!("snapshot_bootstrapper.handle"); async { - if let Err(e) = - Self::process_snapshot(&file_path, context.clone(), &completion_topic).await - { - error!("Failed to process snapshot: {}", e); + let network_dir = format!("{}/{}", data_dir, network); + let config_path = format!("{}/config.json", network_dir); + let snapshots_path = format!("{}/snapshots.json", network_dir); + + let network_config = match Self::read_network_config(&config_path) { + Ok(cfg) => cfg, + Err(e) => { + error!("Failed to read network config from {}: {}", config_path, e); + return; + } + }; + + info!("Loading snapshots for epochs: {:?}", network_config.epochs); + + // Read snapshots metadata + let all_snapshots = match Self::read_snapshots_metadata(&snapshots_path) { + Ok(snaps) => snaps, + Err(e) => { + error!( + "Failed to read snapshots metadata from {}: {}", + snapshots_path, e + ); + return; + } + }; + + // Filter for requested epochs + let target_snapshots: Vec<_> = all_snapshots + .iter() + .filter(|s| network_config.epochs.contains(&s.epoch)) + .cloned() + .collect(); + + if target_snapshots.is_empty() { + error!( + "No snapshots found for requested epochs: {:?}", + network_config.epochs + ); + return; + } + + info!("Found {} snapshot files to process", target_snapshots.len()); + + for snapshot_meta in &target_snapshots { + let filename = format!("{}.cbor", snapshot_meta.point); + let file_path = format!("{}/{}", network_dir, filename); + + if let Err(e) = + Self::ensure_snapshot_downloaded(&file_path, snapshot_meta).await + { + error!("Failed to download snapshot {}: {}", file_path, e); + return; + } + } + + for snapshot_meta in target_snapshots { + let filename = format!("{}.cbor", snapshot_meta.point); + let file_path = format!("{}/{}", network_dir, filename); + + info!( + "Processing snapshot for epoch {} from {}", + snapshot_meta.epoch, file_path + ); + + if let Err(e) = + Self::process_snapshot(&file_path, context.clone(), &completion_topic).await + { + 
error!("Failed to process snapshot {}: {}", file_path, e); + return; + } } + + info!("Snapshot bootstrap completed successfully"); } .instrument(span) .await; @@ -267,6 +359,85 @@ impl SnapshotBootstrapper { Ok(()) } + /// Read network configuration + fn read_network_config(path: &str) -> Result { + let content = fs::read_to_string(path) + .with_context(|| format!("Failed to read network config: {}", path))?; + let config: NetworkConfig = serde_json::from_str(&content) + .with_context(|| format!("Failed to parse network config: {}", path))?; + Ok(config) + } + + /// Read snapshots metadata + fn read_snapshots_metadata(path: &str) -> Result> { + let content = fs::read_to_string(path) + .with_context(|| format!("Failed to read snapshots metadata: {}", path))?; + let snapshots: Vec = serde_json::from_str(&content) + .with_context(|| format!("Failed to parse snapshots metadata: {}", path))?; + Ok(snapshots) + } + + /// Ensure snapshot is downloaded + async fn ensure_snapshot_downloaded( + file_path: &str, + metadata: &SnapshotFileMetadata, + ) -> Result<()> { + let path = Path::new(file_path); + + if path.exists() { + info!("Snapshot file already exists: {}", file_path); + return Ok(()); + } + + info!( + "Downloading snapshot from {} to {}", + metadata.url, file_path + ); + Self::download_and_decompress(&metadata.url, file_path).await?; + + info!("Downloaded: {}", file_path); + Ok(()) + } + + async fn download_and_decompress(url: &str, output_path: &str) -> Result<()> { + if let Some(parent) = Path::new(output_path).parent() { + tokio::fs::create_dir_all(parent).await?; + } + + let client = reqwest::Client::new(); + let response = client.get(url).send().await?; + + if !response.status().is_success() { + anyhow::bail!("Download failed with status: {}", response.status()); + } + + let total_size = response.content_length().unwrap_or(0); + if total_size > 0 { + info!("Downloading {} MB (compressed)...", total_size / 1_000_000); + } + + // Write to temporary file first 
(for atomic rename) + let tmp_path = Path::new(output_path).with_extension("partial"); + let mut file = File::create(&tmp_path).await?; + + // Stream, decompress, and write + let raw_stream_reader = + StreamReader::new(response.bytes_stream().map_err(io::Error::other)); + let buffered_reader = BufReader::new(raw_stream_reader); + let mut decoded_stream = GzipDecoder::new(buffered_reader); + + tokio::io::copy(&mut decoded_stream, &mut file).await?; + + // Ensure data is written to disk + file.sync_all().await?; + + // Atomic rename + tokio::fs::rename(&tmp_path, output_path).await?; + + Ok(()) + } + + /// Process a single snapshot file async fn process_snapshot( file_path: &str, context: Arc>, @@ -275,31 +446,20 @@ impl SnapshotBootstrapper { let parser = StreamingSnapshotParser::new(file_path); let mut callbacks = SnapshotHandler::new(context.clone(), completion_topic.to_string()); - info!( - "Starting snapshot parsing and publishing from: {}", - file_path - ); + info!("Starting snapshot parsing: {}", file_path); let start = Instant::now(); callbacks.publish_start().await?; - - // Parse the snapshot with our callback handler parser.parse(&mut callbacks)?; let duration = start.elapsed(); - info!( - "✓ Parse and publish completed successfully in {:.2?}", - duration - ); + info!("Parsed snapshot in {:.2?}", duration); - // Build the final state from accumulated data let block_info = callbacks.build_block_info()?; let genesis_values = callbacks.build_genesis_values()?; - // Publish completion message to trigger next phase (e.g., Mithril) callbacks.publish_completion(block_info, genesis_values).await?; - info!("Snapshot bootstrap completed successfully"); Ok(()) } } diff --git a/processes/omnibus/Cargo.toml b/processes/omnibus/Cargo.toml index 9afa646f..bb171711 100644 --- a/processes/omnibus/Cargo.toml +++ b/processes/omnibus/Cargo.toml @@ -33,6 +33,7 @@ acropolis_module_historical_accounts_state = { path = "../../modules/historical_ 
acropolis_module_historical_epochs_state = { path = "../../modules/historical_epochs_state" } acropolis_module_block_vrf_validator = { path = "../../modules/block_vrf_validator" } acropolis_module_block_kes_validator = { path = "../../modules/block_kes_validator" } +acropolis_module_snapshot_bootstrapper = { path = "../../modules/snapshot_bootstrapper" } caryatid_process = { workspace = true } caryatid_module_clock = { workspace = true } diff --git a/processes/omnibus/omnibus.toml b/processes/omnibus/omnibus.toml index 0548466d..0cbbe3c8 100644 --- a/processes/omnibus/omnibus.toml +++ b/processes/omnibus/omnibus.toml @@ -1,5 +1,15 @@ # Top-level configuration for Acropolis omnibus process +# ============================================================================ +# Startup Configuration +# ============================================================================ +[startup] +method = "snapshot" # Options: "genesis" | "mithril" | "snapshot" +topic = "cardano.sequence.start" + +# ============================================================================ +# Bootstrap Module Configurations +# ============================================================================ [module.genesis-bootstrapper] [module.mithril-snapshot-fetcher] @@ -10,6 +20,13 @@ download-max-age = "never" # Pause constraint E.g. 
"epoch:100", "block:1200" pause = "none" +[module.snapshot-bootstrapper] +network = "mainnet" +data-dir = "./data" + +# ============================================================================ +# Core Module Configurations +# ============================================================================ [module.peer-network-interface] sync-point = "snapshot" node-addresses = [ @@ -170,9 +187,9 @@ port = 4340 # Enable for message spying #topic = "cardano.#" -[startup] -topic = "cardano.sequence.start" - +# ============================================================================ +# Message Bus Configuration +# ============================================================================ [message-bus.external] class = "rabbit-mq" url = "amqp://127.0.0.1:5672/%2f" diff --git a/processes/omnibus/src/main.rs b/processes/omnibus/src/main.rs index c27df335..a537024c 100644 --- a/processes/omnibus/src/main.rs +++ b/processes/omnibus/src/main.rs @@ -19,14 +19,15 @@ use acropolis_module_consensus::Consensus; use acropolis_module_drdd_state::DRDDState; use acropolis_module_drep_state::DRepState; use acropolis_module_epochs_state::EpochsState; -use acropolis_module_genesis_bootstrapper::GenesisBootstrapper; +// use acropolis_module_genesis_bootstrapper::GenesisBootstrapper; use acropolis_module_governance_state::GovernanceState; use acropolis_module_historical_accounts_state::HistoricalAccountsState; use acropolis_module_historical_epochs_state::HistoricalEpochsState; -use acropolis_module_mithril_snapshot_fetcher::MithrilSnapshotFetcher; +// use acropolis_module_mithril_snapshot_fetcher::MithrilSnapshotFetcher; use acropolis_module_parameters_state::ParametersState; use acropolis_module_peer_network_interface::PeerNetworkInterface; use acropolis_module_rest_blockfrost::BlockfrostREST; +use acropolis_module_snapshot_bootstrapper::SnapshotBootstrapper; use acropolis_module_spdd_state::SPDDState; use acropolis_module_spo_state::SPOState; use 
acropolis_module_stake_delta_filter::StakeDeltaFilter; @@ -101,8 +102,9 @@ pub async fn main() -> Result<()> { let mut process = Process::::create(config).await; // Register modules - GenesisBootstrapper::register(&mut process); - MithrilSnapshotFetcher::register(&mut process); + // GenesisBootstrapper::register(&mut process); + // MithrilSnapshotFetcher::register(&mut process); + SnapshotBootstrapper::register(&mut process); BlockUnpacker::register(&mut process); PeerNetworkInterface::register(&mut process); TxUnpacker::register(&mut process); From dcaab64ffba0464dc173aa1102d49ea956ab31d4 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Fri, 21 Nov 2025 09:46:39 -0800 Subject: [PATCH 02/28] feat: enhance snapshot bootstrapper with error handling and configuration updates --- Cargo.lock | 1 + common/src/snapshot/streaming_snapshot.rs | 15 +- modules/snapshot_bootstrapper/Cargo.toml | 1 + .../src/snapshot_bootstrapper.rs | 186 ++++++++++-------- processes/omnibus/omnibus.toml | 4 +- processes/omnibus/src/main.rs | 34 +++- 6 files changed, 143 insertions(+), 98 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3661e552..023a4905 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -380,6 +380,7 @@ dependencies = [ "reqwest 0.12.24", "serde", "serde_json", + "thiserror 2.0.17", "tokio", "tokio-util", "tracing", diff --git a/common/src/snapshot/streaming_snapshot.rs b/common/src/snapshot/streaming_snapshot.rs index 673acb12..e4dc9f48 100644 --- a/common/src/snapshot/streaming_snapshot.rs +++ b/common/src/snapshot/streaming_snapshot.rs @@ -1568,7 +1568,7 @@ impl StreamingSnapshotParser { if utxo_count.is_multiple_of(1000000) { let buffer_usage = buffer.len(); info!( - " Streamed {} UTXOs, buffer: {} MB, max entry: {} bytes", + "Streamed {} UTXOs, buffer: {} MB, max entry: {} bytes", utxo_count, buffer_usage / 1024 / 1024, max_single_entry_size @@ -1625,20 +1625,17 @@ impl StreamingSnapshotParser { } } - info!(" 🎯 STREAMING RESULTS:"); - info!(" • UTXOs processed: {}", 
utxo_count); + info!("Streaming results:"); + info!(" UTXOs processed: {}", utxo_count); info!( - " • Total data streamed: {:.2} MB", + " Total data streamed: {:.2} MB", total_bytes_processed as f64 / 1024.0 / 1024.0 ); info!( - " • Peak buffer usage: {} MB (vs 2.1GB before!)", + " Peak buffer usage: {} MB", PARSE_BUFFER_SIZE / 1024 / 1024 ); - info!( - " • Largest single entry: {} bytes", - max_single_entry_size - ); + info!(" Largest single entry: {} bytes", max_single_entry_size); Ok(utxo_count) } diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml index 193ccb32..b5104383 100644 --- a/modules/snapshot_bootstrapper/Cargo.toml +++ b/modules/snapshot_bootstrapper/Cargo.toml @@ -22,6 +22,7 @@ serde_json = "1.0.132" async-compression = { version = "0.4.32", features = ["tokio", "gzip"] } futures-util = "0.3" reqwest = "0.12" +thiserror = "2.0.17" tokio-util = { version = "0.7", features = ["io"] } [lib] diff --git a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs index 1e95341a..ebc9e413 100644 --- a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs @@ -1,4 +1,8 @@ -use std::{path::Path, str::FromStr, sync::Arc}; +use std::{ + path::{Path, PathBuf}, + str::FromStr, + sync::Arc, +}; use acropolis_common::{ genesis_values::GenesisValues, @@ -14,7 +18,7 @@ use acropolis_common::{ stake_addresses::AccountState, BlockHash, BlockInfo, BlockStatus, Era, GenesisDelegates, }; -use anyhow::{Context as AnyhowContext, Result}; +use anyhow::Result; use async_compression::tokio::bufread::GzipDecoder; use caryatid_sdk::{module, Context, Module}; use config::Config; @@ -22,6 +26,7 @@ use futures_util::TryStreamExt; use serde::{Deserialize, Serialize}; use std::fs; use std::io; +use thiserror::Error; use tokio::fs::File; use tokio::io::BufReader; use tokio::time::Instant; @@ -32,10 +37,48 @@ 
const DEFAULT_SNAPSHOT_TOPIC: &str = "cardano.snapshot"; const DEFAULT_STARTUP_TOPIC: &str = "cardano.sequence.start"; const DEFAULT_COMPLETION_TOPIC: &str = "cardano.sequence.bootstrapped"; +#[derive(Debug, Error)] +pub enum SnapshotBootstrapError { + #[error("Cannot read network config file {0}: {1}")] + ReadNetworkConfig(PathBuf, io::Error), + + #[error("Cannot read snapshots metadata file {0}: {1}")] + ReadSnapshotsFile(PathBuf, io::Error), + + #[error("Failed to parse network config {0}: {1}")] + MalformedNetworkConfig(PathBuf, serde_json::Error), + + #[error("Failed to parse snapshots JSON file {0}: {1}")] + MalformedSnapshotsFile(PathBuf, serde_json::Error), + + #[error("Cannot create directory {0}: {1}")] + CreateDirectory(PathBuf, io::Error), + + #[error("Failed to download snapshot from {0}: {1}")] + DownloadError(String, reqwest::Error), + + #[error("Download failed from {0}: HTTP status {1}")] + DownloadInvalidStatusCode(String, reqwest::StatusCode), + + #[error("I/O error: {0}")] + Io(#[from] io::Error), +} + /// Network configuration file (config.json) #[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] struct NetworkConfig { - epochs: Vec, + snapshots: Vec, + points: Vec, +} + +/// Point +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +struct Point { + epoch: u64, + id: String, + slot: u64, } /// Snapshot metadata from snapshots.json @@ -86,8 +129,6 @@ impl SnapshotHandler { let metadata = self.metadata.as_ref().ok_or_else(|| anyhow::anyhow!("No metadata available"))?; - // Create a synthetic BlockInfo representing the snapshot state - // This represents the last block included in the snapshot Ok(BlockInfo { status: BlockStatus::Immutable, // Snapshot blocks are immutable slot: 0, // TODO: Extract from snapshot metadata if available @@ -112,25 +153,22 @@ impl SnapshotHandler { // Shelley mainnet genesis hash (placeholder - should be from config) shelley_genesis_hash: Hash::<32>::from_str( 
"1a3be38bcbb7911969283716ad7aa550250226b76a61fc51cc9a9a35d9276d81", - ) - .unwrap(), + )?, genesis_delegs: GenesisDelegates::try_from(vec![]).unwrap(), }) } async fn publish_start(&self) -> Result<()> { - anyhow::Context::context( - self.context - .message_bus - .publish( - &self.snapshot_topic, - Arc::new(Message::Snapshot( - acropolis_common::messages::SnapshotMessage::Startup, - )), - ) - .await, - "Failed to publish start message", - ) + self.context + .message_bus + .publish( + &self.snapshot_topic, + Arc::new(Message::Snapshot( + acropolis_common::messages::SnapshotMessage::Startup, + )), + ) + .await + .map_err(|e| anyhow::anyhow!("Failed to publish start message: {}", e)) } async fn publish_completion( @@ -145,22 +183,20 @@ impl SnapshotHandler { }), )); - anyhow::Context::context( - self.context.message_bus.publish(&self.snapshot_topic, Arc::new(message)).await, - "Failed to publish completion", - ) + self.context + .message_bus + .publish(&self.snapshot_topic, Arc::new(message)) + .await + .map_err(|e| anyhow::anyhow!("Failed to publish completion: {}", e)) } } impl UtxoCallback for SnapshotHandler { fn on_utxo(&mut self, _utxo: UtxoEntry) -> Result<()> { self.utxo_count += 1; - - // Log progress every million UTXOs if self.utxo_count.is_multiple_of(1_000_000) { info!("Processed {} UTXOs", self.utxo_count); } - // TODO: Accumulate UTXO data if needed or send in chunks to UTXOState processor Ok(()) } } @@ -169,7 +205,6 @@ impl PoolCallback for SnapshotHandler { fn on_pools(&mut self, pools: Vec) -> Result<()> { info!("Received {} pools", pools.len()); self.pools.extend(pools); - // TODO: Publish pool data. Ok(()) } } @@ -178,7 +213,6 @@ impl StakeCallback for SnapshotHandler { fn on_accounts(&mut self, accounts: Vec) -> Result<()> { info!("Received {} accounts", accounts.len()); self.accounts.extend(accounts); - // TODO: Publish account data. 
Ok(()) } } @@ -187,8 +221,6 @@ impl DRepCallback for SnapshotHandler { fn on_dreps(&mut self, dreps: Vec) -> Result<()> { info!("Received {} DReps", dreps.len()); self.dreps.extend(dreps); - // TODO: Publish DRep data. - Ok(()) } } @@ -197,7 +229,6 @@ impl ProposalCallback for SnapshotHandler { fn on_proposals(&mut self, proposals: Vec) -> Result<()> { info!("Received {} proposals", proposals.len()); self.proposals.extend(proposals); - // TODO: Publish proposal data. Ok(()) } } @@ -233,28 +264,18 @@ impl SnapshotCallbacks for SnapshotHandler { info!(" - Accounts: {}", self.accounts.len()); info!(" - DReps: {}", self.dreps.len()); info!(" - Proposals: {}", self.proposals.len()); - - // We could send a Resolver reference from here for large data, i.e. the UTXO set, - // which could be a file reference. For a file reference, we'd extend the parser to - // give us a callback value with the offset into the file; and we'd make the streaming - // UTXO parser public and reusable, adding it to the resolver implementation. Ok(()) } } impl SnapshotBootstrapper { pub async fn init(&self, context: Arc>, config: Arc) -> Result<()> { - // TODO: read a config file path, not the snapshot-path; implement TODOs below. let network = config.get_string("network").unwrap_or_else(|_| "mainnet".to_string()); - let data_dir = config.get_string("data-dir").unwrap_or_else(|_| "./data".to_string()); - let startup_topic = config.get_string("startup-topic").unwrap_or(DEFAULT_STARTUP_TOPIC.to_string()); - let snapshot_topic = config.get_string("snapshot-topic").unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string()); - let completion_topic = config.get_string("completion-topic").unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string()); @@ -271,11 +292,6 @@ impl SnapshotBootstrapper { }; info!("Received startup signal"); - // TODO: - // Read config file per docs in NOTES.md - // read nonces - // read headers - // read and process ALL of the snapshot files, not just one. 
let span = info_span!("snapshot_bootstrapper.handle"); async { let network_dir = format!("{}/{}", data_dir, network); @@ -285,36 +301,34 @@ impl SnapshotBootstrapper { let network_config = match Self::read_network_config(&config_path) { Ok(cfg) => cfg, Err(e) => { - error!("Failed to read network config from {}: {}", config_path, e); + error!("Failed to read network config: {}", e); return; } }; - info!("Loading snapshots for epochs: {:?}", network_config.epochs); + info!( + "Loading snapshots for epochs: {:?}", + network_config.snapshots + ); - // Read snapshots metadata let all_snapshots = match Self::read_snapshots_metadata(&snapshots_path) { Ok(snaps) => snaps, Err(e) => { - error!( - "Failed to read snapshots metadata from {}: {}", - snapshots_path, e - ); + error!("Failed to read snapshots metadata: {}", e); return; } }; - // Filter for requested epochs let target_snapshots: Vec<_> = all_snapshots .iter() - .filter(|s| network_config.epochs.contains(&s.epoch)) + .filter(|s| network_config.snapshots.contains(&s.epoch)) .cloned() .collect(); if target_snapshots.is_empty() { error!( "No snapshots found for requested epochs: {:?}", - network_config.epochs + network_config.snapshots ); return; } @@ -328,7 +342,7 @@ impl SnapshotBootstrapper { if let Err(e) = Self::ensure_snapshot_downloaded(&file_path, snapshot_meta).await { - error!("Failed to download snapshot {}: {}", file_path, e); + error!("Failed to download snapshot: {}", e); return; } } @@ -345,7 +359,7 @@ impl SnapshotBootstrapper { if let Err(e) = Self::process_snapshot(&file_path, context.clone(), &completion_topic).await { - error!("Failed to process snapshot {}: {}", file_path, e); + error!("Failed to process snapshot: {}", e); return; } } @@ -360,28 +374,36 @@ impl SnapshotBootstrapper { } /// Read network configuration - fn read_network_config(path: &str) -> Result { - let content = fs::read_to_string(path) - .with_context(|| format!("Failed to read network config: {}", path))?; + fn 
read_network_config(path: &str) -> Result { + let path_buf = PathBuf::from(path); + let content = fs::read_to_string(&path_buf) + .map_err(|e| SnapshotBootstrapError::ReadNetworkConfig(path_buf.clone(), e))?; + let config: NetworkConfig = serde_json::from_str(&content) - .with_context(|| format!("Failed to parse network config: {}", path))?; + .map_err(|e| SnapshotBootstrapError::MalformedNetworkConfig(path_buf, e))?; + Ok(config) } - /// Read snapshots metadata - fn read_snapshots_metadata(path: &str) -> Result> { - let content = fs::read_to_string(path) - .with_context(|| format!("Failed to read snapshots metadata: {}", path))?; + /// Read snapshot metadata + fn read_snapshots_metadata( + path: &str, + ) -> Result, SnapshotBootstrapError> { + let path_buf = PathBuf::from(path); + let content = fs::read_to_string(&path_buf) + .map_err(|e| SnapshotBootstrapError::ReadSnapshotsFile(path_buf.clone(), e))?; + let snapshots: Vec = serde_json::from_str(&content) - .with_context(|| format!("Failed to parse snapshots metadata: {}", path))?; + .map_err(|e| SnapshotBootstrapError::MalformedSnapshotsFile(path_buf, e))?; + Ok(snapshots) } - /// Ensure snapshot is downloaded + /// Ensure the snapshot is downloaded async fn ensure_snapshot_downloaded( file_path: &str, metadata: &SnapshotFileMetadata, - ) -> Result<()> { + ) -> Result<(), SnapshotBootstrapError> { let path = Path::new(file_path); if path.exists() { @@ -393,22 +415,30 @@ impl SnapshotBootstrapper { "Downloading snapshot from {} to {}", metadata.url, file_path ); - Self::download_and_decompress(&metadata.url, file_path).await?; - + Self::download_snapshot(&metadata.url, file_path).await?; info!("Downloaded: {}", file_path); Ok(()) } - async fn download_and_decompress(url: &str, output_path: &str) -> Result<()> { + async fn download_snapshot(url: &str, output_path: &str) -> Result<(), SnapshotBootstrapError> { if let Some(parent) = Path::new(output_path).parent() { - tokio::fs::create_dir_all(parent).await?; + 
tokio::fs::create_dir_all(parent) + .await + .map_err(|e| SnapshotBootstrapError::CreateDirectory(parent.to_path_buf(), e))?; } let client = reqwest::Client::new(); - let response = client.get(url).send().await?; + let response = client + .get(url) + .send() + .await + .map_err(|e| SnapshotBootstrapError::DownloadError(url.to_string(), e))?; if !response.status().is_success() { - anyhow::bail!("Download failed with status: {}", response.status()); + return Err(SnapshotBootstrapError::DownloadInvalidStatusCode( + url.to_string(), + response.status(), + )); } let total_size = response.content_length().unwrap_or(0); @@ -416,22 +446,16 @@ impl SnapshotBootstrapper { info!("Downloading {} MB (compressed)...", total_size / 1_000_000); } - // Write to temporary file first (for atomic rename) let tmp_path = Path::new(output_path).with_extension("partial"); let mut file = File::create(&tmp_path).await?; - // Stream, decompress, and write let raw_stream_reader = StreamReader::new(response.bytes_stream().map_err(io::Error::other)); let buffered_reader = BufReader::new(raw_stream_reader); let mut decoded_stream = GzipDecoder::new(buffered_reader); tokio::io::copy(&mut decoded_stream, &mut file).await?; - - // Ensure data is written to disk file.sync_all().await?; - - // Atomic rename tokio::fs::rename(&tmp_path, output_path).await?; Ok(()) diff --git a/processes/omnibus/omnibus.toml b/processes/omnibus/omnibus.toml index 0cbbe3c8..f6805c04 100644 --- a/processes/omnibus/omnibus.toml +++ b/processes/omnibus/omnibus.toml @@ -4,7 +4,7 @@ # Startup Configuration # ============================================================================ [startup] -method = "snapshot" # Options: "genesis" | "mithril" | "snapshot" +method = "genesis" # Options: "genesis" | "snapshot" topic = "cardano.sequence.start" # ============================================================================ @@ -22,7 +22,7 @@ pause = "none" [module.snapshot-bootstrapper] network = "mainnet" -data-dir = "./data" 
+data-dir = "../../modules/snapshot_bootstrapper/data" # ============================================================================ # Core Module Configurations diff --git a/processes/omnibus/src/main.rs b/processes/omnibus/src/main.rs index a537024c..55d2cee8 100644 --- a/processes/omnibus/src/main.rs +++ b/processes/omnibus/src/main.rs @@ -19,11 +19,11 @@ use acropolis_module_consensus::Consensus; use acropolis_module_drdd_state::DRDDState; use acropolis_module_drep_state::DRepState; use acropolis_module_epochs_state::EpochsState; -// use acropolis_module_genesis_bootstrapper::GenesisBootstrapper; +use acropolis_module_genesis_bootstrapper::GenesisBootstrapper; use acropolis_module_governance_state::GovernanceState; use acropolis_module_historical_accounts_state::HistoricalAccountsState; use acropolis_module_historical_epochs_state::HistoricalEpochsState; -// use acropolis_module_mithril_snapshot_fetcher::MithrilSnapshotFetcher; +use acropolis_module_mithril_snapshot_fetcher::MithrilSnapshotFetcher; use acropolis_module_parameters_state::ParametersState; use acropolis_module_peer_network_interface::PeerNetworkInterface; use acropolis_module_rest_blockfrost::BlockfrostREST; @@ -99,12 +99,34 @@ pub async fn main() -> Result<()> { ); // Create the process - let mut process = Process::::create(config).await; + let mut process = Process::::create(config.clone()).await; + + // Get startup method from config + let startup_method = + config.get_string("startup.method").unwrap_or_else(|_| "snapshot".to_string()); + + info!("Using startup method: {}", startup_method); + + // Register bootstrap modules based on startup method + match startup_method.as_str() { + "genesis" => { + info!("Registering GenesisBootstrapper"); + GenesisBootstrapper::register(&mut process); + } + "snapshot" => { + info!("Registering SnapshotBootstrapper"); + SnapshotBootstrapper::register(&mut process); + } + _ => { + panic!( + "Invalid startup method: {}. 
Must be one of: genesis, snapshot", + startup_method + ); + } + } // Register modules - // GenesisBootstrapper::register(&mut process); - // MithrilSnapshotFetcher::register(&mut process); - SnapshotBootstrapper::register(&mut process); + MithrilSnapshotFetcher::register(&mut process); BlockUnpacker::register(&mut process); PeerNetworkInterface::register(&mut process); TxUnpacker::register(&mut process); From 25c24904b198ab7cd18bdd1ca4688d958650de08 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Fri, 21 Nov 2025 10:06:02 -0800 Subject: [PATCH 03/28] add todos back --- .../src/snapshot_bootstrapper.rs | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs index ebc9e413..f9edbb1a 100644 --- a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs @@ -20,7 +20,7 @@ use acropolis_common::{ }; use anyhow::Result; use async_compression::tokio::bufread::GzipDecoder; -use caryatid_sdk::{module, Context, Module}; +use caryatid_sdk::{module, Context}; use config::Config; use futures_util::TryStreamExt; use serde::{Deserialize, Serialize}; @@ -129,6 +129,8 @@ impl SnapshotHandler { let metadata = self.metadata.as_ref().ok_or_else(|| anyhow::anyhow!("No metadata available"))?; + // Create a synthetic BlockInfo representing the snapshot state + // This represents the last block included in the snapshot Ok(BlockInfo { status: BlockStatus::Immutable, // Snapshot blocks are immutable slot: 0, // TODO: Extract from snapshot metadata if available @@ -154,7 +156,7 @@ impl SnapshotHandler { shelley_genesis_hash: Hash::<32>::from_str( "1a3be38bcbb7911969283716ad7aa550250226b76a61fc51cc9a9a35d9276d81", )?, - genesis_delegs: GenesisDelegates::try_from(vec![]).unwrap(), + genesis_delegs: GenesisDelegates::try_from(vec![])?, }) } @@ -194,9 +196,12 @@ impl 
SnapshotHandler { impl UtxoCallback for SnapshotHandler { fn on_utxo(&mut self, _utxo: UtxoEntry) -> Result<()> { self.utxo_count += 1; + + // Log progress every million UTXOs if self.utxo_count.is_multiple_of(1_000_000) { info!("Processed {} UTXOs", self.utxo_count); } + // TODO: Accumulate UTXO data if needed or send in chunks to UTXOState processor Ok(()) } } @@ -205,6 +210,7 @@ impl PoolCallback for SnapshotHandler { fn on_pools(&mut self, pools: Vec) -> Result<()> { info!("Received {} pools", pools.len()); self.pools.extend(pools); + // TODO: Publish pool data. Ok(()) } } @@ -213,6 +219,7 @@ impl StakeCallback for SnapshotHandler { fn on_accounts(&mut self, accounts: Vec) -> Result<()> { info!("Received {} accounts", accounts.len()); self.accounts.extend(accounts); + // TODO: Publish account data. Ok(()) } } @@ -221,6 +228,8 @@ impl DRepCallback for SnapshotHandler { fn on_dreps(&mut self, dreps: Vec) -> Result<()> { info!("Received {} DReps", dreps.len()); self.dreps.extend(dreps); + // TODO: Publish DRep data. + Ok(()) } } @@ -229,6 +238,7 @@ impl ProposalCallback for SnapshotHandler { fn on_proposals(&mut self, proposals: Vec) -> Result<()> { info!("Received {} proposals", proposals.len()); self.proposals.extend(proposals); + // TODO: Publish proposal data. Ok(()) } } @@ -264,6 +274,11 @@ impl SnapshotCallbacks for SnapshotHandler { info!(" - Accounts: {}", self.accounts.len()); info!(" - DReps: {}", self.dreps.len()); info!(" - Proposals: {}", self.proposals.len()); + + // We could send a Resolver reference from here for large data, i.e. the UTXO set, + // which could be a file reference. For a file reference, we'd extend the parser to + // give us a callback value with the offset into the file; and we'd make the streaming + // UTXO parser public and reusable, adding it to the resolver implementation. 
Ok(()) } } @@ -278,7 +293,8 @@ impl SnapshotBootstrapper { config.get_string("snapshot-topic").unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string()); let completion_topic = config.get_string("completion-topic").unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string()); - + info!("Publishing snapshots on '{snapshot_topic}'"); + info!("Completing with '{completion_topic}'"); info!("Snapshot bootstrapper initializing"); info!(" Network: {}", network); info!(" Data directory: {}", data_dir); @@ -290,7 +306,13 @@ impl SnapshotBootstrapper { let Ok(_) = subscription.read().await else { return; }; - info!("Received startup signal"); + info!("Received startup message"); + + // TODO: + // Read config file per docs in NOTES.md + // read nonces + // read headers + // read and process ALL of the snapshot files, not just one. let span = info_span!("snapshot_bootstrapper.handle"); async { @@ -479,11 +501,14 @@ impl SnapshotBootstrapper { let duration = start.elapsed(); info!("Parsed snapshot in {:.2?}", duration); + // Build the final state from accumulated data let block_info = callbacks.build_block_info()?; let genesis_values = callbacks.build_genesis_values()?; + // Publish completion message to trigger next phase (e.g., Mithril) callbacks.publish_completion(block_info, genesis_values).await?; + info!("Snapshot bootstrap completed successfully"); Ok(()) } } From 8e73547e288b0d5d58547d12afc23561e8b12f58 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Fri, 21 Nov 2025 10:16:48 -0800 Subject: [PATCH 04/28] feat: add configuration files for snapshot management --- .../data/mainnet/config.json | 104 ++++++++++++++++++ .../data/mainnet/snapshots.json | 17 +++ 2 files changed, 121 insertions(+) create mode 100644 modules/snapshot_bootstrapper/data/mainnet/config.json create mode 100644 modules/snapshot_bootstrapper/data/mainnet/snapshots.json diff --git a/modules/snapshot_bootstrapper/data/mainnet/config.json b/modules/snapshot_bootstrapper/data/mainnet/config.json new file mode 100644 index 
00000000..a1bb266a --- /dev/null +++ b/modules/snapshot_bootstrapper/data/mainnet/config.json @@ -0,0 +1,104 @@ +{ + "snapshots": [ + 507, + 508, + 509 + ], + "points": [ + { + "epoch": 507, + "id": "670ca68c3de580f8469677754a725e86ca72a7be381d3108569f0704a5fca327", + "slot": 134092758 + }, + { + "epoch": 508, + "id": "29011cc1320d03b3da0121236dc66e6bc391feef4bb1d506a7fb20e769d6a494", + "slot": 134524753 + }, + { + "epoch": 509, + "id": "6558deef007ba372a414466e49214368c17c1f8428093193fc187d1c4587053c", + "slot": 134956789 + }, + { + "epoch": 510, + "id": "3fd738bacbcc277d43358a28ed15fa4335977c822fd7546d0de2606d7d2a57aa", + "slot": 135388794 + }, + { + "epoch": 511, + "id": "d02f89d21fe9c80f927eeda31fadb03b589db2ac5c8108d7171c4c319aca2fa1", + "slot": 135820797 + }, + { + "epoch": 512, + "id": "9503c7c669746be68ca34ed4d822d9d0dcccd5c0ef61cb9679a1c1e739534853", + "slot": 136252793 + }, + { + "epoch": 513, + "id": "e2bb0babbc715953ce1edc4e0c817a4b1fa9d36124648b4d21d1a2ccd26be672", + "slot": 136684793 + }, + { + "epoch": 514, + "id": "2c4f7a0a855e76e5d83b9d3e168213711490663dddfc6925e09a37fe46ed62b4", + "slot": 137116798 + }, + { + "epoch": 515, + "id": "66c5229785de3ff7bb2834db69fc8da5d3203a7cdf2d6983b3e9e155ff6ec0fb", + "slot": 137548794 + }, + { + "epoch": 516, + "id": "b934fa686e585636cc74a07555dbd8c10f9680464f80273f1d29806ecbc5e822", + "slot": 137980781 + }, + { + "epoch": 517, + "id": "7c4afb5f4ba5d7182f99fd839e26302bcdca06c9066b825f2f40f4a094d7f0ab", + "slot": 138412701 + }, + { + "epoch": 518, + "id": "a65138e908ccc90014b4ae740382c7908f9636e56c6e9d6ecec38f452b70c93f", + "slot": 138844799 + }, + { + "epoch": 519, + "id": "0cffc5eb77a6885257fcba94b8fd6fdddc80e368bf4ef855f058c6adda4933c1", + "slot": 139276793 + }, + { + "epoch": 520, + "id": "58f198313d00d639814db34f32aad259e22c53089dfa95dae79e0e2e4d93c6f0", + "slot": 139708765 + }, + { + "epoch": 521, + "id": "7e423f52284987b4b358a0a9b6847525c42a818a024dde663101669ab2e8a6ee", + "slot": 140140779 + }, + { + 
"epoch": 522, + "id": "86f874039f07143ab4d7d5c6ccb27ea33fd1440f81176055fe9e4e6e910800e9", + "slot": 140572798 + }, + { + "epoch": 523, + "id": "96a53046d8bbfa690b6bfbc2c7f99036b3494f99e616e998224bcfcd33b84e7b", + "slot": 141004797 + }, + { + "epoch": 524, + "id": "865267d5b5fe9d497418ea72c9b84058e5aa2a98ace96043d53fec32eebf4fef", + "slot": 141436773 + }, + { + "epoch": 525, + "id": "e4846337e6f87ed65c88e770ab5c1bec39de45cbf3bdde88b249ac1ad2cd2a8a", + "slot": 141868737 + } + ] +} diff --git a/modules/snapshot_bootstrapper/data/mainnet/snapshots.json b/modules/snapshot_bootstrapper/data/mainnet/snapshots.json new file mode 100644 index 00000000..695a55ec --- /dev/null +++ b/modules/snapshot_bootstrapper/data/mainnet/snapshots.json @@ -0,0 +1,17 @@ +[ + { + "epoch": 507, + "point": "134092758.670ca68c3de580f8469677754a725e86ca72a7be381d3108569f0704a5fca327", + "url": "https://pub-b844360df4774bb092a2bb2043b888e5.r2.dev/134092758.670ca68c3de580f8469677754a725e86ca72a7be381d3108569f0704a5fca327.cbor.gz" + }, + { + "epoch": 508, + "point": "134524753.29011cc1320d03b3da0121236dc66e6bc391feef4bb1d506a7fb20e769d6a494", + "url": "https://pub-b844360df4774bb092a2bb2043b888e5.r2.dev/134524753.29011cc1320d03b3da0121236dc66e6bc391feef4bb1d506a7fb20e769d6a494.cbor.gz" + }, + { + "epoch": 509, + "point": "134956789.6558deef007ba372a414466e49214368c17c1f8428093193fc187d1c4587053c", + "url": "https://pub-b844360df4774bb092a2bb2043b888e5.r2.dev/134956789.6558deef007ba372a414466e49214368c17c1f8428093193fc187d1c4587053c.cbor.gz" + } +] From e486a972fb8a00f7118554eebf65c8d9aa7aa3d6 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Fri, 21 Nov 2025 15:20:01 -0800 Subject: [PATCH 05/28] feat: refactor snapshot bootstrapper with improved config handling and streamlined snapshot processing --- Cargo.lock | 3 +- common/src/snapshot/NOTES.md | 12 +- .../src/mithril_snapshot_fetcher.rs | 2 +- modules/snapshot_bootstrapper/Cargo.toml | 7 +- .../src/snapshot_bootstrapper.rs | 753 
+++++++++++++----- processes/omnibus/omnibus.toml | 2 +- processes/omnibus/src/main.rs | 15 +- 7 files changed, 558 insertions(+), 236 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6033525..701927eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -376,13 +376,12 @@ dependencies = [ "async-compression", "caryatid_sdk", "config", - "futures-util", "reqwest 0.12.24", "serde", "serde_json", + "tempfile", "thiserror 2.0.17", "tokio", - "tokio-util", "tracing", ] diff --git a/common/src/snapshot/NOTES.md b/common/src/snapshot/NOTES.md index 710ebd3f..6dfcd5ca 100644 --- a/common/src/snapshot/NOTES.md +++ b/common/src/snapshot/NOTES.md @@ -1,6 +1,6 @@ # Bootstrapping from a Snapshot file -We can boot an Acropolis node either from geneis and replay all of the blocks up to +We can boot an Acropolis node either from genesis and replay all of the blocks up to some point, or we can boot from a snapshot file. This module provides the components needed to boot from a snapshot file. See [snapshot_bootsrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) for the process that @@ -12,7 +12,7 @@ that rely only on that epoch of data. We're also skipping some of the problemati eras and will typically boot from Conway around epoch 305, 306, and 307. It takes three epochs to have enough context to correctly calculate the rewards. -The required data for boostrapping are: +The required data for bootstrapping are: - snapshot files (each has an associated epoch number and point) - nonces @@ -51,7 +51,7 @@ a network name of `preview`, the expected layout for configuration files would b * `data/preview/nonces.json`: a list of `InitialNonces` values, * `data/preview/headers.json`: a list of `Point`s. -These files are loaded by [snapshot_bootsrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) +These files are loaded by [snapshot_bootstrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) during bootup. 
## Bootstrapping sequence @@ -72,7 +72,7 @@ for each of the three snapshot files. Loading occurs in this order: Modules in the system will have subscribed to the Startup message and also to individual structural data update messages before the -boostrapper runs the above sequence. Upon receiving the `Startup` message, +bootstrapper runs the above sequence. Upon receiving the `Startup` message, they will use data messages to populate their state, history (for BlockFrost), and any other state required to achieve readiness to operate on reception of the `GenesisCompleteMessage`. @@ -82,9 +82,9 @@ the `GenesisCompleteMessage`. The bootstrapper will publish data as it parses the snapshot files, nonces, and headers. Snapshot parsing is done while streaming the data to keep the memory footprint lower. As elements of the file are parsed, callbacks provide the data -to the boostrapper which publishes the data on the message bus. +to the bootstrapper which publishes the data on the message bus. -There are TODO markers in [snapshot_bootsrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) +There are TODO markers in [snapshot_bootstrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) that show where to add the publishing of the parsed snapshot data. 
diff --git a/modules/mithril_snapshot_fetcher/src/mithril_snapshot_fetcher.rs b/modules/mithril_snapshot_fetcher/src/mithril_snapshot_fetcher.rs index 5df5cd22..1a1a91e1 100644 --- a/modules/mithril_snapshot_fetcher/src/mithril_snapshot_fetcher.rs +++ b/modules/mithril_snapshot_fetcher/src/mithril_snapshot_fetcher.rs @@ -379,7 +379,7 @@ impl MithrilSnapshotFetcher { // Send completion message if let Some(last_block_info) = last_block_info { info!( - "Finished shapshot at block {}, epoch {}", + "Finished snapshot at block {}, epoch {}", last_block_info.number, last_block_info.epoch ); let message_enum = diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml index b5104383..3e16f4c6 100644 --- a/modules/snapshot_bootstrapper/Cargo.toml +++ b/modules/snapshot_bootstrapper/Cargo.toml @@ -20,10 +20,11 @@ tracing = { workspace = true } serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.132" async-compression = { version = "0.4.32", features = ["tokio", "gzip"] } -futures-util = "0.3" -reqwest = "0.12" +reqwest = { version = "0.12", features = ["stream"] } thiserror = "2.0.17" -tokio-util = { version = "0.7", features = ["io"] } + +[dev-dependencies] +tempfile = "3" [lib] path = "src/snapshot_bootstrapper.rs" diff --git a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs index f9edbb1a..5b276d9e 100644 --- a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs @@ -1,13 +1,10 @@ use std::{ path::{Path, PathBuf}, - str::FromStr, sync::Arc, }; use acropolis_common::{ - genesis_values::GenesisValues, - hash::Hash, - messages::{CardanoMessage, GenesisCompleteMessage, Message}, + messages::{CardanoMessage, Message}, snapshot::{ streaming_snapshot::{ DRepCallback, DRepInfo, GovernanceProposal, PoolCallback, PoolInfo, ProposalCallback, @@ -16,13 +13,12 @@ use acropolis_common::{ 
StreamingSnapshotParser, }, stake_addresses::AccountState, - BlockHash, BlockInfo, BlockStatus, Era, GenesisDelegates, + BlockHash, BlockInfo, BlockStatus, Era, }; -use anyhow::Result; +use anyhow::{bail, Result}; use async_compression::tokio::bufread::GzipDecoder; -use caryatid_sdk::{module, Context}; +use caryatid_sdk::{module, Context, Subscription}; use config::Config; -use futures_util::TryStreamExt; use serde::{Deserialize, Serialize}; use std::fs; use std::io; @@ -30,12 +26,12 @@ use thiserror::Error; use tokio::fs::File; use tokio::io::BufReader; use tokio::time::Instant; -use tokio_util::io::StreamReader; use tracing::{error, info, info_span, Instrument}; const DEFAULT_SNAPSHOT_TOPIC: &str = "cardano.snapshot"; const DEFAULT_STARTUP_TOPIC: &str = "cardano.sequence.start"; -const DEFAULT_COMPLETION_TOPIC: &str = "cardano.sequence.bootstrapped"; +const DEFAULT_COMPLETION_TOPIC: &str = "cardano.snapshot.complete"; +const DEFAULT_BOOTSTRAPPED_TOPIC: &str = "cardano.sequence.bootstrapped"; #[derive(Debug, Error)] pub enum SnapshotBootstrapError { @@ -62,6 +58,53 @@ pub enum SnapshotBootstrapError { #[error("I/O error: {0}")] Io(#[from] io::Error), + + #[error("Snapshot parsing failed: {0}")] + ParseError(String), +} + +/// Configuration for the snapshot bootstrapper +#[derive(Debug, Clone)] +struct SnapshotConfig { + network: String, + data_dir: String, + startup_topic: String, + snapshot_topic: String, + bootstrapped_topic: String, + completion_topic: String, +} + +impl SnapshotConfig { + fn try_load(config: &Config) -> Result { + Ok(Self { + network: config.get_string("network").unwrap_or_else(|_| "mainnet".to_string()), + data_dir: config.get_string("data-dir").unwrap_or_else(|_| "./data".to_string()), + startup_topic: config + .get_string("startup-topic") + .unwrap_or(DEFAULT_STARTUP_TOPIC.to_string()), + snapshot_topic: config + .get_string("snapshot-topic") + .unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string()), + bootstrapped_topic: config + 
.get_string("bootstrapped-subscribe-topic") + .unwrap_or(DEFAULT_BOOTSTRAPPED_TOPIC.to_string()), + completion_topic: config + .get_string("completion-topic") + .unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string()), + }) + } + + fn network_dir(&self) -> String { + format!("{}/{}", self.data_dir, self.network) + } + + fn config_path(&self) -> String { + format!("{}/config.json", self.network_dir()) + } + + fn snapshots_path(&self) -> String { + format!("{}/snapshots.json", self.network_dir()) + } } /// Network configuration file (config.json) @@ -89,12 +132,11 @@ struct SnapshotFileMetadata { url: String, } -/// Callback handler that accumulates snapshot data and builds state -pub struct SnapshotHandler { +/// Handles publishing snapshot data to the message bus +struct SnapshotPublisher { context: Arc>, + completion_topic: String, snapshot_topic: String, - - // Accumulated data from callbacks metadata: Option, utxo_count: u64, pools: Vec, @@ -103,17 +145,15 @@ pub struct SnapshotHandler { proposals: Vec, } -#[module( - message_type(Message), - name = "snapshot-bootstrapper", - description = "Snapshot Bootstrapper to broadcast state" -)] -pub struct SnapshotBootstrapper; - -impl SnapshotHandler { - fn new(context: Arc>, snapshot_topic: String) -> Self { +impl SnapshotPublisher { + fn new( + context: Arc>, + completion_topic: String, + snapshot_topic: String, + ) -> Self { Self { context, + completion_topic, snapshot_topic, metadata: None, utxo_count: 0, @@ -124,76 +164,23 @@ impl SnapshotHandler { } } - /// Build BlockInfo from accumulated metadata - fn build_block_info(&self) -> Result { - let metadata = - self.metadata.as_ref().ok_or_else(|| anyhow::anyhow!("No metadata available"))?; - - // Create a synthetic BlockInfo representing the snapshot state - // This represents the last block included in the snapshot - Ok(BlockInfo { - status: BlockStatus::Immutable, // Snapshot blocks are immutable - slot: 0, // TODO: Extract from snapshot metadata if available - number: 0, 
// TODO: Extract from snapshot metadata if available - hash: BlockHash::default(), // TODO: Extract from snapshot metadata if available - epoch: metadata.epoch, - epoch_slot: 0, // TODO: Extract from snapshot metadata if available - new_epoch: false, // Not necessarily a new epoch - timestamp: 0, // TODO: Extract from snapshot metadata if available - era: Era::Conway, // TODO: Determine from snapshot or config - }) - } - - /// Build GenesisValues from snapshot data - fn build_genesis_values(&self) -> Result { - // TODO: These values should ideally come from the snapshot or configuration - // For now, using defaults for Conway era - Ok(GenesisValues { - byron_timestamp: 1506203091, // Byron mainnet genesis timestamp - shelley_epoch: 208, // Shelley started at epoch 208 on mainnet - shelley_epoch_len: 432000, // 5 days in seconds - // Shelley mainnet genesis hash (placeholder - should be from config) - shelley_genesis_hash: Hash::<32>::from_str( - "1a3be38bcbb7911969283716ad7aa550250226b76a61fc51cc9a9a35d9276d81", - )?, - genesis_delegs: GenesisDelegates::try_from(vec![])?, - }) - } - async fn publish_start(&self) -> Result<()> { - self.context - .message_bus - .publish( - &self.snapshot_topic, - Arc::new(Message::Snapshot( - acropolis_common::messages::SnapshotMessage::Startup, - )), - ) - .await - .map_err(|e| anyhow::anyhow!("Failed to publish start message: {}", e)) + let message = Arc::new(Message::Snapshot( + acropolis_common::messages::SnapshotMessage::Startup, + )); + self.context.publish(&self.snapshot_topic, message).await } - async fn publish_completion( - &self, - block_info: BlockInfo, - genesis_values: GenesisValues, - ) -> Result<()> { - let message = Message::Cardano(( + async fn publish_completion(&self, block_info: BlockInfo) -> Result<()> { + let message = Arc::new(Message::Cardano(( block_info, - CardanoMessage::GenesisComplete(GenesisCompleteMessage { - values: genesis_values, - }), - )); - - self.context - .message_bus - 
.publish(&self.snapshot_topic, Arc::new(message)) - .await - .map_err(|e| anyhow::anyhow!("Failed to publish completion: {}", e)) + CardanoMessage::SnapshotComplete, + ))); + self.context.publish(&self.completion_topic, message).await } } -impl UtxoCallback for SnapshotHandler { +impl UtxoCallback for SnapshotPublisher { fn on_utxo(&mut self, _utxo: UtxoEntry) -> Result<()> { self.utxo_count += 1; @@ -206,49 +193,44 @@ impl UtxoCallback for SnapshotHandler { } } -impl PoolCallback for SnapshotHandler { +impl PoolCallback for SnapshotPublisher { fn on_pools(&mut self, pools: Vec) -> Result<()> { info!("Received {} pools", pools.len()); self.pools.extend(pools); - // TODO: Publish pool data. Ok(()) } } -impl StakeCallback for SnapshotHandler { +impl StakeCallback for SnapshotPublisher { fn on_accounts(&mut self, accounts: Vec) -> Result<()> { info!("Received {} accounts", accounts.len()); self.accounts.extend(accounts); - // TODO: Publish account data. Ok(()) } } -impl DRepCallback for SnapshotHandler { +impl DRepCallback for SnapshotPublisher { fn on_dreps(&mut self, dreps: Vec) -> Result<()> { info!("Received {} DReps", dreps.len()); self.dreps.extend(dreps); - // TODO: Publish DRep data. - Ok(()) } } -impl ProposalCallback for SnapshotHandler { +impl ProposalCallback for SnapshotPublisher { fn on_proposals(&mut self, proposals: Vec) -> Result<()> { info!("Received {} proposals", proposals.len()); self.proposals.extend(proposals); - // TODO: Publish proposal data. 
Ok(()) } } -impl SnapshotCallbacks for SnapshotHandler { +impl SnapshotCallbacks for SnapshotPublisher { fn on_metadata(&mut self, metadata: SnapshotMetadata) -> Result<()> { - info!("Received snapshot metadata for epoch {}", metadata.epoch); - info!(" - UTXOs: {:?}", metadata.utxo_count); + info!("Snapshot metadata for epoch {}", metadata.epoch); + info!(" UTXOs: {:?}", metadata.utxo_count); info!( - " - Pot balances: treasury={}, reserves={}, deposits={}", + " Pot balances: treasury={}, reserves={}, deposits={}", metadata.pot_balances.treasury, metadata.pot_balances.reserves, metadata.pot_balances.deposits @@ -283,70 +265,63 @@ impl SnapshotCallbacks for SnapshotHandler { } } +#[module( + message_type(Message), + name = "snapshot-bootstrapper", + description = "Snapshot Bootstrapper to broadcast state via streaming" +)] +pub struct SnapshotBootstrapper; + impl SnapshotBootstrapper { pub async fn init(&self, context: Arc>, config: Arc) -> Result<()> { - let network = config.get_string("network").unwrap_or_else(|_| "mainnet".to_string()); - let data_dir = config.get_string("data-dir").unwrap_or_else(|_| "./data".to_string()); - let startup_topic = - config.get_string("startup-topic").unwrap_or(DEFAULT_STARTUP_TOPIC.to_string()); - let snapshot_topic = - config.get_string("snapshot-topic").unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string()); - let completion_topic = - config.get_string("completion-topic").unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string()); - info!("Publishing snapshots on '{snapshot_topic}'"); - info!("Completing with '{completion_topic}'"); + let cfg = SnapshotConfig::try_load(&config)?; + info!("Snapshot bootstrapper initializing"); - info!(" Network: {}", network); - info!(" Data directory: {}", data_dir); - info!(" Publishing on '{}'", snapshot_topic); + info!(" Network: {}", cfg.network); + info!(" Data directory: {}", cfg.data_dir); + info!(" Publishing on '{}'", cfg.snapshot_topic); + info!(" Completing with '{}'", cfg.completion_topic); - let mut 
subscription = context.subscribe(&startup_topic).await?; + let startup_sub = context.subscribe(&cfg.startup_topic).await?; + let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_topic).await?; context.clone().run(async move { - let Ok(_) = subscription.read().await else { - return; - }; - info!("Received startup message"); - - // TODO: - // Read config file per docs in NOTES.md - // read nonces - // read headers - // read and process ALL of the snapshot files, not just one. - let span = info_span!("snapshot_bootstrapper.handle"); async { - let network_dir = format!("{}/{}", data_dir, network); - let config_path = format!("{}/config.json", network_dir); - let snapshots_path = format!("{}/snapshots.json", network_dir); + // Wait for startup signal + if let Err(e) = Self::wait_startup(startup_sub).await { + error!("Failed waiting for startup: {e:#}"); + return; + } + + // Wait for genesis bootstrap completion + if let Err(e) = Self::wait_genesis_completion(bootstrapped_sub).await { + error!("Failed waiting for bootstrapped: {e:#}"); + return; + } - let network_config = match Self::read_network_config(&config_path) { - Ok(cfg) => cfg, + info!("Bootstrap prerequisites met, starting snapshot processing"); + + // Load network configuration + let network_config = match Self::read_network_config(&cfg.config_path()) { + Ok(config) => config, Err(e) => { - error!("Failed to read network config: {}", e); + error!("Failed to read network config: {e:#}"); return; } }; - info!( - "Loading snapshots for epochs: {:?}", - network_config.snapshots - ); - - let all_snapshots = match Self::read_snapshots_metadata(&snapshots_path) { - Ok(snaps) => snaps, + // Load snapshots metadata + let all_snapshots = match Self::read_snapshots_metadata(&cfg.snapshots_path()) { + Ok(snapshots) => snapshots, Err(e) => { - error!("Failed to read snapshots metadata: {}", e); + error!("Failed to read snapshots metadata: {e:#}"); return; } }; - let target_snapshots: Vec<_> = all_snapshots - .iter() 
- .filter(|s| network_config.snapshots.contains(&s.epoch)) - .cloned() - .collect(); - + // Filter snapshots based on network config + let target_snapshots = Self::filter_snapshots(&network_config, &all_snapshots); if target_snapshots.is_empty() { error!( "No snapshots found for requested epochs: {:?}", @@ -355,35 +330,22 @@ impl SnapshotBootstrapper { return; } - info!("Found {} snapshot files to process", target_snapshots.len()); + info!("Found {} snapshot(s) to process", target_snapshots.len()); - for snapshot_meta in &target_snapshots { - let filename = format!("{}.cbor", snapshot_meta.point); - let file_path = format!("{}/{}", network_dir, filename); - - if let Err(e) = - Self::ensure_snapshot_downloaded(&file_path, snapshot_meta).await - { - error!("Failed to download snapshot: {}", e); - return; - } + // Download all snapshots + if let Err(e) = + Self::download_snapshots(&target_snapshots, &cfg.network_dir()).await + { + error!("Failed to download snapshots: {e:#}"); + return; } - for snapshot_meta in target_snapshots { - let filename = format!("{}.cbor", snapshot_meta.point); - let file_path = format!("{}/{}", network_dir, filename); - - info!( - "Processing snapshot for epoch {} from {}", - snapshot_meta.epoch, file_path - ); - - if let Err(e) = - Self::process_snapshot(&file_path, context.clone(), &completion_topic).await - { - error!("Failed to process snapshot: {}", e); - return; - } + // Process snapshots in order + if let Err(e) = + Self::process_snapshots(&target_snapshots, &cfg, context.clone()).await + { + error!("Failed to process snapshots: {e:#}"); + return; } info!("Snapshot bootstrap completed successfully"); @@ -395,7 +357,25 @@ impl SnapshotBootstrapper { Ok(()) } - /// Read network configuration + async fn wait_startup(mut subscription: Box>) -> Result<()> { + let (_, _message) = subscription.read().await?; + info!("Received startup message"); + Ok(()) + } + + async fn wait_genesis_completion( + mut subscription: Box>, + ) -> Result<()> { + 
let (_, message) = subscription.read().await?; + match message.as_ref() { + Message::Cardano((_, CardanoMessage::GenesisComplete(_complete))) => { + info!("Received genesis complete message"); + Ok(()) + } + msg => bail!("Unexpected message in bootstrapped topic: {msg:?}"), + } + } + fn read_network_config(path: &str) -> Result { let path_buf = PathBuf::from(path); let content = fs::read_to_string(&path_buf) @@ -407,7 +387,6 @@ impl SnapshotBootstrapper { Ok(config) } - /// Read snapshot metadata fn read_snapshots_metadata( path: &str, ) -> Result, SnapshotBootstrapError> { @@ -421,36 +400,48 @@ impl SnapshotBootstrapper { Ok(snapshots) } - /// Ensure the snapshot is downloaded - async fn ensure_snapshot_downloaded( - file_path: &str, - metadata: &SnapshotFileMetadata, + fn filter_snapshots( + network_config: &NetworkConfig, + all_snapshots: &[SnapshotFileMetadata], + ) -> Vec { + all_snapshots + .iter() + .filter(|s| network_config.snapshots.contains(&s.epoch)) + .cloned() + .collect() + } + + async fn download_snapshots( + snapshots: &[SnapshotFileMetadata], + network_dir: &str, ) -> Result<(), SnapshotBootstrapError> { - let path = Path::new(file_path); + for snapshot_meta in snapshots { + let filename = format!("{}.cbor", snapshot_meta.point); + let file_path = format!("{}/{}", network_dir, filename); - if path.exists() { - info!("Snapshot file already exists: {}", file_path); - return Ok(()); + Self::download_snapshot(&snapshot_meta.url, &file_path).await?; } - - info!( - "Downloading snapshot from {} to {}", - metadata.url, file_path - ); - Self::download_snapshot(&metadata.url, file_path).await?; - info!("Downloaded: {}", file_path); Ok(()) } async fn download_snapshot(url: &str, output_path: &str) -> Result<(), SnapshotBootstrapError> { - if let Some(parent) = Path::new(output_path).parent() { + let path = Path::new(output_path); + + if path.exists() { + info!("Snapshot already exists, skipping: {}", output_path); + return Ok(()); + } + + info!("Downloading 
snapshot from {}", url); + + if let Some(parent) = path.parent() { tokio::fs::create_dir_all(parent) .await .map_err(|e| SnapshotBootstrapError::CreateDirectory(parent.to_path_buf(), e))?; } let client = reqwest::Client::new(); - let response = client + let mut response = client .get(url) .send() .await @@ -463,52 +454,382 @@ impl SnapshotBootstrapper { )); } - let total_size = response.content_length().unwrap_or(0); - if total_size > 0 { - info!("Downloading {} MB (compressed)...", total_size / 1_000_000); - } - - let tmp_path = Path::new(output_path).with_extension("partial"); + let tmp_path = path.with_extension("partial"); let mut file = File::create(&tmp_path).await?; - let raw_stream_reader = - StreamReader::new(response.bytes_stream().map_err(io::Error::other)); - let buffered_reader = BufReader::new(raw_stream_reader); - let mut decoded_stream = GzipDecoder::new(buffered_reader); + let mut compressed_data = Vec::new(); + while let Some(chunk) = response + .chunk() + .await + .map_err(|e| SnapshotBootstrapError::DownloadError(url.to_string(), e))? 
+ { + compressed_data.extend_from_slice(&chunk); + } + + let cursor = io::Cursor::new(&compressed_data); + let buffered = BufReader::new(cursor); + let mut decoder = GzipDecoder::new(buffered); + tokio::io::copy(&mut decoder, &mut file).await?; - tokio::io::copy(&mut decoded_stream, &mut file).await?; file.sync_all().await?; tokio::fs::rename(&tmp_path, output_path).await?; + info!("Downloaded snapshot to {}", output_path); Ok(()) } - /// Process a single snapshot file - async fn process_snapshot( - file_path: &str, + async fn process_snapshots( + snapshots: &[SnapshotFileMetadata], + cfg: &SnapshotConfig, context: Arc>, - completion_topic: &str, ) -> Result<()> { - let parser = StreamingSnapshotParser::new(file_path); - let mut callbacks = SnapshotHandler::new(context.clone(), completion_topic.to_string()); + let mut publisher = SnapshotPublisher::new( + context, + cfg.completion_topic.clone(), + cfg.snapshot_topic.clone(), + ); + + // Publish start once at the beginning + publisher.publish_start().await?; + + for snapshot_meta in snapshots { + let filename = format!("{}.cbor", snapshot_meta.point); + let file_path = format!("{}/{}", cfg.network_dir(), filename); + + info!( + "Processing snapshot for epoch {} from {}", + snapshot_meta.epoch, file_path + ); + + Self::parse_snapshot(&file_path, &mut publisher).await?; + } - info!("Starting snapshot parsing: {}", file_path); + let metadata = publisher + .metadata + .as_ref() + .ok_or_else(|| anyhow::anyhow!("No metadata received from snapshots"))?; + + let block_info = build_block_info_from_metadata(metadata); + publisher.publish_completion(block_info).await?; + + Ok(()) + } + + async fn parse_snapshot(file_path: &str, publisher: &mut SnapshotPublisher) -> Result<()> { + info!("Parsing snapshot: {}", file_path); let start = Instant::now(); - callbacks.publish_start().await?; - parser.parse(&mut callbacks)?; + let parser = StreamingSnapshotParser::new(file_path); + parser.parse(publisher)?; let duration = 
start.elapsed(); info!("Parsed snapshot in {:.2?}", duration); - // Build the final state from accumulated data - let block_info = callbacks.build_block_info()?; - let genesis_values = callbacks.build_genesis_values()?; + Ok(()) + } +} + +fn build_block_info_from_metadata(metadata: &SnapshotMetadata) -> BlockInfo { + BlockInfo { + status: BlockStatus::Immutable, + slot: 0, + number: 0, + hash: BlockHash::default(), + epoch: metadata.epoch, + epoch_slot: 0, + new_epoch: false, + timestamp: 0, + era: Era::Conway, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::io::Write; + use std::path::Path; + use tempfile::TempDir; + + fn create_test_network_config(dir: &Path, snapshots: Vec) -> PathBuf { + let config = NetworkConfig { + snapshots, + points: vec![Point { + epoch: 500, + id: "test_block_hash".to_string(), + slot: 12345678, + }], + }; + + let config_path = dir.join("config.json"); + let mut file = fs::File::create(&config_path).unwrap(); + file.write_all(serde_json::to_string_pretty(&config).unwrap().as_bytes()).unwrap(); + config_path + } - // Publish completion message to trigger next phase (e.g., Mithril) - callbacks.publish_completion(block_info, genesis_values).await?; + fn create_test_snapshots_metadata(dir: &Path, epochs: Vec, base_url: &str) -> PathBuf { + let snapshots: Vec = epochs + .iter() + .map(|epoch| SnapshotFileMetadata { + epoch: *epoch, + point: format!("point_{}", epoch), + url: format!("{}/snapshot_{}.cbor.gz", base_url, epoch), + }) + .collect(); + + let snapshots_path = dir.join("snapshots.json"); + let mut file = fs::File::create(&snapshots_path).unwrap(); + file.write_all(serde_json::to_string_pretty(&snapshots).unwrap().as_bytes()).unwrap(); + snapshots_path + } - info!("Snapshot bootstrap completed successfully"); - Ok(()) + fn create_fake_snapshot(dir: &Path, point: &str) { + let snapshot_path = dir.join(format!("{}.cbor", point)); + let mut file = fs::File::create(&snapshot_path).unwrap(); + 
file.write_all(b"fake snapshot data").unwrap(); + } + + #[test] + fn test_read_network_config_success() { + let temp_dir = TempDir::new().unwrap(); + let config_path = create_test_network_config(temp_dir.path(), vec![500, 501]); + + let result = SnapshotBootstrapper::read_network_config(config_path.to_str().unwrap()); + assert!(result.is_ok()); + + let config = result.unwrap(); + assert_eq!(config.snapshots, vec![500, 501]); + assert_eq!(config.points.len(), 1); + } + + #[test] + fn test_read_network_config_missing_file() { + let result = SnapshotBootstrapper::read_network_config("/nonexistent/config.json"); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + SnapshotBootstrapError::ReadNetworkConfig(_, _) + )); + } + + #[test] + fn test_read_network_config_malformed_json() { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("config.json"); + let mut file = fs::File::create(&config_path).unwrap(); + file.write_all(b"{ invalid json }").unwrap(); + + let result = SnapshotBootstrapper::read_network_config(config_path.to_str().unwrap()); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + SnapshotBootstrapError::MalformedNetworkConfig(_, _) + )); + } + + #[test] + fn test_read_snapshots_metadata_success() { + let temp_dir = TempDir::new().unwrap(); + let snapshots_path = + create_test_snapshots_metadata(temp_dir.path(), vec![500, 501], "https://example.com"); + + let result = + SnapshotBootstrapper::read_snapshots_metadata(snapshots_path.to_str().unwrap()); + assert!(result.is_ok()); + + let snapshots = result.unwrap(); + assert_eq!(snapshots.len(), 2); + assert_eq!(snapshots[0].epoch, 500); + assert_eq!(snapshots[1].epoch, 501); + } + + #[test] + fn test_read_snapshots_metadata_missing_file() { + let result = SnapshotBootstrapper::read_snapshots_metadata("/nonexistent/snapshots.json"); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + 
SnapshotBootstrapError::ReadSnapshotsFile(_, _) + )); + } + + #[test] + fn test_filter_snapshots() { + let network_config = NetworkConfig { + snapshots: vec![500, 502], + points: vec![], + }; + + let all_snapshots = vec![ + SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: "url1".to_string(), + }, + SnapshotFileMetadata { + epoch: 501, + point: "point_501".to_string(), + url: "url2".to_string(), + }, + SnapshotFileMetadata { + epoch: 502, + point: "point_502".to_string(), + url: "url3".to_string(), + }, + ]; + + let filtered = SnapshotBootstrapper::filter_snapshots(&network_config, &all_snapshots); + + assert_eq!(filtered.len(), 2); + assert_eq!(filtered[0].epoch, 500); + assert_eq!(filtered[1].epoch, 502); + } + + #[tokio::test] + async fn test_download_snapshot_skips_existing_file() { + let temp_dir = TempDir::new().unwrap(); + let point = "point_500"; + create_fake_snapshot(temp_dir.path(), point); + + let file_path = temp_dir.path().join(format!("{}.cbor", point)); + + let result = SnapshotBootstrapper::download_snapshot( + "https://example.com/snapshot.cbor.gz", + file_path.to_str().unwrap(), + ) + .await; + + assert!(result.is_ok()); + assert!(file_path.exists()); + } + + #[tokio::test] + async fn test_download_snapshot_missing_file_fails() { + let temp_dir = TempDir::new().unwrap(); + let point = "point_500"; + let file_path = temp_dir.path().join(format!("{}.cbor", point)); + + let result = SnapshotBootstrapper::download_snapshot( + "https://invalid-url-that-does-not-exist.com/snapshot.cbor.gz", + file_path.to_str().unwrap(), + ) + .await; + + assert!(result.is_err()); + assert!(!file_path.exists()); + } + + #[test] + fn test_snapshot_filtering_by_epoch() { + let temp_dir = TempDir::new().unwrap(); + create_test_network_config(temp_dir.path(), vec![500, 502]); + create_test_snapshots_metadata( + temp_dir.path(), + vec![500, 501, 502, 503], + "https://example.com", + ); + + let network_config = 
SnapshotBootstrapper::read_network_config( + temp_dir.path().join("config.json").to_str().unwrap(), + ) + .unwrap(); + + let all_snapshots = SnapshotBootstrapper::read_snapshots_metadata( + temp_dir.path().join("snapshots.json").to_str().unwrap(), + ) + .unwrap(); + + let target_snapshots = + SnapshotBootstrapper::filter_snapshots(&network_config, &all_snapshots); + + assert_eq!(target_snapshots.len(), 2); + assert_eq!(target_snapshots[0].epoch, 500); + assert_eq!(target_snapshots[1].epoch, 502); + } + + #[test] + fn test_empty_snapshots_list() { + let temp_dir = TempDir::new().unwrap(); + create_test_network_config(temp_dir.path(), vec![999]); + create_test_snapshots_metadata(temp_dir.path(), vec![500, 501], "https://example.com"); + + let network_config = SnapshotBootstrapper::read_network_config( + temp_dir.path().join("config.json").to_str().unwrap(), + ) + .unwrap(); + + let all_snapshots = SnapshotBootstrapper::read_snapshots_metadata( + temp_dir.path().join("snapshots.json").to_str().unwrap(), + ) + .unwrap(); + + let target_snapshots = + SnapshotBootstrapper::filter_snapshots(&network_config, &all_snapshots); + + assert!(target_snapshots.is_empty()); + } + + #[tokio::test] + async fn test_download_snapshot_creates_directory() { + let temp_dir = TempDir::new().unwrap(); + let nested_path = temp_dir.path().join("nested").join("directory").join("snapshot.cbor"); + + let _ = SnapshotBootstrapper::download_snapshot( + "https://invalid-url.com/snapshot.cbor.gz", + nested_path.to_str().unwrap(), + ) + .await; + + assert!(nested_path.parent().unwrap().exists()); + } + + #[test] + fn test_corrupted_config_json_fails_gracefully() { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("config.json"); + let mut file = fs::File::create(&config_path).unwrap(); + file.write_all(b"{\"snapshots\": [500, 501]").unwrap(); + + let result = SnapshotBootstrapper::read_network_config(config_path.to_str().unwrap()); + assert!(result.is_err()); + + if 
let Err(SnapshotBootstrapError::MalformedNetworkConfig(path, _)) = result { + assert_eq!(path, config_path); + } else { + panic!("Expected MalformedNetworkConfig error"); + } + } + + #[test] + fn test_corrupted_snapshots_json_fails_gracefully() { + let temp_dir = TempDir::new().unwrap(); + let snapshots_path = temp_dir.path().join("snapshots.json"); + let mut file = fs::File::create(&snapshots_path).unwrap(); + file.write_all(b"[{\"epoch\": 500}").unwrap(); + + let result = + SnapshotBootstrapper::read_snapshots_metadata(snapshots_path.to_str().unwrap()); + assert!(result.is_err()); + + if let Err(SnapshotBootstrapError::MalformedSnapshotsFile(path, _)) = result { + assert_eq!(path, snapshots_path); + } else { + panic!("Expected MalformedSnapshotsFile error"); + } + } + + #[tokio::test] + async fn test_download_creates_partial_file_then_renames() { + let temp_dir = TempDir::new().unwrap(); + let output_path = temp_dir.path().join("snapshot.cbor"); + + let result = SnapshotBootstrapper::download_snapshot( + "https://invalid-url.com/snapshot.cbor.gz", + output_path.to_str().unwrap(), + ) + .await; + + assert!(result.is_err()); + assert!(!output_path.exists()); } } diff --git a/processes/omnibus/omnibus.toml b/processes/omnibus/omnibus.toml index f6805c04..4b25d34e 100644 --- a/processes/omnibus/omnibus.toml +++ b/processes/omnibus/omnibus.toml @@ -4,7 +4,7 @@ # Startup Configuration # ============================================================================ [startup] -method = "genesis" # Options: "genesis" | "snapshot" +method = "mithril" # Options: "mithril" | "snapshot" topic = "cardano.sequence.start" # ============================================================================ diff --git a/processes/omnibus/src/main.rs b/processes/omnibus/src/main.rs index 55d2cee8..66d49454 100644 --- a/processes/omnibus/src/main.rs +++ b/processes/omnibus/src/main.rs @@ -102,16 +102,17 @@ pub async fn main() -> Result<()> { let mut process = 
Process::::create(config.clone()).await; // Get startup method from config + // TODO: Lift to constant let startup_method = - config.get_string("startup.method").unwrap_or_else(|_| "snapshot".to_string()); + config.get_string("startup.method").unwrap_or_else(|_| "mithril".to_string()); info!("Using startup method: {}", startup_method); - // Register bootstrap modules based on startup method + // Register bootstrap modules based on the startup method match startup_method.as_str() { - "genesis" => { - info!("Registering GenesisBootstrapper"); - GenesisBootstrapper::register(&mut process); + "mithril" => { + info!("Registering MithrilSnapshotFetcher"); + MithrilSnapshotFetcher::register(&mut process); } "snapshot" => { info!("Registering SnapshotBootstrapper"); @@ -119,14 +120,14 @@ pub async fn main() -> Result<()> { } _ => { panic!( - "Invalid startup method: {}. Must be one of: genesis, snapshot", + "Invalid startup method: {}. Must be one of: mithril, snapshot", startup_method ); } } // Register modules - MithrilSnapshotFetcher::register(&mut process); + GenesisBootstrapper::register(&mut process); BlockUnpacker::register(&mut process); PeerNetworkInterface::register(&mut process); TxUnpacker::register(&mut process); From deb40cab411bfd4e7cb9baa04c95f1a48db2fb46 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Fri, 21 Nov 2025 16:58:17 -0800 Subject: [PATCH 06/28] feat: enhance snapshot download functionality with client reuse and improved error handling --- .../src/snapshot_bootstrapper.rs | 115 +++++++++++------- 1 file changed, 74 insertions(+), 41 deletions(-) diff --git a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs index 5b276d9e..f1167ce5 100644 --- a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs @@ -19,6 +19,7 @@ use anyhow::{bail, Result}; use async_compression::tokio::bufread::GzipDecoder; use 
caryatid_sdk::{module, Context, Subscription}; use config::Config; +use reqwest::Client; use serde::{Deserialize, Serialize}; use std::fs; use std::io; @@ -132,6 +133,12 @@ struct SnapshotFileMetadata { url: String, } +impl SnapshotFileMetadata { + fn file_path(&self, network_dir: &str) -> String { + format!("{}/{}.cbor", network_dir, self.point) + } +} + /// Handles publishing snapshot data to the message bus struct SnapshotPublisher { context: Arc>, @@ -197,6 +204,7 @@ impl PoolCallback for SnapshotPublisher { fn on_pools(&mut self, pools: Vec) -> Result<()> { info!("Received {} pools", pools.len()); self.pools.extend(pools); + // TODO: Accumulate pool data if needed or send in chunks to PoolState processor Ok(()) } } @@ -205,6 +213,7 @@ impl StakeCallback for SnapshotPublisher { fn on_accounts(&mut self, accounts: Vec) -> Result<()> { info!("Received {} accounts", accounts.len()); self.accounts.extend(accounts); + // TODO: Accumulate account data if needed or send in chunks to AccountState processor Ok(()) } } @@ -213,6 +222,7 @@ impl DRepCallback for SnapshotPublisher { fn on_dreps(&mut self, dreps: Vec) -> Result<()> { info!("Received {} DReps", dreps.len()); self.dreps.extend(dreps); + // TODO: Accumulate DRep data if needed or send in chunks to DRepState processor Ok(()) } } @@ -221,6 +231,7 @@ impl ProposalCallback for SnapshotPublisher { fn on_proposals(&mut self, proposals: Vec) -> Result<()> { info!("Received {} proposals", proposals.len()); self.proposals.extend(proposals); + // TODO: Accumulate proposal data if needed or send in chunks to ProposalState processor Ok(()) } } @@ -256,7 +267,6 @@ impl SnapshotCallbacks for SnapshotPublisher { info!(" - Accounts: {}", self.accounts.len()); info!(" - DReps: {}", self.dreps.len()); info!(" - Proposals: {}", self.proposals.len()); - // We could send a Resolver reference from here for large data, i.e. the UTXO set, // which could be a file reference. 
For a file reference, we'd extend the parser to // give us a callback value with the offset into the file; and we'd make the streaming @@ -273,6 +283,7 @@ impl SnapshotCallbacks for SnapshotPublisher { pub struct SnapshotBootstrapper; impl SnapshotBootstrapper { + /// Initializes the snapshot bootstrapper. pub async fn init(&self, context: Arc>, config: Arc) -> Result<()> { let cfg = SnapshotConfig::try_load(&config)?; @@ -288,7 +299,7 @@ impl SnapshotBootstrapper { context.clone().run(async move { let span = info_span!("snapshot_bootstrapper.handle"); async { - // Wait for startup signal + // Wait for the startup signal if let Err(e) = Self::wait_startup(startup_sub).await { error!("Failed waiting for startup: {e:#}"); return; @@ -415,16 +426,24 @@ impl SnapshotBootstrapper { snapshots: &[SnapshotFileMetadata], network_dir: &str, ) -> Result<(), SnapshotBootstrapError> { - for snapshot_meta in snapshots { - let filename = format!("{}.cbor", snapshot_meta.point); - let file_path = format!("{}/{}", network_dir, filename); + let client = Client::new(); - Self::download_snapshot(&snapshot_meta.url, &file_path).await?; + for snapshot_meta in snapshots { + let file_path = snapshot_meta.file_path(network_dir); + Self::download_snapshot(&client, &snapshot_meta.url, &file_path).await?; } Ok(()) } - async fn download_snapshot(url: &str, output_path: &str) -> Result<(), SnapshotBootstrapError> { + /// Downloads a gzip-compressed snapshot from the given URL, decompresses it on-the-fly, + /// and saves the decompressed CBOR data to the specified output path. + /// The data is first written to a `.partial` temporary file to ensure atomicity + /// and then renamed to the final output path upon successful completion. 
+ async fn download_snapshot( + client: &Client, + url: &str, + output_path: &str, + ) -> Result<(), SnapshotBootstrapError> { let path = Path::new(output_path); if path.exists() { @@ -440,42 +459,53 @@ impl SnapshotBootstrapper { .map_err(|e| SnapshotBootstrapError::CreateDirectory(parent.to_path_buf(), e))?; } - let client = reqwest::Client::new(); - let mut response = client - .get(url) - .send() - .await - .map_err(|e| SnapshotBootstrapError::DownloadError(url.to_string(), e))?; - - if !response.status().is_success() { - return Err(SnapshotBootstrapError::DownloadInvalidStatusCode( - url.to_string(), - response.status(), - )); - } - let tmp_path = path.with_extension("partial"); - let mut file = File::create(&tmp_path).await?; - - let mut compressed_data = Vec::new(); - while let Some(chunk) = response - .chunk() - .await - .map_err(|e| SnapshotBootstrapError::DownloadError(url.to_string(), e))? - { - compressed_data.extend_from_slice(&chunk); - } - let cursor = io::Cursor::new(&compressed_data); - let buffered = BufReader::new(cursor); - let mut decoder = GzipDecoder::new(buffered); - tokio::io::copy(&mut decoder, &mut file).await?; + // Ensure cleanup on failure + let result = async { + let mut response = client + .get(url) + .send() + .await + .map_err(|e| SnapshotBootstrapError::DownloadError(url.to_string(), e))?; + + if !response.status().is_success() { + return Err(SnapshotBootstrapError::DownloadInvalidStatusCode( + url.to_string(), + response.status(), + )); + } - file.sync_all().await?; - tokio::fs::rename(&tmp_path, output_path).await?; + let mut file = File::create(&tmp_path).await?; - info!("Downloaded snapshot to {}", output_path); - Ok(()) + let mut compressed_data = Vec::new(); + while let Some(chunk) = response + .chunk() + .await + .map_err(|e| SnapshotBootstrapError::DownloadError(url.to_string(), e))? 
+ { + compressed_data.extend_from_slice(&chunk); + } + + let cursor = io::Cursor::new(&compressed_data); + let buffered = BufReader::new(cursor); + let mut decoder = GzipDecoder::new(buffered); + tokio::io::copy(&mut decoder, &mut file).await?; + + file.sync_all().await?; + tokio::fs::rename(&tmp_path, output_path).await?; + + info!("Downloaded snapshot to {}", output_path); + Ok(()) + } + .await; + + // Clean up partial file on error + if result.is_err() { + let _ = tokio::fs::remove_file(&tmp_path).await; + } + + result } async fn process_snapshots( @@ -493,8 +523,7 @@ impl SnapshotBootstrapper { publisher.publish_start().await?; for snapshot_meta in snapshots { - let filename = format!("{}.cbor", snapshot_meta.point); - let file_path = format!("{}/{}", cfg.network_dir(), filename); + let file_path = snapshot_meta.file_path(&cfg.network_dir()); info!( "Processing snapshot for epoch {} from {}", @@ -694,6 +723,7 @@ mod tests { let file_path = temp_dir.path().join(format!("{}.cbor", point)); let result = SnapshotBootstrapper::download_snapshot( + &Client::new(), "https://example.com/snapshot.cbor.gz", file_path.to_str().unwrap(), ) @@ -710,6 +740,7 @@ mod tests { let file_path = temp_dir.path().join(format!("{}.cbor", point)); let result = SnapshotBootstrapper::download_snapshot( + &Client::new(), "https://invalid-url-that-does-not-exist.com/snapshot.cbor.gz", file_path.to_str().unwrap(), ) @@ -775,6 +806,7 @@ mod tests { let nested_path = temp_dir.path().join("nested").join("directory").join("snapshot.cbor"); let _ = SnapshotBootstrapper::download_snapshot( + &Client::new(), "https://invalid-url.com/snapshot.cbor.gz", nested_path.to_str().unwrap(), ) @@ -824,6 +856,7 @@ mod tests { let output_path = temp_dir.path().join("snapshot.cbor"); let result = SnapshotBootstrapper::download_snapshot( + &Client::new(), "https://invalid-url.com/snapshot.cbor.gz", output_path.to_str().unwrap(), ) From a44bb828ec6e940698b3bd85ed427fb7d9d16069 Mon Sep 17 00:00:00 2001 From: 
Matthew Hounslow Date: Fri, 21 Nov 2025 17:03:21 -0800 Subject: [PATCH 07/28] feat: update NOTES.md with configuration details and bootstrapping sequence improvements --- common/src/snapshot/NOTES.md | 92 --------------------- modules/snapshot_bootstrapper/NOTES.md | 108 +++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 92 deletions(-) delete mode 100644 common/src/snapshot/NOTES.md create mode 100644 modules/snapshot_bootstrapper/NOTES.md diff --git a/common/src/snapshot/NOTES.md b/common/src/snapshot/NOTES.md deleted file mode 100644 index 6dfcd5ca..00000000 --- a/common/src/snapshot/NOTES.md +++ /dev/null @@ -1,92 +0,0 @@ -# Bootstrapping from a Snapshot file - -We can boot an Acropolis node either from genesis and replay all of the blocks up to -some point, or we can boot from a snapshot file. This module provides the components -needed to boot from a snapshot file. -See [snapshot_bootsrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) for the process that -references and runs with these helpers. - -Booting from a snapshot takes minutes instead of the hours it takes to boot from -genesis. It also allows booting from a given epoch which allows one to create tests -that rely only on that epoch of data. We're also skipping some of the problematic -eras and will typically boot from Conway around epoch 305, 306, and 307. It takes -three epochs to have enough context to correctly calculate the rewards. - -The required data for bootstrapping are: - -- snapshot files (each has an associated epoch number and point) -- nonces -- headers - -## Snapshot Files - -The snapshots come from the Amaru project. In their words, -"the snapshots we generated are different [from a Mithril snapshot]: they're -the actual ledger state; i.e. the in-memory state that is constructed by iterating over each block up to a specific -point. So, it's all the UTxOs, the set of pending governance actions, the account balance, etc. 
-If you get this from a trusted source, you don't need to do any replay, you can just start up and load this from disk. -The format of these is completely non-standard; we just forked the haskell node and spit out whatever we needed to in -CBOR." - -Snapshot files are referenced by their epoch number in the config.json file below. - -See [Amaru snapshot format](../../../docs/amaru-snapshot-structure.md) - -## Configuration files - -There is a path for each network bootstrap configuration file. Network Should -be one of 'mainnet', 'preprod', 'preview' or 'testnet_' where -`magic` is a 32-bits unsigned value denoting a particular testnet. - -Data structure, e.g. as [Amaru mainnet](https://github.com/pragma-org/amaru/tree/main/data/mainnet) - -The bootstrapper will be given a path to a directory that is expected to contain -the following files: snapshots.json, nonces.json, and headers.json. The path will -be used as a prefix to resolve per-network configuration files -needed for bootstrapping. Given a source directory `data`, and a -a network name of `preview`, the expected layout for configuration files would be: - -* `data/preview/config.json`: a list of epochs to load. -* `data/preview/snapshots.json`: a list of `Snapshot` values (epoch, point, url) -* `data/preview/nonces.json`: a list of `InitialNonces` values, -* `data/preview/headers.json`: a list of `Point`s. - -These files are loaded by [snapshot_bootstrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) -during bootup. - -## Bootstrapping sequence - -The bootstrapper will be started with an argument that specifies a network, -e.g. "mainnet". From the network, it will build a path to the configuration -and snapshot files as shown above, then load the data contained or described -in those files. config.json holds a list of typically 3 epochs that can be -used to index into snapshots.json to find the corresponding URLs and meta-data -for each of the three snapshot files. 
Loading occurs in this order: - -* publish `SnapshotMessage::Startup` -* download the snapshots (on demand; may have already been done externally) -* parse each snapshot and publish their data on the message bus -* read nonces and publish -* read headers and publish -* publish `CardanoMessage::GenesisComplete(GenesisCompleteMessage {...})` - -Modules in the system will have subscribed to the Startup message and also -to individual structural data update messages before the -bootstrapper runs the above sequence. Upon receiving the `Startup` message, -they will use data messages to populate their state, history (for BlockFrost), -and any other state required to achieve readiness to operate on reception of -the `GenesisCompleteMessage`. - -## Data update messages - -The bootstrapper will publish data as it parses the snapshot files, nonces, and -headers. Snapshot parsing is done while streaming the data to keep the memory -footprint lower. As elements of the file are parsed, callbacks provide the data -to the bootstrapper which publishes the data on the message bus. - -There are TODO markers in [snapshot_bootstrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) -that show where to add the -publishing of the parsed snapshot data. - - - diff --git a/modules/snapshot_bootstrapper/NOTES.md b/modules/snapshot_bootstrapper/NOTES.md new file mode 100644 index 00000000..28cae7a5 --- /dev/null +++ b/modules/snapshot_bootstrapper/NOTES.md @@ -0,0 +1,108 @@ +# Bootstrapping from a Snapshot file + +We can boot an Acropolis node either from genesis and replay all of the blocks up to +some point, or we can boot from a snapshot file. This module provides the components +needed to boot from a snapshot file. +See [snapshot_bootstrapper](../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) for the process that +references and runs with these helpers. + +Booting from a snapshot takes minutes instead of the hours it takes to boot from +genesis. 
It also allows booting from a given epoch which allows one to create tests +that rely only on that epoch of data. We're also skipping some of the problematic +eras and will typically boot from Conway around epoch 305, 306, and 307. It takes +three epochs to have enough context to correctly calculate the rewards. + +The required data for bootstrapping are: + +- snapshot files (each has an associated epoch number and point) +- nonces +- headers + +## Snapshot Files + +The snapshots come from the Amaru project. In their words, +"the snapshots we generated are different [from a Mithril snapshot]: they're +the actual ledger state; i.e. the in-memory state that is constructed by iterating over each block up to a specific +point. So, it's all the UTxOs, the set of pending governance actions, the account balance, etc. +If you get this from a trusted source, you don't need to do any replay, you can just start up and load this from disk. +The format of these is completely non-standard; we just forked the haskell node and spit out whatever we needed to in +CBOR." + +Snapshot files are referenced by their epoch number in the config.json file below. + +See [Amaru snapshot format](../../docs/amaru-snapshot-structure.md) + +## Configuration files + +There is a path for each network bootstrap configuration file. Network should +be one of 'mainnet', 'preprod', 'preview' or 'testnet_' where +`magic` is a 32-bits unsigned value denoting a particular testnet. + +Data structure, e.g. as [Amaru mainnet](https://github.com/pragma-org/amaru/tree/main/data/mainnet) + +The bootstrapper will be given a path to a directory that is expected to contain +the following files: snapshots.json and config.json. The path will +be used as a prefix to resolve per-network configuration files +needed for bootstrapping. 
Given a source directory `data`, and a +a network name of `preview`, the expected layout for configuration files would be: + +* `data/preview/config.json`: a list of epochs to load and points +* `data/preview/snapshots.json`: a list of `SnapshotFileMetadata` values (epoch, point, url) + +These files are loaded by [snapshot_bootstrapper](../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) +during bootup. + +## Bootstrapping sequence + +The bootstrapper will be started with a configuration that specifies a network, +e.g. "mainnet". From the network, it will build a path to the configuration +and snapshot files as shown above, then load the data contained or described +in those files. config.json holds a list of typically 3 epochs that can be +used to index into snapshots.json to find the corresponding URLs and meta-data +for each of the three snapshot files. Loading occurs in this order: + +1. Wait for `startup-topic` message (typically `cardano.sequence.start`) +2. Wait for `bootstrapped-topic` message with genesis values (typically `cardano.sequence.bootstrapped`) +3. Load network configuration from `config.json` +4. Load snapshot metadata from `snapshots.json` +5. Filter snapshots based on epochs specified in config.json +6. Download snapshot files (skips if already present) +7. Publish `SnapshotMessage::Startup` to the snapshot topic +8. Parse each snapshot file using the streaming parser +9. Publish `CardanoMessage::SnapshotComplete` with final block info to the completion topic + +Modules in the system will have subscribed to the startup and completion topics before the +bootstrapper runs the above sequence. Upon receiving snapshot data messages, +they will use the data to populate their state, history (for BlockFrost), +and any other state required to achieve readiness to operate. + +## Data update messages + +The bootstrapper publishes data as it parses the snapshot files using the `SnapshotPublisher`. 
+Snapshot parsing is done while streaming the data to keep the memory +footprint lower. As elements of the file are parsed, callbacks provide the data +to the publisher which can then publish structured data on the message bus. + +The `SnapshotPublisher` implements the streaming snapshot callbacks: + +- `UtxoCallback`: Receives individual UTXO entries +- `PoolCallback`: Receives pool information +- `StakeCallback`: Receives account/stake information +- `DRepCallback`: Receives DRep (delegated representative) information +- `ProposalCallback`: Receives governance proposals +- `SnapshotCallbacks`: Receives metadata and completion signals + +Currently the publisher accumulates this data for statistics and future use. Publishing +of detailed snapshot data to downstream modules can be added by implementing the +appropriate message bus publishes in the callback methods. + +## Configuration + +The bootstrapper supports the following configuration options: + +- `network`: Network name (default: "mainnet") +- `data-dir`: Base directory for network data (default: "./data") +- `startup-topic`: Topic to wait for startup signal (default: "cardano.sequence.start") +- `snapshot-topic`: Topic to publish snapshot messages (default: "cardano.snapshot") +- `bootstrapped-subscribe-topic`: Topic to receive genesis completion (default: "cardano.sequence.bootstrapped") +- `completion-topic`: Topic to publish completion signal (default: "cardano.snapshot.complete") \ No newline at end of file From be946156d180f1569987581a2f4014e5aef5eae0 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Sun, 23 Nov 2025 08:31:27 -0800 Subject: [PATCH 08/28] feat: update dependencies and add ProgressReader for download tracking --- Cargo.lock | 2 + README.md | 13 ++--- modules/README.md | 6 +- modules/snapshot_bootstrapper/Cargo.toml | 2 + modules/snapshot_bootstrapper/NOTES.md | 4 +- .../src/progress_reader.rs | 55 +++++++++++++++++++ .../src/snapshot_bootstrapper.rs | 28 +++++----- 
processes/omnibus/src/main.rs | 17 +++--- 8 files changed, 95 insertions(+), 32 deletions(-) create mode 100644 modules/snapshot_bootstrapper/src/progress_reader.rs diff --git a/Cargo.lock b/Cargo.lock index 701927eb..a01831af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -376,12 +376,14 @@ dependencies = [ "async-compression", "caryatid_sdk", "config", + "futures-util", "reqwest 0.12.24", "serde", "serde_json", "tempfile", "thiserror 2.0.17", "tokio", + "tokio-util", "tracing", ] diff --git a/README.md b/README.md index 41cc4182..f28349ec 100644 --- a/README.md +++ b/README.md @@ -26,28 +26,23 @@ to communicate between micro-services. ```mermaid graph TB - subgraph Process A Module1(Module 1) Module2(Module 2) Caryatid1(Caryatid Framework) - Module1 <--> Caryatid1 Module2 <--> Caryatid1 end subgraph Process B Module3(Module 3) - Caryatid2(Caryatid Framework) - Module3 <--> Caryatid2 end RabbitMQ([RabbitMQ Message Bus]) - style RabbitMQ fill:#eff - + style RabbitMQ fill: #eff Caryatid1 <--> RabbitMQ Caryatid2 <--> RabbitMQ ``` @@ -61,6 +56,9 @@ graph TB Fetches a chain snapshot from Mithril and replays all the blocks in it - [Genesis Bootstrapper](modules/genesis_bootstrapper) - reads the Genesis file for a chain and generates initial UTXOs +- [Snapshot Bootstrapper](modules/snapshot_bootstrapper) - downloads ledger state snapshot files for configured epochs, + streams and parses the CBOR data (UTXOs, pools, accounts, DReps, proposals), and publishes completion messages to + signal snapshot readiness to other modules. 
- [Block Unpacker](modules/block_unpacker) - unpacks received blocks into individual transactions - [Tx Unpacker](modules/tx_unpacker) - parses transactions and generates UTXO @@ -69,7 +67,8 @@ graph TB - [SPO State](modules/spo_state) - matches SPO registrations and retirements - [DRep State](modules/drep_state) - tracks DRep registrations - [Governance State](modules/governance_state) - tracks Governance Actions and voting -- [Stake Delta Filter](modules/stake_delta_filter) - filters out stake address changes and handles stake pointer references +- [Stake Delta Filter](modules/stake_delta_filter) - filters out stake address changes and handles stake pointer + references - [Epochs State](modules/epochs_state) - track fees blocks minted and epochs history - [Accounts State](modules/accounts_state) - stake and reward accounts tracker - [Assets State](modules/assets_state) - tracks native asset supply, metadata, transactions, and addresses diff --git a/modules/README.md b/modules/README.md index 3d288502..c4ccdf3a 100644 --- a/modules/README.md +++ b/modules/README.md @@ -10,6 +10,9 @@ compose the Acropolis Architecture Fetches a chain snapshot from Mithril and replays all the blocks in it * [Genesis Bootstrapper](genesis_bootstrapper) - reads the Genesis file for a chain and generates initial UTXOs +* [Snapshot Bootstrapper](snapshot_bootstrapper) - downloads ledger state snapshot files for configured epochs, + streams and parses the CBOR data (UTXOs, pools, accounts, DReps, proposals), and publishes completion messages to + signal snapshot readiness to other modules. 
* [Block Unpacker](block_unpacker) - unpacks received blocks into individual transactions * [Tx Unpacker](tx_unpacker) - parses transactions and generates UTXO @@ -19,7 +22,6 @@ compose the Acropolis Architecture * [DRep State](drep_state) - tracks DRep registrations * [Governance State](governance_state) - tracks Governance Actions and voting * [Stake Delta Filter](stake_delta_filter) - filters out stake address changes and handles stake pointer references -* [Epoch Activity Counter](epoch_activity_couinter) - counts fees and block production for rewards * [Accounts State](accounts_state) - stake and reward accounts tracker ## How to add a new module @@ -88,7 +90,7 @@ to call `MyModule::register()` in the process `main()`: use acropolis_module_my_module::MyModule; // in main()... - MyModule::register(&mut process); +MyModule::register( & mut process); ``` You also need to mention the module in (e.g.) `omnibus.toml` to get it created, even if all diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml index 3e16f4c6..b79d3c35 100644 --- a/modules/snapshot_bootstrapper/Cargo.toml +++ b/modules/snapshot_bootstrapper/Cargo.toml @@ -22,6 +22,8 @@ serde_json = "1.0.132" async-compression = { version = "0.4.32", features = ["tokio", "gzip"] } reqwest = { version = "0.12", features = ["stream"] } thiserror = "2.0.17" +futures-util = "0.3.31" +tokio-util = "0.7.17" [dev-dependencies] tempfile = "3" diff --git a/modules/snapshot_bootstrapper/NOTES.md b/modules/snapshot_bootstrapper/NOTES.md index 28cae7a5..48dece30 100644 --- a/modules/snapshot_bootstrapper/NOTES.md +++ b/modules/snapshot_bootstrapper/NOTES.md @@ -92,8 +92,8 @@ The `SnapshotPublisher` implements the streaming snapshot callbacks: - `ProposalCallback`: Receives governance proposals - `SnapshotCallbacks`: Receives metadata and completion signals -Currently the publisher accumulates this data for statistics and future use. 
Publishing -of detailed snapshot data to downstream modules can be added by implementing the +Currently the publisher just accumulates this data, but this will need to be extended to publish the corresponding +message types. Publishing of detailed snapshot data to downstream modules can be added by implementing the appropriate message bus publishes in the callback methods. ## Configuration diff --git a/modules/snapshot_bootstrapper/src/progress_reader.rs b/modules/snapshot_bootstrapper/src/progress_reader.rs new file mode 100644 index 00000000..e042d746 --- /dev/null +++ b/modules/snapshot_bootstrapper/src/progress_reader.rs @@ -0,0 +1,55 @@ +use std::pin::Pin; +use std::task::{Context, Poll}; +use tracing::info; + +pub struct ProgressReader { + inner: R, + bytes_read: u64, + last_log: u64, + log_interval: u64, + total_size: Option, +} + +impl ProgressReader { + pub fn new(inner: R, total_size: Option, log_interval_mb: u64) -> Self { + Self { + inner, + bytes_read: 0, + last_log: 0, + log_interval: log_interval_mb * 1024 * 1024, + total_size, + } + } +} + +impl tokio::io::AsyncRead for ProgressReader { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + let before = buf.filled().len(); + let result = Pin::new(&mut self.inner).poll_read(cx, buf); + let after = buf.filled().len(); + let bytes_read = (after - before) as u64; + + self.bytes_read += bytes_read; + + if self.bytes_read - self.last_log >= self.log_interval { + if let Some(total) = self.total_size { + let percent = (self.bytes_read as f64 / total as f64) * 100.0; + info!( + "Download progress: {:.1}% ({} MB / {} MB)", + percent, + self.bytes_read / (1024 * 1024), + total / (1024 * 1024) + ); + } else { + info!("Downloaded {} MB", self.bytes_read / (1024 * 1024)); + } + self.last_log = self.bytes_read; + } + + result + } +} diff --git a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs 
b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs index f1167ce5..7f609993 100644 --- a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs @@ -1,8 +1,11 @@ +mod progress_reader; + use std::{ path::{Path, PathBuf}, sync::Arc, }; +use crate::progress_reader::ProgressReader; use acropolis_common::{ messages::{CardanoMessage, Message}, snapshot::{ @@ -19,6 +22,7 @@ use anyhow::{bail, Result}; use async_compression::tokio::bufread::GzipDecoder; use caryatid_sdk::{module, Context, Subscription}; use config::Config; +use futures_util::TryStreamExt; use reqwest::Client; use serde::{Deserialize, Serialize}; use std::fs; @@ -461,9 +465,8 @@ impl SnapshotBootstrapper { let tmp_path = path.with_extension("partial"); - // Ensure cleanup on failure let result = async { - let mut response = client + let response = client .get(url) .send() .await @@ -476,31 +479,28 @@ impl SnapshotBootstrapper { )); } + let content_length = response.content_length(); let mut file = File::create(&tmp_path).await?; - let mut compressed_data = Vec::new(); - while let Some(chunk) = response - .chunk() - .await - .map_err(|e| SnapshotBootstrapError::DownloadError(url.to_string(), e))? 
- { - compressed_data.extend_from_slice(&chunk); - } + let stream = response.bytes_stream(); + let async_read = tokio_util::io::StreamReader::new( + stream.map_err(|e| io::Error::new(io::ErrorKind::Other, e)), + ); - let cursor = io::Cursor::new(&compressed_data); - let buffered = BufReader::new(cursor); + let progress_reader = ProgressReader::new(async_read, content_length, 100); + let buffered = BufReader::new(progress_reader); let mut decoder = GzipDecoder::new(buffered); + tokio::io::copy(&mut decoder, &mut file).await?; file.sync_all().await?; tokio::fs::rename(&tmp_path, output_path).await?; - info!("Downloaded snapshot to {}", output_path); + info!("Downloaded and decompressed snapshot to {}", output_path); Ok(()) } .await; - // Clean up partial file on error if result.is_err() { let _ = tokio::fs::remove_file(&tmp_path).await; } diff --git a/processes/omnibus/src/main.rs b/processes/omnibus/src/main.rs index 66d49454..49f2cea3 100644 --- a/processes/omnibus/src/main.rs +++ b/processes/omnibus/src/main.rs @@ -45,6 +45,10 @@ use tracing_opentelemetry::OpenTelemetryLayer; use tracing_subscriber::prelude::*; use tracing_subscriber::{filter, fmt, EnvFilter, Registry}; +const STARTUP_METHOD_MITHRIL: &str = "mithril"; +const STARTUP_METHOD_SNAPSHOT: &str = "snapshot"; +const CONFIG_KEY_STARTUP_METHOD: &str = "startup.method"; + #[cfg(not(target_env = "msvc"))] use tikv_jemallocator::Jemalloc; #[cfg(not(target_env = "msvc"))] @@ -94,27 +98,26 @@ pub async fn main() -> Result<()> { Config::builder() .add_source(File::with_name(&args.config)) .add_source(Environment::with_prefix("ACROPOLIS")) - .build() - .unwrap(), + .build()?, ); // Create the process let mut process = Process::::create(config.clone()).await; // Get startup method from config - // TODO: Lift to constant - let startup_method = - config.get_string("startup.method").unwrap_or_else(|_| "mithril".to_string()); + let startup_method = config + .get_string(CONFIG_KEY_STARTUP_METHOD) + .unwrap_or_else(|_| 
STARTUP_METHOD_MITHRIL.to_string()); info!("Using startup method: {}", startup_method); // Register bootstrap modules based on the startup method match startup_method.as_str() { - "mithril" => { + STARTUP_METHOD_MITHRIL => { info!("Registering MithrilSnapshotFetcher"); MithrilSnapshotFetcher::register(&mut process); } - "snapshot" => { + STARTUP_METHOD_SNAPSHOT => { info!("Registering SnapshotBootstrapper"); SnapshotBootstrapper::register(&mut process); } From a0175f889ad00dd6a4e414e40630af52256b6ac3 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Sun, 23 Nov 2025 09:02:56 -0800 Subject: [PATCH 09/28] feat: add headers and nonces JSON files, enhance HTTP client configuration in snapshot bootstrapper --- .../data/mainnet/headers.json | 4 ++++ .../snapshot_bootstrapper/data/mainnet/nonces.json | 7 +++++++ .../src/snapshot_bootstrapper.rs | 14 +++++++++++--- 3 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 modules/snapshot_bootstrapper/data/mainnet/headers.json create mode 100644 modules/snapshot_bootstrapper/data/mainnet/nonces.json diff --git a/modules/snapshot_bootstrapper/data/mainnet/headers.json b/modules/snapshot_bootstrapper/data/mainnet/headers.json new file mode 100644 index 00000000..adbfb813 --- /dev/null +++ b/modules/snapshot_bootstrapper/data/mainnet/headers.json @@ -0,0 +1,4 @@ +[ + "134524751.93d554d67c46749f45fba3a091857a9c489ad3ed1d2c7b32b587ab290bec51f5", + "134956761.13cb4a62597e36fad2dba4e00974ec5ac29c3824d96b2ceb4ce056271cd4f8da" +] \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/data/mainnet/nonces.json b/modules/snapshot_bootstrapper/data/mainnet/nonces.json new file mode 100644 index 00000000..3d38d6f5 --- /dev/null +++ b/modules/snapshot_bootstrapper/data/mainnet/nonces.json @@ -0,0 +1,7 @@ +{ + "at": "134956789.6558deef007ba372a414466e49214368c17c1f8428093193fc187d1c4587053c", + "active": "0b9e320e63bf995b81287ce7a624b6735d98b083cc1a0e2ae8b08b680c79c983", + "candidate": 
"6cc4dafecbe0d593ca0dee64518542f5faa741538791ac7fc2d5008f32d5c4d5", + "evolving": "f5589f01dd0efd0add0c58e8b27dc73ba3fcd662d9026b3fedbf06c648adb313", + "tail": "29011cc1320d03b3da0121236dc66e6bc391feef4bb1d506a7fb20e769d6a494" +} \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs index 7f609993..e97b1ddc 100644 --- a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs @@ -3,6 +3,7 @@ mod progress_reader; use std::{ path::{Path, PathBuf}, sync::Arc, + time, }; use crate::progress_reader::ProgressReader; @@ -58,6 +59,9 @@ pub enum SnapshotBootstrapError { #[error("Failed to download snapshot from {0}: {1}")] DownloadError(String, reqwest::Error), + #[error("Failed to initalize HTTP client: {0:?}")] + ClientError(reqwest::Error), + #[error("Download failed from {0}: HTTP status {1}")] DownloadInvalidStatusCode(String, reqwest::StatusCode), @@ -430,7 +434,11 @@ impl SnapshotBootstrapper { snapshots: &[SnapshotFileMetadata], network_dir: &str, ) -> Result<(), SnapshotBootstrapError> { - let client = Client::new(); + let client = Client::builder() + .timeout(std::time::Duration::from_mins(5)) + .connect_timeout(std::time::Duration::from_secs(30)) + .build() + .map_err(|e| SnapshotBootstrapError::ClientError(e))?; for snapshot_meta in snapshots { let file_path = snapshot_meta.file_path(network_dir); @@ -484,10 +492,10 @@ impl SnapshotBootstrapper { let stream = response.bytes_stream(); let async_read = tokio_util::io::StreamReader::new( - stream.map_err(|e| io::Error::new(io::ErrorKind::Other, e)), + stream.map_err(|e| io::Error::new(std::io::ErrorKind::Other, e)), ); - let progress_reader = ProgressReader::new(async_read, content_length, 100); + let progress_reader = ProgressReader::new(async_read, content_length, 200); let buffered = BufReader::new(progress_reader); let mut decoder = 
GzipDecoder::new(buffered); From 724d8ec710d0f75497d663ef525ffde641527e39 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Sun, 23 Nov 2025 10:15:49 -0800 Subject: [PATCH 10/28] feat: split bootstrapper into a few modules with clearly defined responsibilities --- modules/snapshot_bootstrapper/Cargo.toml | 2 +- modules/snapshot_bootstrapper/NOTES.md | 4 +- modules/snapshot_bootstrapper/README.md | 47 + .../snapshot_bootstrapper/src/bootstrapper.rs | 221 +++++ modules/snapshot_bootstrapper/src/config.rs | 350 +++++++ .../snapshot_bootstrapper/src/downloader.rs | 215 +++++ .../snapshot_bootstrapper/src/publisher.rs | 153 +++ .../src/snapshot_bootstrapper.rs | 876 ------------------ 8 files changed, 989 insertions(+), 879 deletions(-) create mode 100644 modules/snapshot_bootstrapper/README.md create mode 100644 modules/snapshot_bootstrapper/src/bootstrapper.rs create mode 100644 modules/snapshot_bootstrapper/src/config.rs create mode 100644 modules/snapshot_bootstrapper/src/downloader.rs create mode 100644 modules/snapshot_bootstrapper/src/publisher.rs delete mode 100644 modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml index b79d3c35..aa569913 100644 --- a/modules/snapshot_bootstrapper/Cargo.toml +++ b/modules/snapshot_bootstrapper/Cargo.toml @@ -29,4 +29,4 @@ tokio-util = "0.7.17" tempfile = "3" [lib] -path = "src/snapshot_bootstrapper.rs" +path = "src/bootstrapper.rs" diff --git a/modules/snapshot_bootstrapper/NOTES.md b/modules/snapshot_bootstrapper/NOTES.md index 48dece30..35d40e34 100644 --- a/modules/snapshot_bootstrapper/NOTES.md +++ b/modules/snapshot_bootstrapper/NOTES.md @@ -3,7 +3,7 @@ We can boot an Acropolis node either from genesis and replay all of the blocks up to some point, or we can boot from a snapshot file. This module provides the components needed to boot from a snapshot file. 
-See [snapshot_bootstrapper](../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) for the process that +See [snapshot_bootstrapper](src/bootstrapper.rs) for the process that references and runs with these helpers. Booting from a snapshot takes minutes instead of the hours it takes to boot from @@ -49,7 +49,7 @@ a network name of `preview`, the expected layout for configuration files would b * `data/preview/config.json`: a list of epochs to load and points * `data/preview/snapshots.json`: a list of `SnapshotFileMetadata` values (epoch, point, url) -These files are loaded by [snapshot_bootstrapper](../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) +These files are loaded by [snapshot_bootstrapper](src/bootstrapper.rs) during bootup. ## Bootstrapping sequence diff --git a/modules/snapshot_bootstrapper/README.md b/modules/snapshot_bootstrapper/README.md new file mode 100644 index 00000000..8ea90146 --- /dev/null +++ b/modules/snapshot_bootstrapper/README.md @@ -0,0 +1,47 @@ +# Snapshot Bootstrapper Module + +The snapshot bootstrapper module downloads and processes Cardano ledger snapshots to initialize system state before +processing the live chain. + +## Overview + +This module: + +1. Waits for genesis bootstrap completion +2. Downloads compressed snapshot files from configured URLs +3. Streams and publishes snapshot data (UTXOs, pools, accounts, DReps, proposals) +4. 
Signals completion to allow chain synchronization to begin + +## Messages + +The snapshot bootstrapper: + +- **Subscribes to** `cardano.sequence.start` - Waits for startup signal +- **Subscribes to** `cardano.sequence.bootstrapped` - Waits for genesis completion +- **Publishes to** `cardano.snapshot` - Streams snapshot data during processing +- **Publishes to** `cardano.snapshot.complete` - Signals completion with block info + +## Default Configuration + +```toml +[module.snapshot-bootstrapper] + +# Network and data +network = "mainnet" +data-dir = "./data" + +# Message topics +startup-topic = "cardano.sequence.start" +snapshot-topic = "cardano.snapshot" +bootstrapped-subscribe-topic = "cardano.sequence.bootstrapped" +completion-topic = "cardano.snapshot.complete" +``` + +## Directory Structure + +The module expects the following files in `{data-dir}/{network}/`: + +- **`config.json`** - Network configuration specifying which snapshot epochs to load +- **`snapshots.json`** - Snapshot metadata including download URLs + +Snapshot files are downloaded to `{data-dir}/{network}/{point}.cbor`. 
\ No newline at end of file diff --git a/modules/snapshot_bootstrapper/src/bootstrapper.rs b/modules/snapshot_bootstrapper/src/bootstrapper.rs new file mode 100644 index 00000000..d876ba19 --- /dev/null +++ b/modules/snapshot_bootstrapper/src/bootstrapper.rs @@ -0,0 +1,221 @@ +mod config; +mod downloader; +mod progress_reader; +mod publisher; + +use crate::config::{ConfigError, NetworkConfig, SnapshotConfig, SnapshotFileMetadata}; +use crate::downloader::{DownloadError, SnapshotDownloader}; +use crate::publisher::SnapshotPublisher; +use ::config::Config; +use acropolis_common::{ + messages::{CardanoMessage, Message}, + snapshot::StreamingSnapshotParser, + BlockHash, BlockInfo, BlockStatus, Era, +}; +use anyhow::{bail, Result}; +use caryatid_sdk::{module, Context, Subscription}; +use std::sync::Arc; +use thiserror::Error; +use tokio::time::Instant; +use tracing::{error, info, info_span, Instrument}; + +#[derive(Debug, Error)] +pub enum BootstrapError { + #[error("Configuration error: {0}")] + Config(#[from] ConfigError), + + #[error("Download error: {0}")] + Download(#[from] DownloadError), + + #[error("Snapshot parsing failed: {0}")] + Parse(String), + + #[error(transparent)] + Other(#[from] anyhow::Error), +} + +#[module( + message_type(Message), + name = "snapshot-bootstrapper", + description = "Snapshot Bootstrapper to broadcast state via streaming" +)] +pub struct SnapshotBootstrapper; + +impl SnapshotBootstrapper { + /// Initializes the snapshot bootstrapper. 
+ pub async fn init(&self, context: Arc>, config: Arc) -> Result<()> { + let cfg = SnapshotConfig::try_load(&config)?; + + info!("Snapshot bootstrapper initializing"); + info!(" Network: {}", cfg.network); + info!(" Data directory: {}", cfg.data_dir); + info!(" Publishing on '{}'", cfg.snapshot_topic); + info!(" Completing with '{}'", cfg.completion_topic); + + let startup_sub = context.subscribe(&cfg.startup_topic).await?; + let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_topic).await?; + + context.clone().run(async move { + let span = info_span!("snapshot_bootstrapper.handle"); + async { + // Wait for the startup signal + if let Err(e) = Self::wait_startup(startup_sub).await { + error!("Failed waiting for startup: {e:#}"); + return; + } + + // Wait for genesis bootstrap completion + if let Err(e) = Self::wait_genesis_completion(bootstrapped_sub).await { + error!("Failed waiting for bootstrapped: {e:#}"); + return; + } + + info!("Bootstrap prerequisites met, starting snapshot processing"); + + // Load network configuration + let network_config = match NetworkConfig::read_from_file(&cfg.config_path()) { + Ok(config) => config, + Err(e) => { + error!("Failed to read network config: {e:#}"); + return; + } + }; + + // Load snapshots metadata + let all_snapshots = + match SnapshotFileMetadata::read_all_from_file(&cfg.snapshots_path()) { + Ok(snapshots) => snapshots, + Err(e) => { + error!("Failed to read snapshots metadata: {e:#}"); + return; + } + }; + + // Filter snapshots based on network config + let target_snapshots = SnapshotFileMetadata::filter_by_epochs( + &all_snapshots, + &network_config.snapshots, + ); + if target_snapshots.is_empty() { + error!( + "No snapshots found for requested epochs: {:?}", + network_config.snapshots + ); + return; + } + + info!("Found {} snapshot(s) to process", target_snapshots.len()); + + // Create downloader and download all snapshots + let downloader = match SnapshotDownloader::new(cfg.network_dir()) { + Ok(d) => d, + 
Err(e) => { + error!("Failed to create snapshot downloader: {e:#}"); + return; + } + }; + + if let Err(e) = downloader.download_all(&target_snapshots).await { + error!("Failed to download snapshots: {e:#}"); + return; + } + + // Process snapshots in order + if let Err(e) = + Self::process_snapshots(&target_snapshots, &cfg, context.clone()).await + { + error!("Failed to process snapshots: {e:#}"); + return; + } + + info!("Snapshot bootstrap completed successfully"); + } + .instrument(span) + .await; + }); + + Ok(()) + } + + async fn wait_startup(mut subscription: Box>) -> Result<()> { + let (_, _message) = subscription.read().await?; + info!("Received startup message"); + Ok(()) + } + + async fn wait_genesis_completion( + mut subscription: Box>, + ) -> Result<()> { + let (_, message) = subscription.read().await?; + match message.as_ref() { + Message::Cardano((_, CardanoMessage::GenesisComplete(_complete))) => { + info!("Received genesis complete message"); + Ok(()) + } + msg => bail!("Unexpected message in bootstrapped topic: {msg:?}"), + } + } + + async fn process_snapshots( + snapshots: &[SnapshotFileMetadata], + cfg: &SnapshotConfig, + context: Arc>, + ) -> Result<()> { + let mut publisher = SnapshotPublisher::new( + context, + cfg.completion_topic.clone(), + cfg.snapshot_topic.clone(), + ); + + publisher.publish_start().await?; + + for snapshot_meta in snapshots { + let file_path = snapshot_meta.file_path(&cfg.network_dir()); + + info!( + "Processing snapshot for epoch {} from {}", + snapshot_meta.epoch, file_path + ); + + Self::parse_snapshot(&file_path, &mut publisher).await?; + } + + let metadata = publisher + .metadata() + .ok_or_else(|| anyhow::anyhow!("No metadata received from snapshots"))?; + + let block_info = build_block_info_from_metadata(metadata); + publisher.publish_completion(block_info).await?; + + Ok(()) + } + + async fn parse_snapshot(file_path: &str, publisher: &mut SnapshotPublisher) -> Result<()> { + info!("Parsing snapshot: {}", file_path); 
+ let start = Instant::now(); + + let parser = StreamingSnapshotParser::new(file_path); + parser.parse(publisher)?; + + let duration = start.elapsed(); + info!("Parsed snapshot in {:.2?}", duration); + + Ok(()) + } +} + +fn build_block_info_from_metadata( + metadata: &acropolis_common::snapshot::streaming_snapshot::SnapshotMetadata, +) -> BlockInfo { + BlockInfo { + status: BlockStatus::Immutable, + slot: 0, + number: 0, + hash: BlockHash::default(), + epoch: metadata.epoch, + epoch_slot: 0, + new_epoch: false, + timestamp: 0, + era: Era::Conway, + } +} diff --git a/modules/snapshot_bootstrapper/src/config.rs b/modules/snapshot_bootstrapper/src/config.rs new file mode 100644 index 00000000..71347730 --- /dev/null +++ b/modules/snapshot_bootstrapper/src/config.rs @@ -0,0 +1,350 @@ +use anyhow::Result; +use config::Config; +use serde::{Deserialize, Serialize}; +use std::fs; +use std::io; +use std::path::PathBuf; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ConfigError { + #[error("Cannot read network config file {0}: {1}")] + ReadNetworkConfig(PathBuf, io::Error), + + #[error("Cannot read snapshots metadata file {0}: {1}")] + ReadSnapshotsFile(PathBuf, io::Error), + + #[error("Failed to parse network config {0}: {1}")] + MalformedNetworkConfig(PathBuf, serde_json::Error), + + #[error("Failed to parse snapshots JSON file {0}: {1}")] + MalformedSnapshotsFile(PathBuf, serde_json::Error), +} + +const DEFAULT_SNAPSHOT_TOPIC: &str = "cardano.snapshot"; +const DEFAULT_STARTUP_TOPIC: &str = "cardano.sequence.start"; +const DEFAULT_COMPLETION_TOPIC: &str = "cardano.snapshot.complete"; +const DEFAULT_BOOTSTRAPPED_TOPIC: &str = "cardano.sequence.bootstrapped"; + +/// Configuration for the snapshot bootstrapper +#[derive(Debug, Clone)] +pub struct SnapshotConfig { + pub network: String, + pub data_dir: String, + pub startup_topic: String, + pub snapshot_topic: String, + pub bootstrapped_topic: String, + pub completion_topic: String, +} + +impl SnapshotConfig { + 
pub fn try_load(config: &Config) -> Result { + Ok(Self { + network: config.get_string("network").unwrap_or_else(|_| "mainnet".to_string()), + data_dir: config.get_string("data-dir").unwrap_or_else(|_| "./data".to_string()), + startup_topic: config + .get_string("startup-topic") + .unwrap_or(DEFAULT_STARTUP_TOPIC.to_string()), + snapshot_topic: config + .get_string("snapshot-topic") + .unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string()), + bootstrapped_topic: config + .get_string("bootstrapped-subscribe-topic") + .unwrap_or(DEFAULT_BOOTSTRAPPED_TOPIC.to_string()), + completion_topic: config + .get_string("completion-topic") + .unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string()), + }) + } + + pub fn network_dir(&self) -> String { + format!("{}/{}", self.data_dir, self.network) + } + + pub fn config_path(&self) -> String { + format!("{}/config.json", self.network_dir()) + } + + pub fn snapshots_path(&self) -> String { + format!("{}/snapshots.json", self.network_dir()) + } +} + +/// Network configuration file (config.json) +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct NetworkConfig { + pub snapshots: Vec, + pub points: Vec, +} + +impl NetworkConfig { + pub fn read_from_file(path: &str) -> Result { + let path_buf = PathBuf::from(path); + let content = fs::read_to_string(&path_buf) + .map_err(|e| ConfigError::ReadNetworkConfig(path_buf.clone(), e))?; + + let config: NetworkConfig = serde_json::from_str(&content) + .map_err(|e| ConfigError::MalformedNetworkConfig(path_buf, e))?; + + Ok(config) + } +} + +/// Point +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct Point { + pub epoch: u64, + pub id: String, + pub slot: u64, +} + +/// Snapshot metadata from snapshots.json +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct SnapshotFileMetadata { + pub epoch: u64, + pub point: String, + pub url: String, +} + +impl SnapshotFileMetadata { + pub fn read_all_from_file(path: &str) -> Result, 
ConfigError> { + let path_buf = PathBuf::from(path); + let content = fs::read_to_string(&path_buf) + .map_err(|e| ConfigError::ReadSnapshotsFile(path_buf.clone(), e))?; + + let snapshots: Vec = serde_json::from_str(&content) + .map_err(|e| ConfigError::MalformedSnapshotsFile(path_buf, e))?; + + Ok(snapshots) + } + + pub fn file_path(&self, network_dir: &str) -> String { + format!("{}/{}.cbor", network_dir, self.point) + } + + pub fn filter_by_epochs(snapshots: &[Self], epochs: &[u64]) -> Vec { + snapshots.iter().filter(|s| epochs.contains(&s.epoch)).cloned().collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use std::path::Path; + use tempfile::TempDir; + + fn create_test_network_config(dir: &Path, snapshots: Vec) -> PathBuf { + let config = NetworkConfig { + snapshots, + points: vec![Point { + epoch: 500, + id: "test_block_hash".to_string(), + slot: 12345678, + }], + }; + + let config_path = dir.join("config.json"); + let mut file = fs::File::create(&config_path).unwrap(); + file.write_all(serde_json::to_string_pretty(&config).unwrap().as_bytes()).unwrap(); + config_path + } + + fn create_test_snapshots_metadata(dir: &Path, epochs: Vec, base_url: &str) -> PathBuf { + let snapshots: Vec = epochs + .iter() + .map(|epoch| SnapshotFileMetadata { + epoch: *epoch, + point: format!("point_{}", epoch), + url: format!("{}/snapshot_{}.cbor.gz", base_url, epoch), + }) + .collect(); + + let snapshots_path = dir.join("snapshots.json"); + let mut file = fs::File::create(&snapshots_path).unwrap(); + file.write_all(serde_json::to_string_pretty(&snapshots).unwrap().as_bytes()).unwrap(); + snapshots_path + } + + #[test] + fn test_snapshot_config_network_dir() { + let config = SnapshotConfig { + network: "mainnet".to_string(), + data_dir: "./data".to_string(), + startup_topic: "startup".to_string(), + snapshot_topic: "snapshot".to_string(), + bootstrapped_topic: "bootstrapped".to_string(), + completion_topic: "completion".to_string(), + }; + + 
assert_eq!(config.network_dir(), "./data/mainnet"); + } + + #[test] + fn test_snapshot_config_config_path() { + let config = SnapshotConfig { + network: "preprod".to_string(), + data_dir: "/var/data".to_string(), + startup_topic: "startup".to_string(), + snapshot_topic: "snapshot".to_string(), + bootstrapped_topic: "bootstrapped".to_string(), + completion_topic: "completion".to_string(), + }; + + assert_eq!(config.config_path(), "/var/data/preprod/config.json"); + } + + #[test] + fn test_snapshot_config_snapshots_path() { + let config = SnapshotConfig { + network: "mainnet".to_string(), + data_dir: "./data".to_string(), + startup_topic: "startup".to_string(), + snapshot_topic: "snapshot".to_string(), + bootstrapped_topic: "bootstrapped".to_string(), + completion_topic: "completion".to_string(), + }; + + assert_eq!(config.snapshots_path(), "./data/mainnet/snapshots.json"); + } + + #[test] + fn test_snapshot_file_metadata_file_path() { + let metadata = SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: "https://example.com/snapshot.cbor.gz".to_string(), + }; + + assert_eq!( + metadata.file_path("/data/mainnet"), + "/data/mainnet/point_500.cbor" + ); + } + + #[test] + fn test_filter_by_epochs() { + let all_snapshots = vec![ + SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: "url1".to_string(), + }, + SnapshotFileMetadata { + epoch: 501, + point: "point_501".to_string(), + url: "url2".to_string(), + }, + SnapshotFileMetadata { + epoch: 502, + point: "point_502".to_string(), + url: "url3".to_string(), + }, + ]; + + let filtered = SnapshotFileMetadata::filter_by_epochs(&all_snapshots, &[500, 502]); + + assert_eq!(filtered.len(), 2); + assert_eq!(filtered[0].epoch, 500); + assert_eq!(filtered[1].epoch, 502); + } + + #[test] + fn test_read_network_config_success() { + let temp_dir = TempDir::new().unwrap(); + let config_path = create_test_network_config(temp_dir.path(), vec![500, 501]); + + let result = 
NetworkConfig::read_from_file(config_path.to_str().unwrap()); + assert!(result.is_ok()); + + let config = result.unwrap(); + assert_eq!(config.snapshots, vec![500, 501]); + assert_eq!(config.points.len(), 1); + } + + #[test] + fn test_read_network_config_missing_file() { + let result = NetworkConfig::read_from_file("/nonexistent/config.json"); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + ConfigError::ReadNetworkConfig(_, _) + )); + } + + #[test] + fn test_read_network_config_malformed_json() { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("config.json"); + let mut file = fs::File::create(&config_path).unwrap(); + file.write_all(b"{ invalid json }").unwrap(); + + let result = NetworkConfig::read_from_file(config_path.to_str().unwrap()); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + ConfigError::MalformedNetworkConfig(_, _) + )); + } + + #[test] + fn test_read_snapshots_metadata_success() { + let temp_dir = TempDir::new().unwrap(); + let snapshots_path = + create_test_snapshots_metadata(temp_dir.path(), vec![500, 501], "https://example.com"); + + let result = SnapshotFileMetadata::read_all_from_file(snapshots_path.to_str().unwrap()); + assert!(result.is_ok()); + + let snapshots = result.unwrap(); + assert_eq!(snapshots.len(), 2); + assert_eq!(snapshots[0].epoch, 500); + assert_eq!(snapshots[1].epoch, 501); + } + + #[test] + fn test_read_snapshots_metadata_missing_file() { + let result = SnapshotFileMetadata::read_all_from_file("/nonexistent/snapshots.json"); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + ConfigError::ReadSnapshotsFile(_, _) + )); + } + + #[test] + fn test_corrupted_config_json_fails_gracefully() { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("config.json"); + let mut file = fs::File::create(&config_path).unwrap(); + file.write_all(b"{\"snapshots\": [500, 501]").unwrap(); + + let result = 
NetworkConfig::read_from_file(config_path.to_str().unwrap()); + assert!(result.is_err()); + + if let Err(ConfigError::MalformedNetworkConfig(path, _)) = result { + assert_eq!(path, config_path); + } else { + panic!("Expected MalformedNetworkConfig error"); + } + } + + #[test] + fn test_corrupted_snapshots_json_fails_gracefully() { + let temp_dir = TempDir::new().unwrap(); + let snapshots_path = temp_dir.path().join("snapshots.json"); + let mut file = fs::File::create(&snapshots_path).unwrap(); + file.write_all(b"[{\"epoch\": 500}").unwrap(); + + let result = SnapshotFileMetadata::read_all_from_file(snapshots_path.to_str().unwrap()); + assert!(result.is_err()); + + if let Err(ConfigError::MalformedSnapshotsFile(path, _)) = result { + assert_eq!(path, snapshots_path); + } else { + panic!("Expected MalformedSnapshotsFile error"); + } + } +} diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs new file mode 100644 index 00000000..ea3e7332 --- /dev/null +++ b/modules/snapshot_bootstrapper/src/downloader.rs @@ -0,0 +1,215 @@ +use crate::config::SnapshotFileMetadata; +use crate::progress_reader::ProgressReader; +use async_compression::tokio::bufread::GzipDecoder; +use futures_util::TryStreamExt; +use reqwest::Client; +use std::io; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use tokio::fs::File; +use tokio::io::BufReader; +use tracing::info; + +#[derive(Debug, Error)] +pub enum DownloadError { + #[error("Failed to initialize HTTP client: {0}")] + ClientInit(#[from] reqwest::Error), + + #[error("Failed to download snapshot from {0}: {1}")] + Download(String, reqwest::Error), + + #[error("Download failed from {0}: HTTP status {1}")] + InvalidStatusCode(String, reqwest::StatusCode), + + #[error("Cannot create directory {0}: {1}")] + CreateDirectory(PathBuf, io::Error), + + #[error("I/O error: {0}")] + Io(#[from] io::Error), +} + +/// Handles downloading and decompressing snapshot files +pub struct 
SnapshotDownloader { + client: Client, + network_dir: String, +} + +impl SnapshotDownloader { + pub fn new(network_dir: String) -> Result { + let client = Client::builder() + .timeout(std::time::Duration::from_mins(5)) + .connect_timeout(std::time::Duration::from_secs(30)) + .build()?; + + Ok(Self { + client, + network_dir, + }) + } + + pub async fn download_all( + &self, + snapshots: &[SnapshotFileMetadata], + ) -> Result<(), DownloadError> { + for snapshot_meta in snapshots { + let file_path = snapshot_meta.file_path(&self.network_dir); + self.download_single(&snapshot_meta.url, &file_path).await?; + } + Ok(()) + } + + /// Downloads a gzip-compressed snapshot from the given URL, decompresses it on-the-fly, + /// and saves the decompressed CBOR data to the specified output path. + /// The data is first written to a `.partial` temporary file to ensure atomicity + /// and then renamed to the final output path upon successful completion. + pub async fn download_single(&self, url: &str, output_path: &str) -> Result<(), DownloadError> { + let path = Path::new(output_path); + + if path.exists() { + info!("Snapshot already exists, skipping: {}", output_path); + return Ok(()); + } + + info!("Downloading snapshot from {}", url); + + if let Some(parent) = path.parent() { + tokio::fs::create_dir_all(parent) + .await + .map_err(|e| DownloadError::CreateDirectory(parent.to_path_buf(), e))?; + } + + let tmp_path = path.with_extension("partial"); + + let result = async { + let response = self + .client + .get(url) + .send() + .await + .map_err(|e| DownloadError::Download(url.to_string(), e))?; + + if !response.status().is_success() { + return Err(DownloadError::InvalidStatusCode( + url.to_string(), + response.status(), + )); + } + + let content_length = response.content_length(); + let mut file = File::create(&tmp_path).await?; + + let stream = response.bytes_stream(); + let async_read = tokio_util::io::StreamReader::new( + stream.map_err(|e| 
io::Error::new(std::io::ErrorKind::Other, e)), + ); + + let progress_reader = ProgressReader::new(async_read, content_length, 200); + let buffered = BufReader::new(progress_reader); + let mut decoder = GzipDecoder::new(buffered); + + tokio::io::copy(&mut decoder, &mut file).await?; + + file.sync_all().await?; + tokio::fs::rename(&tmp_path, output_path).await?; + + info!("Downloaded and decompressed snapshot to {}", output_path); + Ok(()) + } + .await; + + if result.is_err() { + let _ = tokio::fs::remove_file(&tmp_path).await; + } + + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::io::Write; + use tempfile::TempDir; + + fn create_fake_snapshot(dir: &Path, point: &str) { + let snapshot_path = dir.join(format!("{}.cbor", point)); + let mut file = fs::File::create(&snapshot_path).unwrap(); + file.write_all(b"fake snapshot data").unwrap(); + } + + #[tokio::test] + async fn test_downloader_skips_existing_file() { + let temp_dir = TempDir::new().unwrap(); + let point = "point_500"; + create_fake_snapshot(temp_dir.path(), point); + + let file_path = temp_dir.path().join(format!("{}.cbor", point)); + let downloader = + SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + + let result = downloader + .download_single( + "https://example.com/snapshot.cbor.gz", + file_path.to_str().unwrap(), + ) + .await; + + assert!(result.is_ok()); + assert!(file_path.exists()); + } + + #[tokio::test] + async fn test_downloader_missing_file_fails() { + let temp_dir = TempDir::new().unwrap(); + let point = "point_500"; + let file_path = temp_dir.path().join(format!("{}.cbor", point)); + let downloader = + SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + + let result = downloader + .download_single( + "https://invalid-url-that-does-not-exist.com/snapshot.cbor.gz", + file_path.to_str().unwrap(), + ) + .await; + + assert!(result.is_err()); + assert!(!file_path.exists()); + } + + #[tokio::test] + 
async fn test_downloader_creates_directory() { + let temp_dir = TempDir::new().unwrap(); + let nested_path = temp_dir.path().join("nested").join("directory").join("snapshot.cbor"); + let downloader = + SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + + let _ = downloader + .download_single( + "https://invalid-url.com/snapshot.cbor.gz", + nested_path.to_str().unwrap(), + ) + .await; + + assert!(nested_path.parent().unwrap().exists()); + } + + #[tokio::test] + async fn test_downloader_creates_partial_file_then_renames() { + let temp_dir = TempDir::new().unwrap(); + let output_path = temp_dir.path().join("snapshot.cbor"); + let downloader = + SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + + let result = downloader + .download_single( + "https://invalid-url.com/snapshot.cbor.gz", + output_path.to_str().unwrap(), + ) + .await; + + assert!(result.is_err()); + assert!(!output_path.exists()); + } +} diff --git a/modules/snapshot_bootstrapper/src/publisher.rs b/modules/snapshot_bootstrapper/src/publisher.rs new file mode 100644 index 00000000..2a114688 --- /dev/null +++ b/modules/snapshot_bootstrapper/src/publisher.rs @@ -0,0 +1,153 @@ +use acropolis_common::{ + messages::{CardanoMessage, Message}, + snapshot::streaming_snapshot::{ + DRepCallback, DRepInfo, GovernanceProposal, PoolCallback, PoolInfo, ProposalCallback, + SnapshotCallbacks, SnapshotMetadata, StakeCallback, UtxoCallback, UtxoEntry, + }, + stake_addresses::AccountState, + BlockInfo, +}; +use anyhow::Result; +use caryatid_sdk::Context; +use std::sync::Arc; +use tracing::info; + +/// Handles publishing snapshot data to the message bus +pub struct SnapshotPublisher { + context: Arc>, + completion_topic: String, + snapshot_topic: String, + metadata: Option, + utxo_count: u64, + pools: Vec, + accounts: Vec, + dreps: Vec, + proposals: Vec, +} + +impl SnapshotPublisher { + pub fn new( + context: Arc>, + completion_topic: String, + snapshot_topic: 
String, + ) -> Self { + Self { + context, + completion_topic, + snapshot_topic, + metadata: None, + utxo_count: 0, + pools: Vec::new(), + accounts: Vec::new(), + dreps: Vec::new(), + proposals: Vec::new(), + } + } + + pub async fn publish_start(&self) -> Result<()> { + let message = Arc::new(Message::Snapshot( + acropolis_common::messages::SnapshotMessage::Startup, + )); + self.context.publish(&self.snapshot_topic, message).await + } + + pub async fn publish_completion(&self, block_info: BlockInfo) -> Result<()> { + let message = Arc::new(Message::Cardano(( + block_info, + CardanoMessage::SnapshotComplete, + ))); + self.context.publish(&self.completion_topic, message).await + } + + pub fn metadata(&self) -> Option<&SnapshotMetadata> { + self.metadata.as_ref() + } +} + +impl UtxoCallback for SnapshotPublisher { + fn on_utxo(&mut self, _utxo: UtxoEntry) -> Result<()> { + self.utxo_count += 1; + + // Log progress every million UTXOs + if self.utxo_count.is_multiple_of(1_000_000) { + info!("Processed {} UTXOs", self.utxo_count); + } + // TODO: Accumulate UTXO data if needed or send in chunks to UTXOState processor + Ok(()) + } +} + +impl PoolCallback for SnapshotPublisher { + fn on_pools(&mut self, pools: Vec) -> Result<()> { + info!("Received {} pools", pools.len()); + self.pools.extend(pools); + // TODO: Accumulate pool data if needed or send in chunks to PoolState processor + Ok(()) + } +} + +impl StakeCallback for SnapshotPublisher { + fn on_accounts(&mut self, accounts: Vec) -> Result<()> { + info!("Received {} accounts", accounts.len()); + self.accounts.extend(accounts); + // TODO: Accumulate account data if needed or send in chunks to AccountState processor + Ok(()) + } +} + +impl DRepCallback for SnapshotPublisher { + fn on_dreps(&mut self, dreps: Vec) -> Result<()> { + info!("Received {} DReps", dreps.len()); + self.dreps.extend(dreps); + // TODO: Accumulate DRep data if needed or send in chunks to DRepState processor + Ok(()) + } +} + +impl ProposalCallback 
for SnapshotPublisher { + fn on_proposals(&mut self, proposals: Vec) -> Result<()> { + info!("Received {} proposals", proposals.len()); + self.proposals.extend(proposals); + // TODO: Accumulate proposal data if needed or send in chunks to ProposalState processor + Ok(()) + } +} + +impl SnapshotCallbacks for SnapshotPublisher { + fn on_metadata(&mut self, metadata: SnapshotMetadata) -> Result<()> { + info!("Snapshot metadata for epoch {}", metadata.epoch); + info!(" UTXOs: {:?}", metadata.utxo_count); + info!( + " Pot balances: treasury={}, reserves={}, deposits={}", + metadata.pot_balances.treasury, + metadata.pot_balances.reserves, + metadata.pot_balances.deposits + ); + info!( + " - Previous epoch blocks: {}", + metadata.blocks_previous_epoch.len() + ); + info!( + " - Current epoch blocks: {}", + metadata.blocks_current_epoch.len() + ); + + self.metadata = Some(metadata); + Ok(()) + } + + fn on_complete(&mut self) -> Result<()> { + info!("Snapshot parsing completed"); + info!("Final statistics:"); + info!(" - UTXOs processed: {}", self.utxo_count); + info!(" - Pools: {}", self.pools.len()); + info!(" - Accounts: {}", self.accounts.len()); + info!(" - DReps: {}", self.dreps.len()); + info!(" - Proposals: {}", self.proposals.len()); + // We could send a Resolver reference from here for large data, i.e. the UTXO set, + // which could be a file reference. For a file reference, we'd extend the parser to + // give us a callback value with the offset into the file; and we'd make the streaming + // UTXO parser public and reusable, adding it to the resolver implementation. 
+ Ok(()) + } +} diff --git a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs deleted file mode 100644 index e97b1ddc..00000000 --- a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs +++ /dev/null @@ -1,876 +0,0 @@ -mod progress_reader; - -use std::{ - path::{Path, PathBuf}, - sync::Arc, - time, -}; - -use crate::progress_reader::ProgressReader; -use acropolis_common::{ - messages::{CardanoMessage, Message}, - snapshot::{ - streaming_snapshot::{ - DRepCallback, DRepInfo, GovernanceProposal, PoolCallback, PoolInfo, ProposalCallback, - SnapshotCallbacks, SnapshotMetadata, StakeCallback, UtxoCallback, UtxoEntry, - }, - StreamingSnapshotParser, - }, - stake_addresses::AccountState, - BlockHash, BlockInfo, BlockStatus, Era, -}; -use anyhow::{bail, Result}; -use async_compression::tokio::bufread::GzipDecoder; -use caryatid_sdk::{module, Context, Subscription}; -use config::Config; -use futures_util::TryStreamExt; -use reqwest::Client; -use serde::{Deserialize, Serialize}; -use std::fs; -use std::io; -use thiserror::Error; -use tokio::fs::File; -use tokio::io::BufReader; -use tokio::time::Instant; -use tracing::{error, info, info_span, Instrument}; - -const DEFAULT_SNAPSHOT_TOPIC: &str = "cardano.snapshot"; -const DEFAULT_STARTUP_TOPIC: &str = "cardano.sequence.start"; -const DEFAULT_COMPLETION_TOPIC: &str = "cardano.snapshot.complete"; -const DEFAULT_BOOTSTRAPPED_TOPIC: &str = "cardano.sequence.bootstrapped"; - -#[derive(Debug, Error)] -pub enum SnapshotBootstrapError { - #[error("Cannot read network config file {0}: {1}")] - ReadNetworkConfig(PathBuf, io::Error), - - #[error("Cannot read snapshots metadata file {0}: {1}")] - ReadSnapshotsFile(PathBuf, io::Error), - - #[error("Failed to parse network config {0}: {1}")] - MalformedNetworkConfig(PathBuf, serde_json::Error), - - #[error("Failed to parse snapshots JSON file {0}: {1}")] - MalformedSnapshotsFile(PathBuf, serde_json::Error), - - 
#[error("Cannot create directory {0}: {1}")] - CreateDirectory(PathBuf, io::Error), - - #[error("Failed to download snapshot from {0}: {1}")] - DownloadError(String, reqwest::Error), - - #[error("Failed to initalize HTTP client: {0:?}")] - ClientError(reqwest::Error), - - #[error("Download failed from {0}: HTTP status {1}")] - DownloadInvalidStatusCode(String, reqwest::StatusCode), - - #[error("I/O error: {0}")] - Io(#[from] io::Error), - - #[error("Snapshot parsing failed: {0}")] - ParseError(String), -} - -/// Configuration for the snapshot bootstrapper -#[derive(Debug, Clone)] -struct SnapshotConfig { - network: String, - data_dir: String, - startup_topic: String, - snapshot_topic: String, - bootstrapped_topic: String, - completion_topic: String, -} - -impl SnapshotConfig { - fn try_load(config: &Config) -> Result { - Ok(Self { - network: config.get_string("network").unwrap_or_else(|_| "mainnet".to_string()), - data_dir: config.get_string("data-dir").unwrap_or_else(|_| "./data".to_string()), - startup_topic: config - .get_string("startup-topic") - .unwrap_or(DEFAULT_STARTUP_TOPIC.to_string()), - snapshot_topic: config - .get_string("snapshot-topic") - .unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string()), - bootstrapped_topic: config - .get_string("bootstrapped-subscribe-topic") - .unwrap_or(DEFAULT_BOOTSTRAPPED_TOPIC.to_string()), - completion_topic: config - .get_string("completion-topic") - .unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string()), - }) - } - - fn network_dir(&self) -> String { - format!("{}/{}", self.data_dir, self.network) - } - - fn config_path(&self) -> String { - format!("{}/config.json", self.network_dir()) - } - - fn snapshots_path(&self) -> String { - format!("{}/snapshots.json", self.network_dir()) - } -} - -/// Network configuration file (config.json) -#[derive(Debug, Deserialize, Serialize)] -#[serde(rename_all = "camelCase")] -struct NetworkConfig { - snapshots: Vec, - points: Vec, -} - -/// Point -#[derive(Debug, Deserialize, Serialize)] 
-#[serde(rename_all = "camelCase")] -struct Point { - epoch: u64, - id: String, - slot: u64, -} - -/// Snapshot metadata from snapshots.json -#[derive(Debug, Deserialize, Serialize, Clone)] -struct SnapshotFileMetadata { - epoch: u64, - point: String, - url: String, -} - -impl SnapshotFileMetadata { - fn file_path(&self, network_dir: &str) -> String { - format!("{}/{}.cbor", network_dir, self.point) - } -} - -/// Handles publishing snapshot data to the message bus -struct SnapshotPublisher { - context: Arc>, - completion_topic: String, - snapshot_topic: String, - metadata: Option, - utxo_count: u64, - pools: Vec, - accounts: Vec, - dreps: Vec, - proposals: Vec, -} - -impl SnapshotPublisher { - fn new( - context: Arc>, - completion_topic: String, - snapshot_topic: String, - ) -> Self { - Self { - context, - completion_topic, - snapshot_topic, - metadata: None, - utxo_count: 0, - pools: Vec::new(), - accounts: Vec::new(), - dreps: Vec::new(), - proposals: Vec::new(), - } - } - - async fn publish_start(&self) -> Result<()> { - let message = Arc::new(Message::Snapshot( - acropolis_common::messages::SnapshotMessage::Startup, - )); - self.context.publish(&self.snapshot_topic, message).await - } - - async fn publish_completion(&self, block_info: BlockInfo) -> Result<()> { - let message = Arc::new(Message::Cardano(( - block_info, - CardanoMessage::SnapshotComplete, - ))); - self.context.publish(&self.completion_topic, message).await - } -} - -impl UtxoCallback for SnapshotPublisher { - fn on_utxo(&mut self, _utxo: UtxoEntry) -> Result<()> { - self.utxo_count += 1; - - // Log progress every million UTXOs - if self.utxo_count.is_multiple_of(1_000_000) { - info!("Processed {} UTXOs", self.utxo_count); - } - // TODO: Accumulate UTXO data if needed or send in chunks to UTXOState processor - Ok(()) - } -} - -impl PoolCallback for SnapshotPublisher { - fn on_pools(&mut self, pools: Vec) -> Result<()> { - info!("Received {} pools", pools.len()); - self.pools.extend(pools); - // 
TODO: Accumulate pool data if needed or send in chunks to PoolState processor - Ok(()) - } -} - -impl StakeCallback for SnapshotPublisher { - fn on_accounts(&mut self, accounts: Vec) -> Result<()> { - info!("Received {} accounts", accounts.len()); - self.accounts.extend(accounts); - // TODO: Accumulate account data if needed or send in chunks to AccountState processor - Ok(()) - } -} - -impl DRepCallback for SnapshotPublisher { - fn on_dreps(&mut self, dreps: Vec) -> Result<()> { - info!("Received {} DReps", dreps.len()); - self.dreps.extend(dreps); - // TODO: Accumulate DRep data if needed or send in chunks to DRepState processor - Ok(()) - } -} - -impl ProposalCallback for SnapshotPublisher { - fn on_proposals(&mut self, proposals: Vec) -> Result<()> { - info!("Received {} proposals", proposals.len()); - self.proposals.extend(proposals); - // TODO: Accumulate proposal data if needed or send in chunks to ProposalState processor - Ok(()) - } -} - -impl SnapshotCallbacks for SnapshotPublisher { - fn on_metadata(&mut self, metadata: SnapshotMetadata) -> Result<()> { - info!("Snapshot metadata for epoch {}", metadata.epoch); - info!(" UTXOs: {:?}", metadata.utxo_count); - info!( - " Pot balances: treasury={}, reserves={}, deposits={}", - metadata.pot_balances.treasury, - metadata.pot_balances.reserves, - metadata.pot_balances.deposits - ); - info!( - " - Previous epoch blocks: {}", - metadata.blocks_previous_epoch.len() - ); - info!( - " - Current epoch blocks: {}", - metadata.blocks_current_epoch.len() - ); - - self.metadata = Some(metadata); - Ok(()) - } - - fn on_complete(&mut self) -> Result<()> { - info!("Snapshot parsing completed"); - info!("Final statistics:"); - info!(" - UTXOs processed: {}", self.utxo_count); - info!(" - Pools: {}", self.pools.len()); - info!(" - Accounts: {}", self.accounts.len()); - info!(" - DReps: {}", self.dreps.len()); - info!(" - Proposals: {}", self.proposals.len()); - // We could send a Resolver reference from here for large data, 
i.e. the UTXO set, - // which could be a file reference. For a file reference, we'd extend the parser to - // give us a callback value with the offset into the file; and we'd make the streaming - // UTXO parser public and reusable, adding it to the resolver implementation. - Ok(()) - } -} - -#[module( - message_type(Message), - name = "snapshot-bootstrapper", - description = "Snapshot Bootstrapper to broadcast state via streaming" -)] -pub struct SnapshotBootstrapper; - -impl SnapshotBootstrapper { - /// Initializes the snapshot bootstrapper. - pub async fn init(&self, context: Arc>, config: Arc) -> Result<()> { - let cfg = SnapshotConfig::try_load(&config)?; - - info!("Snapshot bootstrapper initializing"); - info!(" Network: {}", cfg.network); - info!(" Data directory: {}", cfg.data_dir); - info!(" Publishing on '{}'", cfg.snapshot_topic); - info!(" Completing with '{}'", cfg.completion_topic); - - let startup_sub = context.subscribe(&cfg.startup_topic).await?; - let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_topic).await?; - - context.clone().run(async move { - let span = info_span!("snapshot_bootstrapper.handle"); - async { - // Wait for the startup signal - if let Err(e) = Self::wait_startup(startup_sub).await { - error!("Failed waiting for startup: {e:#}"); - return; - } - - // Wait for genesis bootstrap completion - if let Err(e) = Self::wait_genesis_completion(bootstrapped_sub).await { - error!("Failed waiting for bootstrapped: {e:#}"); - return; - } - - info!("Bootstrap prerequisites met, starting snapshot processing"); - - // Load network configuration - let network_config = match Self::read_network_config(&cfg.config_path()) { - Ok(config) => config, - Err(e) => { - error!("Failed to read network config: {e:#}"); - return; - } - }; - - // Load snapshots metadata - let all_snapshots = match Self::read_snapshots_metadata(&cfg.snapshots_path()) { - Ok(snapshots) => snapshots, - Err(e) => { - error!("Failed to read snapshots metadata: {e:#}"); - 
return; - } - }; - - // Filter snapshots based on network config - let target_snapshots = Self::filter_snapshots(&network_config, &all_snapshots); - if target_snapshots.is_empty() { - error!( - "No snapshots found for requested epochs: {:?}", - network_config.snapshots - ); - return; - } - - info!("Found {} snapshot(s) to process", target_snapshots.len()); - - // Download all snapshots - if let Err(e) = - Self::download_snapshots(&target_snapshots, &cfg.network_dir()).await - { - error!("Failed to download snapshots: {e:#}"); - return; - } - - // Process snapshots in order - if let Err(e) = - Self::process_snapshots(&target_snapshots, &cfg, context.clone()).await - { - error!("Failed to process snapshots: {e:#}"); - return; - } - - info!("Snapshot bootstrap completed successfully"); - } - .instrument(span) - .await; - }); - - Ok(()) - } - - async fn wait_startup(mut subscription: Box>) -> Result<()> { - let (_, _message) = subscription.read().await?; - info!("Received startup message"); - Ok(()) - } - - async fn wait_genesis_completion( - mut subscription: Box>, - ) -> Result<()> { - let (_, message) = subscription.read().await?; - match message.as_ref() { - Message::Cardano((_, CardanoMessage::GenesisComplete(_complete))) => { - info!("Received genesis complete message"); - Ok(()) - } - msg => bail!("Unexpected message in bootstrapped topic: {msg:?}"), - } - } - - fn read_network_config(path: &str) -> Result { - let path_buf = PathBuf::from(path); - let content = fs::read_to_string(&path_buf) - .map_err(|e| SnapshotBootstrapError::ReadNetworkConfig(path_buf.clone(), e))?; - - let config: NetworkConfig = serde_json::from_str(&content) - .map_err(|e| SnapshotBootstrapError::MalformedNetworkConfig(path_buf, e))?; - - Ok(config) - } - - fn read_snapshots_metadata( - path: &str, - ) -> Result, SnapshotBootstrapError> { - let path_buf = PathBuf::from(path); - let content = fs::read_to_string(&path_buf) - .map_err(|e| 
SnapshotBootstrapError::ReadSnapshotsFile(path_buf.clone(), e))?; - - let snapshots: Vec = serde_json::from_str(&content) - .map_err(|e| SnapshotBootstrapError::MalformedSnapshotsFile(path_buf, e))?; - - Ok(snapshots) - } - - fn filter_snapshots( - network_config: &NetworkConfig, - all_snapshots: &[SnapshotFileMetadata], - ) -> Vec { - all_snapshots - .iter() - .filter(|s| network_config.snapshots.contains(&s.epoch)) - .cloned() - .collect() - } - - async fn download_snapshots( - snapshots: &[SnapshotFileMetadata], - network_dir: &str, - ) -> Result<(), SnapshotBootstrapError> { - let client = Client::builder() - .timeout(std::time::Duration::from_mins(5)) - .connect_timeout(std::time::Duration::from_secs(30)) - .build() - .map_err(|e| SnapshotBootstrapError::ClientError(e))?; - - for snapshot_meta in snapshots { - let file_path = snapshot_meta.file_path(network_dir); - Self::download_snapshot(&client, &snapshot_meta.url, &file_path).await?; - } - Ok(()) - } - - /// Downloads a gzip-compressed snapshot from the given URL, decompresses it on-the-fly, - /// and saves the decompressed CBOR data to the specified output path. - /// The data is first written to a `.partial` temporary file to ensure atomicity - /// and then renamed to the final output path upon successful completion. 
- async fn download_snapshot( - client: &Client, - url: &str, - output_path: &str, - ) -> Result<(), SnapshotBootstrapError> { - let path = Path::new(output_path); - - if path.exists() { - info!("Snapshot already exists, skipping: {}", output_path); - return Ok(()); - } - - info!("Downloading snapshot from {}", url); - - if let Some(parent) = path.parent() { - tokio::fs::create_dir_all(parent) - .await - .map_err(|e| SnapshotBootstrapError::CreateDirectory(parent.to_path_buf(), e))?; - } - - let tmp_path = path.with_extension("partial"); - - let result = async { - let response = client - .get(url) - .send() - .await - .map_err(|e| SnapshotBootstrapError::DownloadError(url.to_string(), e))?; - - if !response.status().is_success() { - return Err(SnapshotBootstrapError::DownloadInvalidStatusCode( - url.to_string(), - response.status(), - )); - } - - let content_length = response.content_length(); - let mut file = File::create(&tmp_path).await?; - - let stream = response.bytes_stream(); - let async_read = tokio_util::io::StreamReader::new( - stream.map_err(|e| io::Error::new(std::io::ErrorKind::Other, e)), - ); - - let progress_reader = ProgressReader::new(async_read, content_length, 200); - let buffered = BufReader::new(progress_reader); - let mut decoder = GzipDecoder::new(buffered); - - tokio::io::copy(&mut decoder, &mut file).await?; - - file.sync_all().await?; - tokio::fs::rename(&tmp_path, output_path).await?; - - info!("Downloaded and decompressed snapshot to {}", output_path); - Ok(()) - } - .await; - - if result.is_err() { - let _ = tokio::fs::remove_file(&tmp_path).await; - } - - result - } - - async fn process_snapshots( - snapshots: &[SnapshotFileMetadata], - cfg: &SnapshotConfig, - context: Arc>, - ) -> Result<()> { - let mut publisher = SnapshotPublisher::new( - context, - cfg.completion_topic.clone(), - cfg.snapshot_topic.clone(), - ); - - // Publish start once at the beginning - publisher.publish_start().await?; - - for snapshot_meta in snapshots { - 
let file_path = snapshot_meta.file_path(&cfg.network_dir()); - - info!( - "Processing snapshot for epoch {} from {}", - snapshot_meta.epoch, file_path - ); - - Self::parse_snapshot(&file_path, &mut publisher).await?; - } - - let metadata = publisher - .metadata - .as_ref() - .ok_or_else(|| anyhow::anyhow!("No metadata received from snapshots"))?; - - let block_info = build_block_info_from_metadata(metadata); - publisher.publish_completion(block_info).await?; - - Ok(()) - } - - async fn parse_snapshot(file_path: &str, publisher: &mut SnapshotPublisher) -> Result<()> { - info!("Parsing snapshot: {}", file_path); - let start = Instant::now(); - - let parser = StreamingSnapshotParser::new(file_path); - parser.parse(publisher)?; - - let duration = start.elapsed(); - info!("Parsed snapshot in {:.2?}", duration); - - Ok(()) - } -} - -fn build_block_info_from_metadata(metadata: &SnapshotMetadata) -> BlockInfo { - BlockInfo { - status: BlockStatus::Immutable, - slot: 0, - number: 0, - hash: BlockHash::default(), - epoch: metadata.epoch, - epoch_slot: 0, - new_epoch: false, - timestamp: 0, - era: Era::Conway, - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::fs; - use std::io::Write; - use std::path::Path; - use tempfile::TempDir; - - fn create_test_network_config(dir: &Path, snapshots: Vec) -> PathBuf { - let config = NetworkConfig { - snapshots, - points: vec![Point { - epoch: 500, - id: "test_block_hash".to_string(), - slot: 12345678, - }], - }; - - let config_path = dir.join("config.json"); - let mut file = fs::File::create(&config_path).unwrap(); - file.write_all(serde_json::to_string_pretty(&config).unwrap().as_bytes()).unwrap(); - config_path - } - - fn create_test_snapshots_metadata(dir: &Path, epochs: Vec, base_url: &str) -> PathBuf { - let snapshots: Vec = epochs - .iter() - .map(|epoch| SnapshotFileMetadata { - epoch: *epoch, - point: format!("point_{}", epoch), - url: format!("{}/snapshot_{}.cbor.gz", base_url, epoch), - }) - .collect(); - - let 
snapshots_path = dir.join("snapshots.json"); - let mut file = fs::File::create(&snapshots_path).unwrap(); - file.write_all(serde_json::to_string_pretty(&snapshots).unwrap().as_bytes()).unwrap(); - snapshots_path - } - - fn create_fake_snapshot(dir: &Path, point: &str) { - let snapshot_path = dir.join(format!("{}.cbor", point)); - let mut file = fs::File::create(&snapshot_path).unwrap(); - file.write_all(b"fake snapshot data").unwrap(); - } - - #[test] - fn test_read_network_config_success() { - let temp_dir = TempDir::new().unwrap(); - let config_path = create_test_network_config(temp_dir.path(), vec![500, 501]); - - let result = SnapshotBootstrapper::read_network_config(config_path.to_str().unwrap()); - assert!(result.is_ok()); - - let config = result.unwrap(); - assert_eq!(config.snapshots, vec![500, 501]); - assert_eq!(config.points.len(), 1); - } - - #[test] - fn test_read_network_config_missing_file() { - let result = SnapshotBootstrapper::read_network_config("/nonexistent/config.json"); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - SnapshotBootstrapError::ReadNetworkConfig(_, _) - )); - } - - #[test] - fn test_read_network_config_malformed_json() { - let temp_dir = TempDir::new().unwrap(); - let config_path = temp_dir.path().join("config.json"); - let mut file = fs::File::create(&config_path).unwrap(); - file.write_all(b"{ invalid json }").unwrap(); - - let result = SnapshotBootstrapper::read_network_config(config_path.to_str().unwrap()); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - SnapshotBootstrapError::MalformedNetworkConfig(_, _) - )); - } - - #[test] - fn test_read_snapshots_metadata_success() { - let temp_dir = TempDir::new().unwrap(); - let snapshots_path = - create_test_snapshots_metadata(temp_dir.path(), vec![500, 501], "https://example.com"); - - let result = - SnapshotBootstrapper::read_snapshots_metadata(snapshots_path.to_str().unwrap()); - assert!(result.is_ok()); - - let snapshots = 
result.unwrap(); - assert_eq!(snapshots.len(), 2); - assert_eq!(snapshots[0].epoch, 500); - assert_eq!(snapshots[1].epoch, 501); - } - - #[test] - fn test_read_snapshots_metadata_missing_file() { - let result = SnapshotBootstrapper::read_snapshots_metadata("/nonexistent/snapshots.json"); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - SnapshotBootstrapError::ReadSnapshotsFile(_, _) - )); - } - - #[test] - fn test_filter_snapshots() { - let network_config = NetworkConfig { - snapshots: vec![500, 502], - points: vec![], - }; - - let all_snapshots = vec![ - SnapshotFileMetadata { - epoch: 500, - point: "point_500".to_string(), - url: "url1".to_string(), - }, - SnapshotFileMetadata { - epoch: 501, - point: "point_501".to_string(), - url: "url2".to_string(), - }, - SnapshotFileMetadata { - epoch: 502, - point: "point_502".to_string(), - url: "url3".to_string(), - }, - ]; - - let filtered = SnapshotBootstrapper::filter_snapshots(&network_config, &all_snapshots); - - assert_eq!(filtered.len(), 2); - assert_eq!(filtered[0].epoch, 500); - assert_eq!(filtered[1].epoch, 502); - } - - #[tokio::test] - async fn test_download_snapshot_skips_existing_file() { - let temp_dir = TempDir::new().unwrap(); - let point = "point_500"; - create_fake_snapshot(temp_dir.path(), point); - - let file_path = temp_dir.path().join(format!("{}.cbor", point)); - - let result = SnapshotBootstrapper::download_snapshot( - &Client::new(), - "https://example.com/snapshot.cbor.gz", - file_path.to_str().unwrap(), - ) - .await; - - assert!(result.is_ok()); - assert!(file_path.exists()); - } - - #[tokio::test] - async fn test_download_snapshot_missing_file_fails() { - let temp_dir = TempDir::new().unwrap(); - let point = "point_500"; - let file_path = temp_dir.path().join(format!("{}.cbor", point)); - - let result = SnapshotBootstrapper::download_snapshot( - &Client::new(), - "https://invalid-url-that-does-not-exist.com/snapshot.cbor.gz", - file_path.to_str().unwrap(), - ) - .await; 
- - assert!(result.is_err()); - assert!(!file_path.exists()); - } - - #[test] - fn test_snapshot_filtering_by_epoch() { - let temp_dir = TempDir::new().unwrap(); - create_test_network_config(temp_dir.path(), vec![500, 502]); - create_test_snapshots_metadata( - temp_dir.path(), - vec![500, 501, 502, 503], - "https://example.com", - ); - - let network_config = SnapshotBootstrapper::read_network_config( - temp_dir.path().join("config.json").to_str().unwrap(), - ) - .unwrap(); - - let all_snapshots = SnapshotBootstrapper::read_snapshots_metadata( - temp_dir.path().join("snapshots.json").to_str().unwrap(), - ) - .unwrap(); - - let target_snapshots = - SnapshotBootstrapper::filter_snapshots(&network_config, &all_snapshots); - - assert_eq!(target_snapshots.len(), 2); - assert_eq!(target_snapshots[0].epoch, 500); - assert_eq!(target_snapshots[1].epoch, 502); - } - - #[test] - fn test_empty_snapshots_list() { - let temp_dir = TempDir::new().unwrap(); - create_test_network_config(temp_dir.path(), vec![999]); - create_test_snapshots_metadata(temp_dir.path(), vec![500, 501], "https://example.com"); - - let network_config = SnapshotBootstrapper::read_network_config( - temp_dir.path().join("config.json").to_str().unwrap(), - ) - .unwrap(); - - let all_snapshots = SnapshotBootstrapper::read_snapshots_metadata( - temp_dir.path().join("snapshots.json").to_str().unwrap(), - ) - .unwrap(); - - let target_snapshots = - SnapshotBootstrapper::filter_snapshots(&network_config, &all_snapshots); - - assert!(target_snapshots.is_empty()); - } - - #[tokio::test] - async fn test_download_snapshot_creates_directory() { - let temp_dir = TempDir::new().unwrap(); - let nested_path = temp_dir.path().join("nested").join("directory").join("snapshot.cbor"); - - let _ = SnapshotBootstrapper::download_snapshot( - &Client::new(), - "https://invalid-url.com/snapshot.cbor.gz", - nested_path.to_str().unwrap(), - ) - .await; - - assert!(nested_path.parent().unwrap().exists()); - } - - #[test] - fn 
test_corrupted_config_json_fails_gracefully() { - let temp_dir = TempDir::new().unwrap(); - let config_path = temp_dir.path().join("config.json"); - let mut file = fs::File::create(&config_path).unwrap(); - file.write_all(b"{\"snapshots\": [500, 501]").unwrap(); - - let result = SnapshotBootstrapper::read_network_config(config_path.to_str().unwrap()); - assert!(result.is_err()); - - if let Err(SnapshotBootstrapError::MalformedNetworkConfig(path, _)) = result { - assert_eq!(path, config_path); - } else { - panic!("Expected MalformedNetworkConfig error"); - } - } - - #[test] - fn test_corrupted_snapshots_json_fails_gracefully() { - let temp_dir = TempDir::new().unwrap(); - let snapshots_path = temp_dir.path().join("snapshots.json"); - let mut file = fs::File::create(&snapshots_path).unwrap(); - file.write_all(b"[{\"epoch\": 500}").unwrap(); - - let result = - SnapshotBootstrapper::read_snapshots_metadata(snapshots_path.to_str().unwrap()); - assert!(result.is_err()); - - if let Err(SnapshotBootstrapError::MalformedSnapshotsFile(path, _)) = result { - assert_eq!(path, snapshots_path); - } else { - panic!("Expected MalformedSnapshotsFile error"); - } - } - - #[tokio::test] - async fn test_download_creates_partial_file_then_renames() { - let temp_dir = TempDir::new().unwrap(); - let output_path = temp_dir.path().join("snapshot.cbor"); - - let result = SnapshotBootstrapper::download_snapshot( - &Client::new(), - "https://invalid-url.com/snapshot.cbor.gz", - output_path.to_str().unwrap(), - ) - .await; - - assert!(result.is_err()); - assert!(!output_path.exists()); - } -} From abd5cd4bca16f625d4ec17ae36754c36bcfff84a Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Sun, 23 Nov 2025 10:38:57 -0800 Subject: [PATCH 11/28] feat: simplify error handling in downloader by using std::io::Error::other --- modules/snapshot_bootstrapper/src/downloader.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/snapshot_bootstrapper/src/downloader.rs 
b/modules/snapshot_bootstrapper/src/downloader.rs index ea3e7332..98e222ee 100644 --- a/modules/snapshot_bootstrapper/src/downloader.rs +++ b/modules/snapshot_bootstrapper/src/downloader.rs @@ -99,9 +99,8 @@ impl SnapshotDownloader { let mut file = File::create(&tmp_path).await?; let stream = response.bytes_stream(); - let async_read = tokio_util::io::StreamReader::new( - stream.map_err(|e| io::Error::new(std::io::ErrorKind::Other, e)), - ); + let async_read = + tokio_util::io::StreamReader::new(stream.map_err(|e| std::io::Error::other(e))); let progress_reader = ProgressReader::new(async_read, content_length, 200); let buffered = BufReader::new(progress_reader); From 6d445e5dcfc4f75efd851c7c0f6de5a7806d3c08 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Sun, 23 Nov 2025 10:38:57 -0800 Subject: [PATCH 12/28] feat: simplify error handling in downloader by using std::io::Error::other --- modules/snapshot_bootstrapper/src/downloader.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs index ea3e7332..ad848f02 100644 --- a/modules/snapshot_bootstrapper/src/downloader.rs +++ b/modules/snapshot_bootstrapper/src/downloader.rs @@ -99,9 +99,8 @@ impl SnapshotDownloader { let mut file = File::create(&tmp_path).await?; let stream = response.bytes_stream(); - let async_read = tokio_util::io::StreamReader::new( - stream.map_err(|e| io::Error::new(std::io::ErrorKind::Other, e)), - ); + let async_read = + tokio_util::io::StreamReader::new(stream.map_err(|_| std::io::Error::other)); let progress_reader = ProgressReader::new(async_read, content_length, 200); let buffered = BufReader::new(progress_reader); From 03fa02f9b4bbcb9474cb1de19881ebed5b719a2b Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 07:51:41 -0800 Subject: [PATCH 13/28] feat: rename error variant for clarity and improve error handling in downloader --- 
modules/snapshot_bootstrapper/src/downloader.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs index ad848f02..9c1614be 100644 --- a/modules/snapshot_bootstrapper/src/downloader.rs +++ b/modules/snapshot_bootstrapper/src/downloader.rs @@ -16,7 +16,7 @@ pub enum DownloadError { ClientInit(#[from] reqwest::Error), #[error("Failed to download snapshot from {0}: {1}")] - Download(String, reqwest::Error), + RequestFailed(String, reqwest::Error), #[error("Download failed from {0}: HTTP status {1}")] InvalidStatusCode(String, reqwest::StatusCode), @@ -86,7 +86,7 @@ impl SnapshotDownloader { .get(url) .send() .await - .map_err(|e| DownloadError::Download(url.to_string(), e))?; + .map_err(|e| DownloadError::RequestFailed(url.to_string(), e))?; if !response.status().is_success() { return Err(DownloadError::InvalidStatusCode( @@ -99,8 +99,9 @@ impl SnapshotDownloader { let mut file = File::create(&tmp_path).await?; let stream = response.bytes_stream(); - let async_read = - tokio_util::io::StreamReader::new(stream.map_err(|_| std::io::Error::other)); + let async_read = tokio_util::io::StreamReader::new( + stream.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)), + ); let progress_reader = ProgressReader::new(async_read, content_length, 200); let buffered = BufReader::new(progress_reader); From 8b72d9d61521d147690b91e38109732c23cb19e3 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 07:51:41 -0800 Subject: [PATCH 14/28] feat: rename error variant for clarity and improve error handling in downloader --- modules/snapshot_bootstrapper/src/downloader.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs index ad848f02..00b28209 100644 --- a/modules/snapshot_bootstrapper/src/downloader.rs +++ 
b/modules/snapshot_bootstrapper/src/downloader.rs @@ -16,7 +16,7 @@ pub enum DownloadError { ClientInit(#[from] reqwest::Error), #[error("Failed to download snapshot from {0}: {1}")] - Download(String, reqwest::Error), + RequestFailed(String, reqwest::Error), #[error("Download failed from {0}: HTTP status {1}")] InvalidStatusCode(String, reqwest::StatusCode), @@ -86,7 +86,7 @@ impl SnapshotDownloader { .get(url) .send() .await - .map_err(|e| DownloadError::Download(url.to_string(), e))?; + .map_err(|e| DownloadError::RequestFailed(url.to_string(), e))?; if !response.status().is_success() { return Err(DownloadError::InvalidStatusCode( @@ -100,7 +100,7 @@ impl SnapshotDownloader { let stream = response.bytes_stream(); let async_read = - tokio_util::io::StreamReader::new(stream.map_err(|_| std::io::Error::other)); + tokio_util::io::StreamReader::new(stream.map_err(|e| std::io::Error::other(e))); let progress_reader = ProgressReader::new(async_read, content_length, 200); let buffered = BufReader::new(progress_reader); From 26088e9690ceab7ead68bdbd3a508b0a0ffc572e Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 08:29:02 -0800 Subject: [PATCH 15/28] feat: add new error variant for stream read failures in downloader --- modules/snapshot_bootstrapper/src/downloader.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs index 00b28209..7cf99c80 100644 --- a/modules/snapshot_bootstrapper/src/downloader.rs +++ b/modules/snapshot_bootstrapper/src/downloader.rs @@ -24,6 +24,9 @@ pub enum DownloadError { #[error("Cannot create directory {0}: {1}")] CreateDirectory(PathBuf, io::Error), + #[error("Stream read failed: {0}")] + StreamRead(#[source] io::Error), + #[error("I/O error: {0}")] Io(#[from] io::Error), } @@ -99,8 +102,9 @@ impl SnapshotDownloader { let mut file = File::create(&tmp_path).await?; let stream = response.bytes_stream(); - 
let async_read = - tokio_util::io::StreamReader::new(stream.map_err(|e| std::io::Error::other(e))); + let async_read = tokio_util::io::StreamReader::new( + stream.map_err(|e| DownloadError::StreamRead(io::Error::other(e))), + ); let progress_reader = ProgressReader::new(async_read, content_length, 200); let buffered = BufReader::new(progress_reader); From 5c5108122881459ec9ce11923f737b2b78d24932 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 09:00:26 -0800 Subject: [PATCH 16/28] feat: simplify error handling for stream reading in downloader --- modules/snapshot_bootstrapper/src/downloader.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs index 7cf99c80..9ed850bb 100644 --- a/modules/snapshot_bootstrapper/src/downloader.rs +++ b/modules/snapshot_bootstrapper/src/downloader.rs @@ -24,9 +24,6 @@ pub enum DownloadError { #[error("Cannot create directory {0}: {1}")] CreateDirectory(PathBuf, io::Error), - #[error("Stream read failed: {0}")] - StreamRead(#[source] io::Error), - #[error("I/O error: {0}")] Io(#[from] io::Error), } @@ -101,11 +98,8 @@ impl SnapshotDownloader { let content_length = response.content_length(); let mut file = File::create(&tmp_path).await?; - let stream = response.bytes_stream(); - let async_read = tokio_util::io::StreamReader::new( - stream.map_err(|e| DownloadError::StreamRead(io::Error::other(e))), - ); - + let stream = response.bytes_stream().map_err(io::Error::other); + let async_read = tokio_util::io::StreamReader::new(stream); let progress_reader = ProgressReader::new(async_read, content_length, 200); let buffered = BufReader::new(progress_reader); let mut decoder = GzipDecoder::new(buffered); From da2e4ac14b8e3c647c70d81b76f2614beba4a085 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 09:23:44 -0800 Subject: [PATCH 17/28] feat: add wiremock and flate2 dependencies 
for downloader tests --- Cargo.lock | 54 ++++++++ modules/snapshot_bootstrapper/Cargo.toml | 2 + .../snapshot_bootstrapper/src/downloader.rs | 115 ++++++++++++------ 3 files changed, 135 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a01831af..b5b914f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -376,6 +376,7 @@ dependencies = [ "async-compression", "caryatid_sdk", "config", + "flate2", "futures-util", "reqwest 0.12.24", "serde", @@ -385,6 +386,7 @@ dependencies = [ "tokio", "tokio-util", "tracing", + "wiremock", ] [[package]] @@ -840,6 +842,16 @@ dependencies = [ "syn 2.0.109", ] +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "async-channel" version = "2.5.0" @@ -2151,6 +2163,24 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +[[package]] +name = "deadpool" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" +dependencies = [ + "deadpool-runtime", + "lazy_static", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "der" version = "0.7.10" @@ -2994,6 +3024,7 @@ dependencies = [ "http 1.3.1", "http-body 1.0.1", "httparse", + "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -7415,6 +7446,29 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wiremock" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" +dependencies = [ + "assert-json-diff", + "base64 0.22.1", + "deadpool", + "futures", + "http 1.3.1", + "http-body-util", + "hyper 1.7.0", + "hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + [[package]] name = "wit-bindgen" version = "0.46.0" diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml index aa569913..c5daca8d 100644 --- a/modules/snapshot_bootstrapper/Cargo.toml +++ b/modules/snapshot_bootstrapper/Cargo.toml @@ -26,6 +26,8 @@ futures-util = "0.3.31" tokio-util = "0.7.17" [dev-dependencies] +wiremock = "0.6.5" +flate2 = "1.1.5" tempfile = "3" [lib] diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs index 9ed850bb..ac387656 100644 --- a/modules/snapshot_bootstrapper/src/downloader.rs +++ b/modules/snapshot_bootstrapper/src/downloader.rs @@ -125,23 +125,25 @@ impl SnapshotDownloader { #[cfg(test)] mod tests { use super::*; - use std::fs; + use flate2::write::GzEncoder; + use flate2::Compression; use std::io::Write; use tempfile::TempDir; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; - fn create_fake_snapshot(dir: &Path, point: &str) { - let snapshot_path = dir.join(format!("{}.cbor", point)); - let mut file = fs::File::create(&snapshot_path).unwrap(); - file.write_all(b"fake snapshot data").unwrap(); + fn gzip_compress(data: &[u8]) -> Vec { + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(data).unwrap(); + encoder.finish().unwrap() } #[tokio::test] async fn test_downloader_skips_existing_file() { let temp_dir = TempDir::new().unwrap(); - let point = "point_500"; - create_fake_snapshot(temp_dir.path(), point); + let file_path = temp_dir.path().join("snapshot.cbor"); + std::fs::write(&file_path, b"existing data").unwrap(); - let file_path = 
temp_dir.path().join(format!("{}.cbor", point)); let downloader = SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); @@ -153,60 +155,101 @@ mod tests { .await; assert!(result.is_ok()); - assert!(file_path.exists()); + assert_eq!(std::fs::read(&file_path).unwrap(), b"existing data"); } #[tokio::test] - async fn test_downloader_missing_file_fails() { + async fn test_downloader_downloads_and_decompresses() { + let mock_server = MockServer::start().await; + let compressed = gzip_compress(b"snapshot content"); + + Mock::given(method("GET")) + .and(path("/snapshot.cbor.gz")) + .respond_with(ResponseTemplate::new(200).set_body_bytes(compressed)) + .mount(&mock_server) + .await; + let temp_dir = TempDir::new().unwrap(); - let point = "point_500"; - let file_path = temp_dir.path().join(format!("{}.cbor", point)); + let file_path = temp_dir.path().join("snapshot.cbor"); let downloader = SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); - let result = downloader - .download_single( - "https://invalid-url-that-does-not-exist.com/snapshot.cbor.gz", - file_path.to_str().unwrap(), - ) + let url = format!("{}/snapshot.cbor.gz", mock_server.uri()); + let result = downloader.download_single(&url, file_path.to_str().unwrap()).await; + + assert!(result.is_ok()); + assert_eq!(std::fs::read(&file_path).unwrap(), b"snapshot content"); + } + + #[tokio::test] + async fn test_downloader_handles_http_error() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/snapshot.cbor.gz")) + .respond_with(ResponseTemplate::new(404)) + .mount(&mock_server) .await; - assert!(result.is_err()); + let temp_dir = TempDir::new().unwrap(); + let file_path = temp_dir.path().join("snapshot.cbor"); + let downloader = + SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + + let url = format!("{}/snapshot.cbor.gz", mock_server.uri()); + let result = downloader.download_single(&url, 
file_path.to_str().unwrap()).await; + + assert!(matches!( + result, + Err(DownloadError::InvalidStatusCode(_, _)) + )); assert!(!file_path.exists()); } #[tokio::test] - async fn test_downloader_creates_directory() { + async fn test_downloader_creates_parent_directories() { + let mock_server = MockServer::start().await; + let compressed = gzip_compress(b"data"); + + Mock::given(method("GET")) + .and(path("/snapshot.cbor.gz")) + .respond_with(ResponseTemplate::new(200).set_body_bytes(compressed)) + .mount(&mock_server) + .await; + let temp_dir = TempDir::new().unwrap(); - let nested_path = temp_dir.path().join("nested").join("directory").join("snapshot.cbor"); + let file_path = temp_dir.path().join("nested").join("dir").join("snapshot.cbor"); let downloader = SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); - let _ = downloader - .download_single( - "https://invalid-url.com/snapshot.cbor.gz", - nested_path.to_str().unwrap(), - ) - .await; + let url = format!("{}/snapshot.cbor.gz", mock_server.uri()); + let result = downloader.download_single(&url, file_path.to_str().unwrap()).await; - assert!(nested_path.parent().unwrap().exists()); + assert!(result.is_ok()); + assert!(file_path.exists()); } #[tokio::test] - async fn test_downloader_creates_partial_file_then_renames() { + async fn test_downloader_cleans_up_partial_on_failure() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/snapshot.cbor.gz")) + .respond_with(ResponseTemplate::new(200).set_body_bytes(b"not valid gzip")) + .mount(&mock_server) + .await; + let temp_dir = TempDir::new().unwrap(); - let output_path = temp_dir.path().join("snapshot.cbor"); + let file_path = temp_dir.path().join("snapshot.cbor"); + let partial_path = temp_dir.path().join("snapshot.partial"); let downloader = SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); - let result = downloader - .download_single( - 
"https://invalid-url.com/snapshot.cbor.gz", - output_path.to_str().unwrap(), - ) - .await; + let url = format!("{}/snapshot.cbor.gz", mock_server.uri()); + let result = downloader.download_single(&url, file_path.to_str().unwrap()).await; assert!(result.is_err()); - assert!(!output_path.exists()); + assert!(!file_path.exists()); + assert!(!partial_path.exists()); } } From 91bdfb1aadbbd57828ffe91a09c8eefb8e9fa865 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 09:58:22 -0800 Subject: [PATCH 18/28] feat: reorganize module structure and update dependencies for snapshot handling --- Cargo.lock | 13 +++++----- common/Cargo.toml | 10 +++++++- .../src => common/src/snapshot}/config.rs | 0 .../src => common/src/snapshot}/downloader.rs | 4 +-- common/src/snapshot/mod.rs | 25 +++++++++++-------- .../src/snapshot}/progress_reader.rs | 0 modules/snapshot_bootstrapper/Cargo.toml | 11 -------- .../snapshot_bootstrapper/src/bootstrapper.rs | 10 +++----- 8 files changed, 36 insertions(+), 37 deletions(-) rename {modules/snapshot_bootstrapper/src => common/src/snapshot}/config.rs (100%) rename {modules/snapshot_bootstrapper/src => common/src/snapshot}/downloader.rs (98%) rename {modules/snapshot_bootstrapper/src => common/src/snapshot}/progress_reader.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index b5b914f9..5dccfd3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,6 +19,7 @@ name = "acropolis_common" version = "0.3.0" dependencies = [ "anyhow", + "async-compression", "bech32 0.11.0", "bigdecimal", "bitmask-enum", @@ -34,7 +35,9 @@ dependencies = [ "crc", "cryptoxide 0.5.1", "dashmap", + "flate2", "futures", + "futures-util", "hex", "memmap2", "minicbor 0.26.5", @@ -42,6 +45,7 @@ dependencies = [ "num-traits", "rayon", "regex", + "reqwest 0.12.24", "serde", "serde_json", "serde_with 3.15.1", @@ -49,7 +53,9 @@ dependencies = [ "tempfile", "thiserror 2.0.17", "tokio", + "tokio-util", "tracing", + "wiremock", ] [[package]] @@ -373,20 +379,13 @@ version = 
"0.1.0" dependencies = [ "acropolis_common", "anyhow", - "async-compression", "caryatid_sdk", "config", - "flate2", - "futures-util", - "reqwest 0.12.24", "serde", "serde_json", - "tempfile", "thiserror 2.0.17", "tokio", - "tokio-util", "tracing", - "wiremock", ] [[package]] diff --git a/common/Cargo.toml b/common/Cargo.toml index fc5e948d..1411f6c4 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -29,7 +29,6 @@ regex = "1" serde = { workspace = true, features = ["rc"] } serde_json = { workspace = true } serde_with = { workspace = true, features = ["base64"] } -tempfile = "3" tokio = { workspace = true } tracing = { workspace = true } futures = "0.3.31" @@ -40,9 +39,18 @@ rayon = "1.11.0" cryptoxide = "0.5.1" thiserror = "2.0.17" sha2 = "0.10.8" +async-compression = { version = "0.4.32", features = ["tokio", "gzip"] } +reqwest = { version = "0.12", features = ["stream"] } +futures-util = "0.3.31" +tokio-util = "0.7.17" caryatid_process = { workspace = true } config = { workspace = true } +[dev-dependencies] +tempfile = "3.23" +wiremock = "0.6.5" +flate2 = "1.1.5" + [lib] crate-type = ["rlib"] path = "src/lib.rs" diff --git a/modules/snapshot_bootstrapper/src/config.rs b/common/src/snapshot/config.rs similarity index 100% rename from modules/snapshot_bootstrapper/src/config.rs rename to common/src/snapshot/config.rs diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/common/src/snapshot/downloader.rs similarity index 98% rename from modules/snapshot_bootstrapper/src/downloader.rs rename to common/src/snapshot/downloader.rs index ac387656..64a2ee57 100644 --- a/modules/snapshot_bootstrapper/src/downloader.rs +++ b/common/src/snapshot/downloader.rs @@ -1,5 +1,5 @@ -use crate::config::SnapshotFileMetadata; -use crate::progress_reader::ProgressReader; +use crate::snapshot::config::SnapshotFileMetadata; +use crate::snapshot::progress_reader::ProgressReader; use async_compression::tokio::bufread::GzipDecoder; use futures_util::TryStreamExt; use 
reqwest::Client; diff --git a/common/src/snapshot/mod.rs b/common/src/snapshot/mod.rs index 2d368e6d..22d098e0 100644 --- a/common/src/snapshot/mod.rs +++ b/common/src/snapshot/mod.rs @@ -10,21 +10,26 @@ //! - Error types (`error.rs`) // Submodules +pub mod config; +pub mod downloader; mod error; mod parser; pub mod pool_params; +pub mod progress_reader; pub mod streaming_snapshot; // Re-export error types -pub use error::SnapshotError; - +// pub use error::SnapshotError; +// // Re-export parser functions -pub use parser::{compute_sha256, parse_manifest, validate_era, validate_integrity}; - +// pub use parser::{compute_sha256, parse_manifest, validate_era, validate_integrity}; +// // Re-export streaming snapshot APIs -pub use streaming_snapshot::{ - AccountState, Anchor, CollectingCallbacks, DRepCallback, DRepInfo, GovernanceProposal, - PoolCallback, PoolInfo, PoolMetadata, PotBalances, ProposalCallback, Relay, SnapshotCallbacks, - SnapshotMetadata, StakeAddressState, StakeCallback, StreamingSnapshotParser, UtxoCallback, - UtxoEntry, -}; +// pub use streaming_snapshot::{ +// AccountState, Anchor, CollectingCallbacks, DRepCallback, DRepInfo, GovernanceProposal, +// PoolCallback, PoolInfo, PoolMetadata, PotBalances, ProposalCallback, Relay, SnapshotCallbacks, +// SnapshotMetadata, StakeAddressState, StakeCallback, StreamingSnapshotParser, UtxoCallback, +// UtxoEntry, +// }; +// pub use downloader::{SnapshotDownloader, DownloadError}; +// pub use config::{SnapshotConfig, SnapshotFileMetadata}; diff --git a/modules/snapshot_bootstrapper/src/progress_reader.rs b/common/src/snapshot/progress_reader.rs similarity index 100% rename from modules/snapshot_bootstrapper/src/progress_reader.rs rename to common/src/snapshot/progress_reader.rs diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml index c5daca8d..1389fadc 100644 --- a/modules/snapshot_bootstrapper/Cargo.toml +++ b/modules/snapshot_bootstrapper/Cargo.toml @@ -17,18 +17,7 
@@ anyhow = { workspace = true } config = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } -serde = { version = "1.0.228", features = ["derive"] } -serde_json = "1.0.132" -async-compression = { version = "0.4.32", features = ["tokio", "gzip"] } -reqwest = { version = "0.12", features = ["stream"] } thiserror = "2.0.17" -futures-util = "0.3.31" -tokio-util = "0.7.17" - -[dev-dependencies] -wiremock = "0.6.5" -flate2 = "1.1.5" -tempfile = "3" [lib] path = "src/bootstrapper.rs" diff --git a/modules/snapshot_bootstrapper/src/bootstrapper.rs b/modules/snapshot_bootstrapper/src/bootstrapper.rs index d876ba19..9b32ca57 100644 --- a/modules/snapshot_bootstrapper/src/bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/bootstrapper.rs @@ -1,19 +1,17 @@ -mod config; -mod downloader; -mod progress_reader; mod publisher; -use crate::config::{ConfigError, NetworkConfig, SnapshotConfig, SnapshotFileMetadata}; -use crate::downloader::{DownloadError, SnapshotDownloader}; use crate::publisher::SnapshotPublisher; use ::config::Config; +use acropolis_common::snapshot::config::{NetworkConfig, SnapshotConfig, SnapshotFileMetadata}; +use acropolis_common::snapshot::downloader::{DownloadError, SnapshotDownloader}; +use acropolis_common::snapshot::streaming_snapshot::StreamingSnapshotParser; use acropolis_common::{ messages::{CardanoMessage, Message}, - snapshot::StreamingSnapshotParser, BlockHash, BlockInfo, BlockStatus, Era, }; use anyhow::{bail, Result}; use caryatid_sdk::{module, Context, Subscription}; +use config::ConfigError; use std::sync::Arc; use thiserror::Error; use tokio::time::Instant; From 9efcf06f40bebde68a7a1197a62d9096a781aee7 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 10:58:45 -0800 Subject: [PATCH 19/28] feat: refactor module structure and update dependencies for downloader and configuration --- Cargo.lock | 13 +++++----- common/Cargo.toml | 6 ----- common/src/snapshot/mod.rs | 25 ++++++++----------- 
modules/snapshot_bootstrapper/Cargo.toml | 12 +++++++++ .../snapshot_bootstrapper/src/bootstrapper.rs | 10 +++++--- .../src/configuration.rs | 0 .../snapshot_bootstrapper/src}/downloader.rs | 4 +-- .../src}/progress_reader.rs | 0 8 files changed, 37 insertions(+), 33 deletions(-) rename common/src/snapshot/config.rs => modules/snapshot_bootstrapper/src/configuration.rs (100%) rename {common/src/snapshot => modules/snapshot_bootstrapper/src}/downloader.rs (98%) rename {common/src/snapshot => modules/snapshot_bootstrapper/src}/progress_reader.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 5dccfd3b..b5b914f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,7 +19,6 @@ name = "acropolis_common" version = "0.3.0" dependencies = [ "anyhow", - "async-compression", "bech32 0.11.0", "bigdecimal", "bitmask-enum", @@ -35,9 +34,7 @@ dependencies = [ "crc", "cryptoxide 0.5.1", "dashmap", - "flate2", "futures", - "futures-util", "hex", "memmap2", "minicbor 0.26.5", @@ -45,7 +42,6 @@ dependencies = [ "num-traits", "rayon", "regex", - "reqwest 0.12.24", "serde", "serde_json", "serde_with 3.15.1", @@ -53,9 +49,7 @@ dependencies = [ "tempfile", "thiserror 2.0.17", "tokio", - "tokio-util", "tracing", - "wiremock", ] [[package]] @@ -379,13 +373,20 @@ version = "0.1.0" dependencies = [ "acropolis_common", "anyhow", + "async-compression", "caryatid_sdk", "config", + "flate2", + "futures-util", + "reqwest 0.12.24", "serde", "serde_json", + "tempfile", "thiserror 2.0.17", "tokio", + "tokio-util", "tracing", + "wiremock", ] [[package]] diff --git a/common/Cargo.toml b/common/Cargo.toml index 1411f6c4..bfb5380b 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -39,17 +39,11 @@ rayon = "1.11.0" cryptoxide = "0.5.1" thiserror = "2.0.17" sha2 = "0.10.8" -async-compression = { version = "0.4.32", features = ["tokio", "gzip"] } -reqwest = { version = "0.12", features = ["stream"] } -futures-util = "0.3.31" -tokio-util = "0.7.17" caryatid_process = { workspace = true } config = { 
workspace = true } [dev-dependencies] tempfile = "3.23" -wiremock = "0.6.5" -flate2 = "1.1.5" [lib] crate-type = ["rlib"] diff --git a/common/src/snapshot/mod.rs b/common/src/snapshot/mod.rs index 22d098e0..2d368e6d 100644 --- a/common/src/snapshot/mod.rs +++ b/common/src/snapshot/mod.rs @@ -10,26 +10,21 @@ //! - Error types (`error.rs`) // Submodules -pub mod config; -pub mod downloader; mod error; mod parser; pub mod pool_params; -pub mod progress_reader; pub mod streaming_snapshot; // Re-export error types -// pub use error::SnapshotError; -// +pub use error::SnapshotError; + // Re-export parser functions -// pub use parser::{compute_sha256, parse_manifest, validate_era, validate_integrity}; -// +pub use parser::{compute_sha256, parse_manifest, validate_era, validate_integrity}; + // Re-export streaming snapshot APIs -// pub use streaming_snapshot::{ -// AccountState, Anchor, CollectingCallbacks, DRepCallback, DRepInfo, GovernanceProposal, -// PoolCallback, PoolInfo, PoolMetadata, PotBalances, ProposalCallback, Relay, SnapshotCallbacks, -// SnapshotMetadata, StakeAddressState, StakeCallback, StreamingSnapshotParser, UtxoCallback, -// UtxoEntry, -// }; -// pub use downloader::{SnapshotDownloader, DownloadError}; -// pub use config::{SnapshotConfig, SnapshotFileMetadata}; +pub use streaming_snapshot::{ + AccountState, Anchor, CollectingCallbacks, DRepCallback, DRepInfo, GovernanceProposal, + PoolCallback, PoolInfo, PoolMetadata, PotBalances, ProposalCallback, Relay, SnapshotCallbacks, + SnapshotMetadata, StakeAddressState, StakeCallback, StreamingSnapshotParser, UtxoCallback, + UtxoEntry, +}; diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml index 1389fadc..1f00d1f8 100644 --- a/modules/snapshot_bootstrapper/Cargo.toml +++ b/modules/snapshot_bootstrapper/Cargo.toml @@ -17,7 +17,19 @@ anyhow = { workspace = true } config = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } +serde = { 
workspace = true, features = ["rc"] } +serde_json = { workspace = true } thiserror = "2.0.17" +async-compression = { version = "0.4.32", features = ["tokio", "gzip"] } +reqwest = { version = "0.12", features = ["stream"] } +futures-util = "0.3.31" +tokio-util = "0.7.17" + +[dev-dependencies] +wiremock = "0.6.5" +flate2 = "1.1.5" +tempfile = "3.23.0" + [lib] path = "src/bootstrapper.rs" diff --git a/modules/snapshot_bootstrapper/src/bootstrapper.rs b/modules/snapshot_bootstrapper/src/bootstrapper.rs index 9b32ca57..c0dce50d 100644 --- a/modules/snapshot_bootstrapper/src/bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/bootstrapper.rs @@ -1,9 +1,11 @@ +mod configuration; +mod downloader; +mod progress_reader; mod publisher; +use crate::configuration::{ConfigError, NetworkConfig, SnapshotConfig, SnapshotFileMetadata}; +use crate::downloader::{DownloadError, SnapshotDownloader}; use crate::publisher::SnapshotPublisher; -use ::config::Config; -use acropolis_common::snapshot::config::{NetworkConfig, SnapshotConfig, SnapshotFileMetadata}; -use acropolis_common::snapshot::downloader::{DownloadError, SnapshotDownloader}; use acropolis_common::snapshot::streaming_snapshot::StreamingSnapshotParser; use acropolis_common::{ messages::{CardanoMessage, Message}, @@ -11,7 +13,7 @@ use acropolis_common::{ }; use anyhow::{bail, Result}; use caryatid_sdk::{module, Context, Subscription}; -use config::ConfigError; +use config::Config; use std::sync::Arc; use thiserror::Error; use tokio::time::Instant; diff --git a/common/src/snapshot/config.rs b/modules/snapshot_bootstrapper/src/configuration.rs similarity index 100% rename from common/src/snapshot/config.rs rename to modules/snapshot_bootstrapper/src/configuration.rs diff --git a/common/src/snapshot/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs similarity index 98% rename from common/src/snapshot/downloader.rs rename to modules/snapshot_bootstrapper/src/downloader.rs index 64a2ee57..779cff53 100644 --- 
a/common/src/snapshot/downloader.rs +++ b/modules/snapshot_bootstrapper/src/downloader.rs @@ -1,5 +1,5 @@ -use crate::snapshot::config::SnapshotFileMetadata; -use crate::snapshot::progress_reader::ProgressReader; +use crate::configuration::SnapshotFileMetadata; +use crate::progress_reader::ProgressReader; use async_compression::tokio::bufread::GzipDecoder; use futures_util::TryStreamExt; use reqwest::Client; diff --git a/common/src/snapshot/progress_reader.rs b/modules/snapshot_bootstrapper/src/progress_reader.rs similarity index 100% rename from common/src/snapshot/progress_reader.rs rename to modules/snapshot_bootstrapper/src/progress_reader.rs From 708088c63c83aaf7a51841b3752d380f67bf2b82 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 11:10:08 -0800 Subject: [PATCH 20/28] feat: update workspace dependencies for caryatid_process and config in Cargo.toml --- common/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/Cargo.toml b/common/Cargo.toml index bfb5380b..a67896ae 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -18,7 +18,6 @@ bech32 = "0.11" bigdecimal = "0.4.8" bitmask-enum = "2.2" blake2 = "0.10.6" -bytes = { version = "1", features = ["serde"] } bs58 = "0.5" chrono = { workspace = true } crc = "3" @@ -39,11 +38,12 @@ rayon = "1.11.0" cryptoxide = "0.5.1" thiserror = "2.0.17" sha2 = "0.10.8" -caryatid_process = { workspace = true } -config = { workspace = true } [dev-dependencies] tempfile = "3.23" +config = { workspace = true } +caryatid_process = { workspace = true } + [lib] crate-type = ["rlib"] From cec71c4629dd6df5651bf297009397260608c55d Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 11:10:08 -0800 Subject: [PATCH 21/28] feat: update workspace dependencies for caryatid_process and config in Cargo.toml --- Cargo.lock | 4 ---- common/Cargo.toml | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
b5b914f9..2e78d73d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,7 +24,6 @@ dependencies = [ "bitmask-enum", "blake2 0.10.6", "bs58", - "bytes", "caryatid_module_clock", "caryatid_module_rest_server", "caryatid_process", @@ -1454,9 +1453,6 @@ name = "bytes" version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" -dependencies = [ - "serde", -] [[package]] name = "byteview" diff --git a/common/Cargo.toml b/common/Cargo.toml index bfb5380b..a67896ae 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -18,7 +18,6 @@ bech32 = "0.11" bigdecimal = "0.4.8" bitmask-enum = "2.2" blake2 = "0.10.6" -bytes = { version = "1", features = ["serde"] } bs58 = "0.5" chrono = { workspace = true } crc = "3" @@ -39,11 +38,12 @@ rayon = "1.11.0" cryptoxide = "0.5.1" thiserror = "2.0.17" sha2 = "0.10.8" -caryatid_process = { workspace = true } -config = { workspace = true } [dev-dependencies] tempfile = "3.23" +config = { workspace = true } +caryatid_process = { workspace = true } + [lib] crate-type = ["rlib"] From 084bf46b3d47c2a32ede8ca1091360e845065656 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 16:31:04 -0800 Subject: [PATCH 22/28] Refactor snapshot bootstrapper configuration and update topic naming --- .../snapshot_bootstrapper/config.default.toml | 13 ++++++ .../snapshot_bootstrapper/src/bootstrapper.rs | 2 +- .../src/configuration.rs | 40 +++++++------------ 3 files changed, 28 insertions(+), 27 deletions(-) create mode 100644 modules/snapshot_bootstrapper/config.default.toml diff --git a/modules/snapshot_bootstrapper/config.default.toml b/modules/snapshot_bootstrapper/config.default.toml new file mode 100644 index 00000000..95ff5ba1 --- /dev/null +++ b/modules/snapshot_bootstrapper/config.default.toml @@ -0,0 +1,13 @@ +# Snapshot bootstrapper default configuration + +# Network to use +network = "mainnet" + +# Directory for data storage 
+data-dir = "./data" + +# Topics +startup-topic = "cardano.sequence.start" +snapshot-topic = "cardano.snapshot" +bootstrapped-subscribe-topic = "cardano.sequence.bootstrapped" +completion-topic = "cardano.snapshot.complete" \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/src/bootstrapper.rs b/modules/snapshot_bootstrapper/src/bootstrapper.rs index c0dce50d..39befc31 100644 --- a/modules/snapshot_bootstrapper/src/bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/bootstrapper.rs @@ -53,7 +53,7 @@ impl SnapshotBootstrapper { info!(" Completing with '{}'", cfg.completion_topic); let startup_sub = context.subscribe(&cfg.startup_topic).await?; - let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_topic).await?; + let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_subscribe_topic).await?; context.clone().run(async move { let span = info_span!("snapshot_bootstrapper.handle"); diff --git a/modules/snapshot_bootstrapper/src/configuration.rs b/modules/snapshot_bootstrapper/src/configuration.rs index 71347730..ba52d27a 100644 --- a/modules/snapshot_bootstrapper/src/configuration.rs +++ b/modules/snapshot_bootstrapper/src/configuration.rs @@ -21,40 +21,28 @@ pub enum ConfigError { MalformedSnapshotsFile(PathBuf, serde_json::Error), } -const DEFAULT_SNAPSHOT_TOPIC: &str = "cardano.snapshot"; -const DEFAULT_STARTUP_TOPIC: &str = "cardano.sequence.start"; -const DEFAULT_COMPLETION_TOPIC: &str = "cardano.snapshot.complete"; -const DEFAULT_BOOTSTRAPPED_TOPIC: &str = "cardano.sequence.bootstrapped"; - /// Configuration for the snapshot bootstrapper -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "kebab-case")] pub struct SnapshotConfig { pub network: String, pub data_dir: String, pub startup_topic: String, pub snapshot_topic: String, - pub bootstrapped_topic: String, + pub bootstrapped_subscribe_topic: String, pub completion_topic: String, } impl SnapshotConfig { pub fn try_load(config: &Config) -> 
Result { - Ok(Self { - network: config.get_string("network").unwrap_or_else(|_| "mainnet".to_string()), - data_dir: config.get_string("data-dir").unwrap_or_else(|_| "./data".to_string()), - startup_topic: config - .get_string("startup-topic") - .unwrap_or(DEFAULT_STARTUP_TOPIC.to_string()), - snapshot_topic: config - .get_string("snapshot-topic") - .unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string()), - bootstrapped_topic: config - .get_string("bootstrapped-subscribe-topic") - .unwrap_or(DEFAULT_BOOTSTRAPPED_TOPIC.to_string()), - completion_topic: config - .get_string("completion-topic") - .unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string()), - }) + let full_config = Config::builder() + .add_source(config::File::from_str( + include_str!("../config.default.toml"), + config::FileFormat::Toml, + )) + .add_source(config.clone()) + .build()?; + Ok(full_config.try_deserialize()?) } pub fn network_dir(&self) -> String { @@ -175,7 +163,7 @@ mod tests { data_dir: "./data".to_string(), startup_topic: "startup".to_string(), snapshot_topic: "snapshot".to_string(), - bootstrapped_topic: "bootstrapped".to_string(), + bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), }; @@ -189,7 +177,7 @@ mod tests { data_dir: "/var/data".to_string(), startup_topic: "startup".to_string(), snapshot_topic: "snapshot".to_string(), - bootstrapped_topic: "bootstrapped".to_string(), + bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), }; @@ -203,7 +191,7 @@ mod tests { data_dir: "./data".to_string(), startup_topic: "startup".to_string(), snapshot_topic: "snapshot".to_string(), - bootstrapped_topic: "bootstrapped".to_string(), + bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), }; From 15e382f7968f1cf1d37424cdd331d5d7fccc8c42 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Mon, 24 Nov 2025 16:31:04 -0800 Subject: [PATCH 23/28] Refactor snapshot 
bootstrapper configuration and update topic naming --- .../snapshot_bootstrapper/config.default.toml | 28 +++++++++++++ .../snapshot_bootstrapper/src/bootstrapper.rs | 2 +- .../src/configuration.rs | 40 +++++++------------ 3 files changed, 43 insertions(+), 27 deletions(-) create mode 100644 modules/snapshot_bootstrapper/config.default.toml diff --git a/modules/snapshot_bootstrapper/config.default.toml b/modules/snapshot_bootstrapper/config.default.toml new file mode 100644 index 00000000..172222c2 --- /dev/null +++ b/modules/snapshot_bootstrapper/config.default.toml @@ -0,0 +1,28 @@ +# Snapshot bootstrapper default configuration + +# The Cardano network to connect to (e.g., "mainnet", "preprod", "preview") +network = "mainnet" + +# Base directory for storing network data, snapshots, and configuration files. +# Network-specific files will be stored in a subdirectory: // +# Expected files: config.json (network config) and snapshots.json (snapshot metadata) +data-dir = "./data" + +# Topic to subscribe to for receiving the initial startup signal. +# The bootstrapper waits for a message on this topic before beginning any work. +startup-topic = "cardano.sequence.start" + +# Topic for publishing snapshot data during processing. +# Publishes a Startup message when snapshot processing begins, followed by +# snapshot content (UTXOs, pools, accounts, DReps, governance proposals). +snapshot-topic = "cardano.snapshot" + +# Topic to subscribe to for genesis bootstrap completion. +# The bootstrapper waits for a GenesisComplete message on this topic +# before proceeding with snapshot processing. +bootstrapped-subscribe-topic = "cardano.sequence.bootstrapped" + +# Topic for publishing the final SnapshotComplete message. +# Signals that all snapshots have been fully processed and the +# blockchain state is ready for downstream consumers. 
+completion-topic = "cardano.snapshot.complete" \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/src/bootstrapper.rs b/modules/snapshot_bootstrapper/src/bootstrapper.rs index c0dce50d..39befc31 100644 --- a/modules/snapshot_bootstrapper/src/bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/bootstrapper.rs @@ -53,7 +53,7 @@ impl SnapshotBootstrapper { info!(" Completing with '{}'", cfg.completion_topic); let startup_sub = context.subscribe(&cfg.startup_topic).await?; - let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_topic).await?; + let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_subscribe_topic).await?; context.clone().run(async move { let span = info_span!("snapshot_bootstrapper.handle"); diff --git a/modules/snapshot_bootstrapper/src/configuration.rs b/modules/snapshot_bootstrapper/src/configuration.rs index 71347730..ba52d27a 100644 --- a/modules/snapshot_bootstrapper/src/configuration.rs +++ b/modules/snapshot_bootstrapper/src/configuration.rs @@ -21,40 +21,28 @@ pub enum ConfigError { MalformedSnapshotsFile(PathBuf, serde_json::Error), } -const DEFAULT_SNAPSHOT_TOPIC: &str = "cardano.snapshot"; -const DEFAULT_STARTUP_TOPIC: &str = "cardano.sequence.start"; -const DEFAULT_COMPLETION_TOPIC: &str = "cardano.snapshot.complete"; -const DEFAULT_BOOTSTRAPPED_TOPIC: &str = "cardano.sequence.bootstrapped"; - /// Configuration for the snapshot bootstrapper -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "kebab-case")] pub struct SnapshotConfig { pub network: String, pub data_dir: String, pub startup_topic: String, pub snapshot_topic: String, - pub bootstrapped_topic: String, + pub bootstrapped_subscribe_topic: String, pub completion_topic: String, } impl SnapshotConfig { pub fn try_load(config: &Config) -> Result { - Ok(Self { - network: config.get_string("network").unwrap_or_else(|_| "mainnet".to_string()), - data_dir: config.get_string("data-dir").unwrap_or_else(|_| 
"./data".to_string()), - startup_topic: config - .get_string("startup-topic") - .unwrap_or(DEFAULT_STARTUP_TOPIC.to_string()), - snapshot_topic: config - .get_string("snapshot-topic") - .unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string()), - bootstrapped_topic: config - .get_string("bootstrapped-subscribe-topic") - .unwrap_or(DEFAULT_BOOTSTRAPPED_TOPIC.to_string()), - completion_topic: config - .get_string("completion-topic") - .unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string()), - }) + let full_config = Config::builder() + .add_source(config::File::from_str( + include_str!("../config.default.toml"), + config::FileFormat::Toml, + )) + .add_source(config.clone()) + .build()?; + Ok(full_config.try_deserialize()?) } pub fn network_dir(&self) -> String { @@ -175,7 +163,7 @@ mod tests { data_dir: "./data".to_string(), startup_topic: "startup".to_string(), snapshot_topic: "snapshot".to_string(), - bootstrapped_topic: "bootstrapped".to_string(), + bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), }; @@ -189,7 +177,7 @@ mod tests { data_dir: "/var/data".to_string(), startup_topic: "startup".to_string(), snapshot_topic: "snapshot".to_string(), - bootstrapped_topic: "bootstrapped".to_string(), + bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), }; @@ -203,7 +191,7 @@ mod tests { data_dir: "./data".to_string(), startup_topic: "startup".to_string(), snapshot_topic: "snapshot".to_string(), - bootstrapped_topic: "bootstrapped".to_string(), + bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), }; From 35f4301941e3f48a3b5904b413fd4326208c6422 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Tue, 25 Nov 2025 08:04:00 -0800 Subject: [PATCH 24/28] Update NOTES.md to clarify snapshot bootstrapping process and data requirements --- modules/snapshot_bootstrapper/NOTES.md | 39 +++++++++---------- .../data/mainnet/headers.json | 4 -- 
.../data/mainnet/nonces.json | 7 ---- 3 files changed, 19 insertions(+), 31 deletions(-) delete mode 100644 modules/snapshot_bootstrapper/data/mainnet/headers.json delete mode 100644 modules/snapshot_bootstrapper/data/mainnet/nonces.json diff --git a/modules/snapshot_bootstrapper/NOTES.md b/modules/snapshot_bootstrapper/NOTES.md index 35d40e34..97673b66 100644 --- a/modules/snapshot_bootstrapper/NOTES.md +++ b/modules/snapshot_bootstrapper/NOTES.md @@ -6,27 +6,27 @@ needed to boot from a snapshot file. See [snapshot_bootstrapper](src/bootstrapper.rs) for the process that references and runs with these helpers. -Booting from a snapshot takes minutes instead of the hours it takes to boot from -genesis. It also allows booting from a given epoch which allows one to create tests +Booting from a snapshot should take minutes instead of the hours it takes to boot from +genesis. It also allows booting from a given epoch, which allows one to create tests that rely only on that epoch of data. We're also skipping some of the problematic -eras and will typically boot from Conway around epoch 305, 306, and 307. It takes -three epochs to have enough context to correctly calculate the rewards. +eras and will typically boot from Conway. At the moment, we're confident +it takes only 1 NewEpochState cbor dump to bootstrap the node. The required data for bootstrapping are: -- snapshot files (each has an associated epoch number and point) -- nonces -- headers +- snapshot files (each has an associated epoch number and point (slot + block hash)) +- nonces (not implemented yet) +- headers (not implemented yet) ## Snapshot Files -The snapshots come from the Amaru project. In their words, -"the snapshots we generated are different [from a Mithril snapshot]: they're -the actual ledger state; i.e. the in-memory state that is constructed by iterating over each block up to a specific -point. So, it's all the UTxOs, the set of pending governance actions, the account balance, etc. 
-If you get this from a trusted source, you don't need to do any replay, you can just start up and load this from disk. -The format of these is completely non-standard; we just forked the haskell node and spit out whatever we needed to in -CBOR." +The snapshot approach comes from the Amaru project. In their words, +"the snapshots we generated are different from a NewEpochState dump that's requested from an Ogmios GetCBOR endpoint +with a synchronizing node: they're the actual ledger state; i.e. the in-memory state that is constructed by iterating +over each block up to a specific point. So, it's all the UTxOs, the set of pending governance actions, the account +balance, etc. +If you get this from a trusted source, you don't need to do any replay, you can just start up and load this from the +disk." Snapshot files are referenced by their epoch number in the config.json file below. @@ -68,20 +68,19 @@ for each of the three snapshot files. Loading occurs in this order: 5. Filter snapshots based on epochs specified in config.json 6. Download snapshot files (skips if already present) 7. Publish `SnapshotMessage::Startup` to the snapshot topic -8. Parse each snapshot file using the streaming parser +8. Parse each snapshot file sequentially using the [streaming_snapshot](../../common/src/snapshot/streaming_snapshot.rs) 9. Publish `CardanoMessage::SnapshotComplete` with final block info to the completion topic Modules in the system will have subscribed to the startup and completion topics before the bootstrapper runs the above sequence. Upon receiving snapshot data messages, -they will use the data to populate their state, history (for BlockFrost), -and any other state required to achieve readiness to operate. +they will use the data to populate their state, and any other state required to achieve readiness to operate. ## Data update messages The bootstrapper publishes data as it parses the snapshot files using the `SnapshotPublisher`. 
Snapshot parsing is done while streaming the data to keep the memory footprint lower. As elements of the file are parsed, callbacks provide the data -to the publisher which can then publish structured data on the message bus. +to the publisher, which can then publish structured data on the message bus. The `SnapshotPublisher` implements the streaming snapshot callbacks: @@ -92,9 +91,9 @@ The `SnapshotPublisher` implements the streaming snapshot callbacks: - `ProposalCallback`: Receives governance proposals - `SnapshotCallbacks`: Receives metadata and completion signals -Currently the publisher just accumulates this data, but this will need to be extended to publish the corresponding +Currently, the publisher just accumulates this data, but this will need to be extended to publish the corresponding message types. Publishing of detailed snapshot data to downstream modules can be added by implementing the -appropriate message bus publishes in the callback methods. +appropriate message bus publishing in the callback methods. 
## Configuration diff --git a/modules/snapshot_bootstrapper/data/mainnet/headers.json b/modules/snapshot_bootstrapper/data/mainnet/headers.json deleted file mode 100644 index adbfb813..00000000 --- a/modules/snapshot_bootstrapper/data/mainnet/headers.json +++ /dev/null @@ -1,4 +0,0 @@ -[ - "134524751.93d554d67c46749f45fba3a091857a9c489ad3ed1d2c7b32b587ab290bec51f5", - "134956761.13cb4a62597e36fad2dba4e00974ec5ac29c3824d96b2ceb4ce056271cd4f8da" -] \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/data/mainnet/nonces.json b/modules/snapshot_bootstrapper/data/mainnet/nonces.json deleted file mode 100644 index 3d38d6f5..00000000 --- a/modules/snapshot_bootstrapper/data/mainnet/nonces.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "at": "134956789.6558deef007ba372a414466e49214368c17c1f8428093193fc187d1c4587053c", - "active": "0b9e320e63bf995b81287ce7a624b6735d98b083cc1a0e2ae8b08b680c79c983", - "candidate": "6cc4dafecbe0d593ca0dee64518542f5faa741538791ac7fc2d5008f32d5c4d5", - "evolving": "f5589f01dd0efd0add0c58e8b27dc73ba3fcd662d9026b3fedbf06c648adb313", - "tail": "29011cc1320d03b3da0121236dc66e6bc391feef4bb1d506a7fb20e769d6a494" -} \ No newline at end of file From a982790e9e9daa66227fcc27ad5f4bed5f0da3db Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Tue, 25 Nov 2025 09:14:35 -0800 Subject: [PATCH 25/28] chore: move tempfile to dev deps --- common/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/common/Cargo.toml b/common/Cargo.toml index de7b54f6..63473f4e 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -29,7 +29,6 @@ regex = "1" serde = { workspace = true, features = ["rc"] } serde_json = { workspace = true } serde_with = { workspace = true, features = ["base64"] } -tempfile = "3" tokio = { workspace = true } tracing = { workspace = true } futures = "0.3.31" From c971d8c59739b2aad15f4cae303a02a636f59e96 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Tue, 25 Nov 2025 11:42:42 -0800 Subject: [PATCH 26/28] feat: take stab 
at building block info to hand off --- Cargo.lock | 1 + modules/snapshot_bootstrapper/Cargo.toml | 1 + .../snapshot_bootstrapper/src/bootstrapper.rs | 45 +++++++++++++------ .../src/configuration.rs | 11 +++++ .../snapshot_bootstrapper/src/publisher.rs | 4 -- 5 files changed, 45 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 71c9e942..2480e61a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,6 +390,7 @@ dependencies = [ "config", "flate2", "futures-util", + "hex", "reqwest 0.12.24", "serde", "serde_json", diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml index 1f00d1f8..35cb7b84 100644 --- a/modules/snapshot_bootstrapper/Cargo.toml +++ b/modules/snapshot_bootstrapper/Cargo.toml @@ -24,6 +24,7 @@ async-compression = { version = "0.4.32", features = ["tokio", "gzip"] } reqwest = { version = "0.12", features = ["stream"] } futures-util = "0.3.31" tokio-util = "0.7.17" +hex = "0.4.3" [dev-dependencies] wiremock = "0.6.5" diff --git a/modules/snapshot_bootstrapper/src/bootstrapper.rs b/modules/snapshot_bootstrapper/src/bootstrapper.rs index 39befc31..022138cc 100644 --- a/modules/snapshot_bootstrapper/src/bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/bootstrapper.rs @@ -6,6 +6,7 @@ mod publisher; use crate::configuration::{ConfigError, NetworkConfig, SnapshotConfig, SnapshotFileMetadata}; use crate::downloader::{DownloadError, SnapshotDownloader}; use crate::publisher::SnapshotPublisher; +use acropolis_common::genesis_values::GenesisValues; use acropolis_common::snapshot::streaming_snapshot::StreamingSnapshotParser; use acropolis_common::{ messages::{CardanoMessage, Message}, @@ -180,11 +181,15 @@ impl SnapshotBootstrapper { Self::parse_snapshot(&file_path, &mut publisher).await?; } - let metadata = publisher - .metadata() - .ok_or_else(|| anyhow::anyhow!("No metadata received from snapshots"))?; + let last_snapshot = + snapshots.last().ok_or_else(|| anyhow::anyhow!("No snapshots to 
process"))?; + + let block_info = build_block_info_from_metadata(last_snapshot).map_err(|e| { + BootstrapError::Parse(format!( + "Failed to build block info from snapshot metadata: {e}" + )) + })?; - let block_info = build_block_info_from_metadata(metadata); publisher.publish_completion(block_info).await?; Ok(()) @@ -204,18 +209,32 @@ impl SnapshotBootstrapper { } } -fn build_block_info_from_metadata( - metadata: &acropolis_common::snapshot::streaming_snapshot::SnapshotMetadata, -) -> BlockInfo { - BlockInfo { +fn build_block_info_from_metadata(metadata: &SnapshotFileMetadata) -> Result { + let (slot, block_hash_str) = metadata + .parse_point() + .ok_or_else(|| anyhow::anyhow!("Invalid point format: {}", metadata.point))?; + + let hash = BlockHash::try_from(hex::decode(block_hash_str)?) + .map_err(|e| anyhow::anyhow!("Invalid block hash hex: {:?}", e))?; + + let genesis = GenesisValues::mainnet(); + let epoch_slot = slot - genesis.epoch_to_first_slot(slot); + let timestamp = genesis.slot_to_timestamp(slot); + + info!( + "Block info built: slot={}, hash={}, epoch={}, slot_in_epoch={}, timestamp={}", + slot, hash, metadata.epoch, epoch_slot, timestamp + ); + + Ok(BlockInfo { status: BlockStatus::Immutable, - slot: 0, + slot, number: 0, - hash: BlockHash::default(), + hash, epoch: metadata.epoch, - epoch_slot: 0, + epoch_slot, new_epoch: false, - timestamp: 0, + timestamp, era: Era::Conway, - } + }) } diff --git a/modules/snapshot_bootstrapper/src/configuration.rs b/modules/snapshot_bootstrapper/src/configuration.rs index ba52d27a..407ad165 100644 --- a/modules/snapshot_bootstrapper/src/configuration.rs +++ b/modules/snapshot_bootstrapper/src/configuration.rs @@ -108,6 +108,17 @@ impl SnapshotFileMetadata { Ok(snapshots) } + pub fn parse_point(&self) -> Option<(u64, String)> { + let parts: Vec<&str> = self.point.splitn(2, '.').collect(); + if parts.len() == 2 { + let slot = parts[0].parse().ok()?; + let hash = parts[1].to_string(); + Some((slot, hash)) + } else { + 
None + } + } + pub fn file_path(&self, network_dir: &str) -> String { format!("{}/{}.cbor", network_dir, self.point) } diff --git a/modules/snapshot_bootstrapper/src/publisher.rs b/modules/snapshot_bootstrapper/src/publisher.rs index 2a114688..61a3d904 100644 --- a/modules/snapshot_bootstrapper/src/publisher.rs +++ b/modules/snapshot_bootstrapper/src/publisher.rs @@ -58,10 +58,6 @@ impl SnapshotPublisher { ))); self.context.publish(&self.completion_topic, message).await } - - pub fn metadata(&self) -> Option<&SnapshotMetadata> { - self.metadata.as_ref() - } } impl UtxoCallback for SnapshotPublisher { From 6afe16ff63da48f0acdab9262f2ecc8298a1a426 Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Wed, 26 Nov 2025 09:45:39 -0800 Subject: [PATCH 27/28] refactor: remove startup signal handling from bootstrapper --- modules/snapshot_bootstrapper/config.default.toml | 4 ---- modules/snapshot_bootstrapper/src/bootstrapper.rs | 13 ------------- modules/snapshot_bootstrapper/src/configuration.rs | 4 ---- 3 files changed, 21 deletions(-) diff --git a/modules/snapshot_bootstrapper/config.default.toml b/modules/snapshot_bootstrapper/config.default.toml index 172222c2..b60f2968 100644 --- a/modules/snapshot_bootstrapper/config.default.toml +++ b/modules/snapshot_bootstrapper/config.default.toml @@ -8,10 +8,6 @@ network = "mainnet" # Expected files: config.json (network config) and snapshots.json (snapshot metadata) data-dir = "./data" -# Topic to subscribe to for receiving the initial startup signal. -# The bootstrapper waits for a message on this topic before beginning any work. -startup-topic = "cardano.sequence.start" - # Topic for publishing snapshot data during processing. # Publishes a Startup message when snapshot processing begins, followed by # snapshot content (UTXOs, pools, accounts, DReps, governance proposals). 
diff --git a/modules/snapshot_bootstrapper/src/bootstrapper.rs b/modules/snapshot_bootstrapper/src/bootstrapper.rs index 022138cc..49d6bb24 100644 --- a/modules/snapshot_bootstrapper/src/bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/bootstrapper.rs @@ -53,18 +53,11 @@ impl SnapshotBootstrapper { info!(" Publishing on '{}'", cfg.snapshot_topic); info!(" Completing with '{}'", cfg.completion_topic); - let startup_sub = context.subscribe(&cfg.startup_topic).await?; let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_subscribe_topic).await?; context.clone().run(async move { let span = info_span!("snapshot_bootstrapper.handle"); async { - // Wait for the startup signal - if let Err(e) = Self::wait_startup(startup_sub).await { - error!("Failed waiting for startup: {e:#}"); - return; - } - // Wait for genesis bootstrap completion if let Err(e) = Self::wait_genesis_completion(bootstrapped_sub).await { error!("Failed waiting for bootstrapped: {e:#}"); @@ -138,12 +131,6 @@ impl SnapshotBootstrapper { Ok(()) } - async fn wait_startup(mut subscription: Box>) -> Result<()> { - let (_, _message) = subscription.read().await?; - info!("Received startup message"); - Ok(()) - } - async fn wait_genesis_completion( mut subscription: Box>, ) -> Result<()> { diff --git a/modules/snapshot_bootstrapper/src/configuration.rs b/modules/snapshot_bootstrapper/src/configuration.rs index 407ad165..0e4a0d44 100644 --- a/modules/snapshot_bootstrapper/src/configuration.rs +++ b/modules/snapshot_bootstrapper/src/configuration.rs @@ -27,7 +27,6 @@ pub enum ConfigError { pub struct SnapshotConfig { pub network: String, pub data_dir: String, - pub startup_topic: String, pub snapshot_topic: String, pub bootstrapped_subscribe_topic: String, pub completion_topic: String, @@ -172,7 +171,6 @@ mod tests { let config = SnapshotConfig { network: "mainnet".to_string(), data_dir: "./data".to_string(), - startup_topic: "startup".to_string(), snapshot_topic: "snapshot".to_string(), 
bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), @@ -186,7 +184,6 @@ mod tests { let config = SnapshotConfig { network: "preprod".to_string(), data_dir: "/var/data".to_string(), - startup_topic: "startup".to_string(), snapshot_topic: "snapshot".to_string(), bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), @@ -200,7 +197,6 @@ mod tests { let config = SnapshotConfig { network: "mainnet".to_string(), data_dir: "./data".to_string(), - startup_topic: "startup".to_string(), snapshot_topic: "snapshot".to_string(), bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), From c96e2018a6fa45155ede2999d813fc57c41178de Mon Sep 17 00:00:00 2001 From: Matthew Hounslow Date: Wed, 26 Nov 2025 10:32:17 -0800 Subject: [PATCH 28/28] feat: implement single snapshot download and processing with configurable timeouts --- modules/snapshot_bootstrapper/NOTES.md | 55 +++--- modules/snapshot_bootstrapper/README.md | 31 +++- .../snapshot_bootstrapper/config.default.toml | 13 +- .../data/mainnet/config.json | 6 +- .../snapshot_bootstrapper/src/bootstrapper.rs | 86 ++++++---- .../src/configuration.rs | 95 +++++++++-- .../snapshot_bootstrapper/src/downloader.rs | 161 +++++++++++++----- 7 files changed, 321 insertions(+), 126 deletions(-) diff --git a/modules/snapshot_bootstrapper/NOTES.md b/modules/snapshot_bootstrapper/NOTES.md index 97673b66..0e766e94 100644 --- a/modules/snapshot_bootstrapper/NOTES.md +++ b/modules/snapshot_bootstrapper/NOTES.md @@ -9,12 +9,12 @@ references and runs with these helpers. Booting from a snapshot should take minutes instead of the hours it takes to boot from genesis. It also allows booting from a given epoch, which allows one to create tests that rely only on that epoch of data. We're also skipping some of the problematic -eras and will typically boot from Conway. 
At the moment, we're confident -it takes only 1 NewEpochState cbor dump to bootstrap the node. +eras and will typically boot from Conway. It takes only 1 NewEpochState cbor dump +to bootstrap the node. The required data for bootstrapping are: -- snapshot files (each has an associated epoch number and point (slot + block hash)) +- snapshot file (with an associated epoch number and point (slot + block hash)) - nonces (not implemented yet) - headers (not implemented yet) @@ -28,7 +28,7 @@ balance, etc. If you get this from a trusted source, you don't need to do any replay, you can just start up and load this from the disk." -Snapshot files are referenced by their epoch number in the config.json file below. +The snapshot file is referenced by its epoch number in the config.json file below. See [Amaru snapshot format](../../docs/amaru-snapshot-structure.md) @@ -46,7 +46,7 @@ be used as a prefix to resolve per-network configuration files needed for bootstrapping. Given a source directory `data`, and a network name of `preview`, the expected layout for configuration files would be: -* `data/preview/config.json`: a list of epochs to load and points +* `data/preview/config.json`: the epoch to load and points * `data/preview/snapshots.json`: a list of `SnapshotFileMetadata` values (epoch, point, url) These files are loaded by [snapshot_bootstrapper](src/bootstrapper.rs) @@ -57,19 +57,18 @@ during bootup. The bootstrapper will be started with a configuration that specifies a network, e.g. "mainnet". From the network, it will build a path to the configuration and snapshot files as shown above, then load the data contained or described -in those files. config.json holds a list of typically 3 epochs that can be -used to index into snapshots.json to find the corresponding URLs and meta-data -for each of the three snapshot files. Loading occurs in this order: - -1. Wait for `startup-topic` message (typically `cardano.sequence.start`) -2. 
Wait for `bootstrapped-topic` message with genesis values (typically `cardano.sequence.bootstrapped`) -3. Load network configuration from `config.json` -4. Load snapshot metadata from `snapshots.json` -5. Filter snapshots based on epochs specified in config.json -6. Download snapshot files (skips if already present) -7. Publish `SnapshotMessage::Startup` to the snapshot topic -8. Parse each snapshot file sequentially using the [streaming_snapshot](../../common/src/snapshot/streaming_snapshot.rs) -9. Publish `CardanoMessage::SnapshotComplete` with final block info to the completion topic +in those files. config.json holds a single epoch that is used to look up the +corresponding URL and metadata in snapshots.json for the snapshot file. +Loading occurs in this order: + +1. Wait for `bootstrapped-topic` message with genesis values (typically `cardano.sequence.bootstrapped`) +2. Load network configuration from `config.json` +3. Load snapshot metadata from `snapshots.json` +4. Find snapshot matching the epoch specified in config.json +5. Download snapshot file (skips if already present) +6. Publish `SnapshotMessage::Startup` to the snapshot topic +7. Parse the snapshot file using the [streaming_snapshot](../../common/src/snapshot/streaming_snapshot.rs) +8. Publish `CardanoMessage::SnapshotComplete` with final block info to the completion topic Modules in the system will have subscribed to the startup and completion topics before the bootstrapper runs the above sequence. Upon receiving snapshot data messages, @@ -77,7 +76,7 @@ they will use the data to populate their state, and any other state required to ## Data update messages -The bootstrapper publishes data as it parses the snapshot files using the `SnapshotPublisher`. +The bootstrapper publishes data as it parses the snapshot file using the `SnapshotPublisher`. Snapshot parsing is done while streaming the data to keep the memory footprint lower. 
As elements of the file are parsed, callbacks provide the data to the publisher, which can then publish structured data on the message bus. @@ -101,7 +100,21 @@ The bootstrapper supports the following configuration options: - `network`: Network name (default: "mainnet") - `data-dir`: Base directory for network data (default: "./data") -- `startup-topic`: Topic to wait for startup signal (default: "cardano.sequence.start") - `snapshot-topic`: Topic to publish snapshot messages (default: "cardano.snapshot") - `bootstrapped-subscribe-topic`: Topic to receive genesis completion (default: "cardano.sequence.bootstrapped") -- `completion-topic`: Topic to publish completion signal (default: "cardano.snapshot.complete") \ No newline at end of file +- `completion-topic`: Topic to publish completion signal (default: "cardano.snapshot.complete") + +## Example config.json + +```json +{ + "snapshot": 500, + "points": [ + { + "epoch": 500, + "id": "abc123...", + "slot": 12345678 + } + ] +} +``` \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/README.md b/modules/snapshot_bootstrapper/README.md index 8ea90146..b42d9379 100644 --- a/modules/snapshot_bootstrapper/README.md +++ b/modules/snapshot_bootstrapper/README.md @@ -1,6 +1,6 @@ # Snapshot Bootstrapper Module -The snapshot bootstrapper module downloads and processes Cardano ledger snapshots to initialize system state before +The snapshot bootstrapper module downloads and processes a Cardano ledger snapshot to initialize system state before processing the live chain. ## Overview @@ -8,7 +8,7 @@ processing the live chain. This module: 1. Waits for genesis bootstrap completion -2. Downloads compressed snapshot files from configured URLs +2. Downloads a compressed snapshot file from a configured URL 3. Streams and publishes snapshot data (UTXOs, pools, accounts, DReps, proposals) 4. 
Signals completion to allow chain synchronization to begin @@ -16,7 +16,6 @@ This module: The snapshot bootstrapper: -- **Subscribes to** `cardano.sequence.start` - Waits for startup signal - **Subscribes to** `cardano.sequence.bootstrapped` - Waits for genesis completion - **Publishes to** `cardano.snapshot` - Streams snapshot data during processing - **Publishes to** `cardano.snapshot.complete` - Signals completion with block info @@ -31,17 +30,37 @@ network = "mainnet" data-dir = "./data" # Message topics -startup-topic = "cardano.sequence.start" snapshot-topic = "cardano.snapshot" bootstrapped-subscribe-topic = "cardano.sequence.bootstrapped" completion-topic = "cardano.snapshot.complete" + +# Download settings +[download] +timeout-secs = 300 +connect-timeout-secs = 30 +progress-log-interval = 200 ``` ## Directory Structure The module expects the following files in `{data-dir}/{network}/`: -- **`config.json`** - Network configuration specifying which snapshot epochs to load +- **`config.json`** - Network configuration specifying which snapshot epoch to load - **`snapshots.json`** - Snapshot metadata including download URLs -Snapshot files are downloaded to `{data-dir}/{network}/{point}.cbor`. \ No newline at end of file +The snapshot file is downloaded to `{data-dir}/{network}/{point}.cbor`. + +## Example config.json + +```json +{ + "snapshot": 500, + "points": [ + { + "epoch": 500, + "id": "abc123...", + "slot": 12345678 + } + ] +} +``` \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/config.default.toml b/modules/snapshot_bootstrapper/config.default.toml index b60f2968..a55b9726 100644 --- a/modules/snapshot_bootstrapper/config.default.toml +++ b/modules/snapshot_bootstrapper/config.default.toml @@ -21,4 +21,15 @@ bootstrapped-subscribe-topic = "cardano.sequence.bootstrapped" # Topic for publishing the final SnapshotComplete message. 
# Signals that all snapshots have been fully processed and the # blockchain state is ready for downstream consumers. -completion-topic = "cardano.snapshot.complete" \ No newline at end of file +completion-topic = "cardano.snapshot.complete" + +[download] +# Total request timeout in seconds (default: 5 minutes) +timeout-secs = 300 + +# Connection timeout in seconds +connect-timeout-secs = 30 + +# How often to log download progress, measured in number of chunks received. +# Lower values provide more frequent updates but will increase log volume. +progress-log-interval = 200 \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/data/mainnet/config.json b/modules/snapshot_bootstrapper/data/mainnet/config.json index a1bb266a..59bcc8eb 100644 --- a/modules/snapshot_bootstrapper/data/mainnet/config.json +++ b/modules/snapshot_bootstrapper/data/mainnet/config.json @@ -1,9 +1,5 @@ { - "snapshots": [ - 507, - 508, - 509 - ], + "snapshot": 507, "points": [ { "epoch": 507, diff --git a/modules/snapshot_bootstrapper/src/bootstrapper.rs b/modules/snapshot_bootstrapper/src/bootstrapper.rs index 49d6bb24..b931b898 100644 --- a/modules/snapshot_bootstrapper/src/bootstrapper.rs +++ b/modules/snapshot_bootstrapper/src/bootstrapper.rs @@ -31,6 +31,9 @@ pub enum BootstrapError { #[error("Snapshot parsing failed: {0}")] Parse(String), + #[error("Snapshot not found for epoch {0}")] + SnapshotNotFound(u64), + #[error(transparent)] Other(#[from] anyhow::Error), } @@ -52,6 +55,14 @@ impl SnapshotBootstrapper { info!(" Data directory: {}", cfg.data_dir); info!(" Publishing on '{}'", cfg.snapshot_topic); info!(" Completing with '{}'", cfg.completion_topic); + info!( + " Download timeouts: {}s total, {}s connect", + cfg.download.timeout_secs, cfg.download.connect_timeout_secs + ); + info!( + " Progress log interval: {} chunks", + cfg.download.progress_log_interval + ); let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_subscribe_topic).await?; @@ -85,23 +96,28 @@ impl 
SnapshotBootstrapper { } }; - // Filter snapshots based on network config - let target_snapshots = SnapshotFileMetadata::filter_by_epochs( + // Find the target snapshot based on network config + let target_snapshot = match SnapshotFileMetadata::find_by_epoch( &all_snapshots, - &network_config.snapshots, - ); - if target_snapshots.is_empty() { - error!( - "No snapshots found for requested epochs: {:?}", - network_config.snapshots - ); - return; - } + network_config.snapshot, + ) { + Some(snapshot) => snapshot, + None => { + error!( + "No snapshot found for requested epoch: {}", + network_config.snapshot + ); + return; + } + }; - info!("Found {} snapshot(s) to process", target_snapshots.len()); + info!( + "Found snapshot for epoch {} at point {}", + target_snapshot.epoch, target_snapshot.point + ); - // Create downloader and download all snapshots - let downloader = match SnapshotDownloader::new(cfg.network_dir()) { + // Create downloader and download the snapshot + let downloader = match SnapshotDownloader::new(cfg.network_dir(), &cfg.download) { Ok(d) => d, Err(e) => { error!("Failed to create snapshot downloader: {e:#}"); @@ -109,16 +125,20 @@ impl SnapshotBootstrapper { } }; - if let Err(e) = downloader.download_all(&target_snapshots).await { - error!("Failed to download snapshots: {e:#}"); - return; - } + let file_path = match downloader.download(&target_snapshot).await { + Ok(path) => path, + Err(e) => { + error!("Failed to download snapshot: {e:#}"); + return; + } + }; - // Process snapshots in order + // Process the snapshot if let Err(e) = - Self::process_snapshots(&target_snapshots, &cfg, context.clone()).await + Self::process_snapshot(&target_snapshot, &file_path, &cfg, context.clone()) + .await { - error!("Failed to process snapshots: {e:#}"); + error!("Failed to process snapshot: {e:#}"); return; } @@ -144,8 +164,9 @@ impl SnapshotBootstrapper { } } - async fn process_snapshots( - snapshots: &[SnapshotFileMetadata], + async fn process_snapshot( + 
snapshot_meta: &SnapshotFileMetadata, + file_path: &str, cfg: &SnapshotConfig, context: Arc>, ) -> Result<()> { @@ -157,21 +178,14 @@ impl SnapshotBootstrapper { publisher.publish_start().await?; - for snapshot_meta in snapshots { - let file_path = snapshot_meta.file_path(&cfg.network_dir()); - - info!( - "Processing snapshot for epoch {} from {}", - snapshot_meta.epoch, file_path - ); - - Self::parse_snapshot(&file_path, &mut publisher).await?; - } + info!( + "Processing snapshot for epoch {} from {}", + snapshot_meta.epoch, file_path + ); - let last_snapshot = - snapshots.last().ok_or_else(|| anyhow::anyhow!("No snapshots to process"))?; + Self::parse_snapshot(file_path, &mut publisher).await?; - let block_info = build_block_info_from_metadata(last_snapshot).map_err(|e| { + let block_info = build_block_info_from_metadata(snapshot_meta).map_err(|e| { BootstrapError::Parse(format!( "Failed to build block info from snapshot metadata: {e}" )) diff --git a/modules/snapshot_bootstrapper/src/configuration.rs b/modules/snapshot_bootstrapper/src/configuration.rs index 0e4a0d44..e7c18837 100644 --- a/modules/snapshot_bootstrapper/src/configuration.rs +++ b/modules/snapshot_bootstrapper/src/configuration.rs @@ -30,8 +30,52 @@ pub struct SnapshotConfig { pub snapshot_topic: String, pub bootstrapped_subscribe_topic: String, pub completion_topic: String, + #[serde(default)] + pub download: DownloadConfig, } +/// Configuration for snapshot downloads +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct DownloadConfig { + /// Total request timeout in seconds + #[serde(default = "DownloadConfig::default_timeout_secs")] + pub timeout_secs: u64, + + /// Connection timeout in seconds + #[serde(default = "DownloadConfig::default_connect_timeout_secs")] + pub connect_timeout_secs: u64, + + /// How often to log download progress (in number of chunks) + #[serde(default = "DownloadConfig::default_progress_log_interval")] + pub progress_log_interval: u64, 
+} + +impl Default for DownloadConfig { + fn default() -> Self { + Self { + timeout_secs: Self::default_timeout_secs(), + connect_timeout_secs: Self::default_connect_timeout_secs(), + progress_log_interval: Self::default_progress_log_interval(), + } + } +} + +impl DownloadConfig { + fn default_timeout_secs() -> u64 { + 300 // 5 minutes + } + + fn default_connect_timeout_secs() -> u64 { + 30 + } + + fn default_progress_log_interval() -> u64 { + 200 + } +} + +/// Snapshot bootstrapper configuration impl SnapshotConfig { pub fn try_load(config: &Config) -> Result { let full_config = Config::builder() @@ -61,7 +105,7 @@ impl SnapshotConfig { #[derive(Debug, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] pub struct NetworkConfig { - pub snapshots: Vec, + pub snapshot: u64, pub points: Vec, } @@ -122,8 +166,8 @@ impl SnapshotFileMetadata { format!("{}/{}.cbor", network_dir, self.point) } - pub fn filter_by_epochs(snapshots: &[Self], epochs: &[u64]) -> Vec { - snapshots.iter().filter(|s| epochs.contains(&s.epoch)).cloned().collect() + pub fn find_by_epoch(snapshots: &[Self], epoch: u64) -> Option { + snapshots.iter().find(|s| s.epoch == epoch).cloned() } } @@ -134,9 +178,9 @@ mod tests { use std::path::Path; use tempfile::TempDir; - fn create_test_network_config(dir: &Path, snapshots: Vec) -> PathBuf { + fn create_test_network_config(dir: &Path, snapshot: u64) -> PathBuf { let config = NetworkConfig { - snapshots, + snapshot, points: vec![Point { epoch: 500, id: "test_block_hash".to_string(), @@ -166,6 +210,14 @@ mod tests { snapshots_path } + #[test] + fn test_download_config_defaults() { + let config = DownloadConfig::default(); + assert_eq!(config.timeout_secs, 300); + assert_eq!(config.connect_timeout_secs, 30); + assert_eq!(config.progress_log_interval, 200); + } + #[test] fn test_snapshot_config_network_dir() { let config = SnapshotConfig { @@ -174,6 +226,7 @@ mod tests { snapshot_topic: "snapshot".to_string(), bootstrapped_subscribe_topic: 
"bootstrapped".to_string(), completion_topic: "completion".to_string(), + download: DownloadConfig::default(), }; assert_eq!(config.network_dir(), "./data/mainnet"); @@ -187,6 +240,7 @@ mod tests { snapshot_topic: "snapshot".to_string(), bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), + download: DownloadConfig::default(), }; assert_eq!(config.config_path(), "/var/data/preprod/config.json"); @@ -200,6 +254,7 @@ mod tests { snapshot_topic: "snapshot".to_string(), bootstrapped_subscribe_topic: "bootstrapped".to_string(), completion_topic: "completion".to_string(), + download: DownloadConfig::default(), }; assert_eq!(config.snapshots_path(), "./data/mainnet/snapshots.json"); @@ -220,7 +275,7 @@ mod tests { } #[test] - fn test_filter_by_epochs() { + fn test_find_by_epoch_found() { let all_snapshots = vec![ SnapshotFileMetadata { epoch: 500, @@ -239,23 +294,37 @@ mod tests { }, ]; - let filtered = SnapshotFileMetadata::filter_by_epochs(&all_snapshots, &[500, 502]); + let found = SnapshotFileMetadata::find_by_epoch(&all_snapshots, 501); + + assert!(found.is_some()); + let snapshot = found.unwrap(); + assert_eq!(snapshot.epoch, 501); + assert_eq!(snapshot.point, "point_501"); + } + + #[test] + fn test_find_by_epoch_not_found() { + let all_snapshots = vec![SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: "url1".to_string(), + }]; + + let found = SnapshotFileMetadata::find_by_epoch(&all_snapshots, 999); - assert_eq!(filtered.len(), 2); - assert_eq!(filtered[0].epoch, 500); - assert_eq!(filtered[1].epoch, 502); + assert!(found.is_none()); } #[test] fn test_read_network_config_success() { let temp_dir = TempDir::new().unwrap(); - let config_path = create_test_network_config(temp_dir.path(), vec![500, 501]); + let config_path = create_test_network_config(temp_dir.path(), 500); let result = NetworkConfig::read_from_file(config_path.to_str().unwrap()); assert!(result.is_ok()); let config = 
result.unwrap(); - assert_eq!(config.snapshots, vec![500, 501]); + assert_eq!(config.snapshot, 500); assert_eq!(config.points.len(), 1); } @@ -314,7 +383,7 @@ mod tests { let temp_dir = TempDir::new().unwrap(); let config_path = temp_dir.path().join("config.json"); let mut file = fs::File::create(&config_path).unwrap(); - file.write_all(b"{\"snapshots\": [500, 501]").unwrap(); + file.write_all(b"{\"snapshot\": 500").unwrap(); let result = NetworkConfig::read_from_file(config_path.to_str().unwrap()); assert!(result.is_err()); diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs index 779cff53..c62dd742 100644 --- a/modules/snapshot_bootstrapper/src/downloader.rs +++ b/modules/snapshot_bootstrapper/src/downloader.rs @@ -1,10 +1,11 @@ -use crate::configuration::SnapshotFileMetadata; +use crate::configuration::{DownloadConfig, SnapshotFileMetadata}; use crate::progress_reader::ProgressReader; use async_compression::tokio::bufread::GzipDecoder; use futures_util::TryStreamExt; use reqwest::Client; use std::io; use std::path::{Path, PathBuf}; +use std::time::Duration; use thiserror::Error; use tokio::fs::File; use tokio::io::BufReader; @@ -32,37 +33,36 @@ pub enum DownloadError { pub struct SnapshotDownloader { client: Client, network_dir: String, + cfg: DownloadConfig, } impl SnapshotDownloader { - pub fn new(network_dir: String) -> Result { + pub fn new(network_dir: String, config: &DownloadConfig) -> Result { let client = Client::builder() - .timeout(std::time::Duration::from_mins(5)) - .connect_timeout(std::time::Duration::from_secs(30)) + .timeout(Duration::from_secs(config.timeout_secs)) + .connect_timeout(Duration::from_secs(config.connect_timeout_secs)) .build()?; Ok(Self { client, network_dir, + cfg: config.clone(), }) } - pub async fn download_all( - &self, - snapshots: &[SnapshotFileMetadata], - ) -> Result<(), DownloadError> { - for snapshot_meta in snapshots { - let file_path = 
snapshot_meta.file_path(&self.network_dir); - self.download_single(&snapshot_meta.url, &file_path).await?; - } - Ok(()) + /// Downloads the snapshot file specified by the metadata. + /// Returns the path to the downloaded file. + pub async fn download(&self, snapshot: &SnapshotFileMetadata) -> Result { + let file_path = snapshot.file_path(&self.network_dir); + self.download_from_url(&snapshot.url, &file_path).await?; + Ok(file_path) } /// Downloads a gzip-compressed snapshot from the given URL, decompresses it on-the-fly, /// and saves the decompressed CBOR data to the specified output path. /// The data is first written to a `.partial` temporary file to ensure atomicity /// and then renamed to the final output path upon successful completion. - pub async fn download_single(&self, url: &str, output_path: &str) -> Result<(), DownloadError> { + async fn download_from_url(&self, url: &str, output_path: &str) -> Result<(), DownloadError> { let path = Path::new(output_path); if path.exists() { @@ -100,7 +100,8 @@ impl SnapshotDownloader { let stream = response.bytes_stream().map_err(io::Error::other); let async_read = tokio_util::io::StreamReader::new(stream); - let progress_reader = ProgressReader::new(async_read, content_length, 200); + let progress_reader = + ProgressReader::new(async_read, content_length, self.cfg.progress_log_interval); let buffered = BufReader::new(progress_reader); let mut decoder = GzipDecoder::new(buffered); @@ -138,21 +139,29 @@ mod tests { encoder.finish().unwrap() } + fn default_config() -> DownloadConfig { + DownloadConfig::default() + } + #[tokio::test] async fn test_downloader_skips_existing_file() { let temp_dir = TempDir::new().unwrap(); - let file_path = temp_dir.path().join("snapshot.cbor"); + let file_path = temp_dir.path().join("point_500.cbor"); std::fs::write(&file_path, b"existing data").unwrap(); - let downloader = - SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + let downloader = 
SnapshotDownloader::new( + temp_dir.path().to_str().unwrap().to_string(), + &default_config(), + ) + .unwrap(); - let result = downloader - .download_single( - "https://example.com/snapshot.cbor.gz", - file_path.to_str().unwrap(), - ) - .await; + let snapshot = SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: "https://example.com/snapshot.cbor.gz".to_string(), + }; + + let result = downloader.download(&snapshot).await; assert!(result.is_ok()); assert_eq!(std::fs::read(&file_path).unwrap(), b"existing data"); @@ -170,14 +179,22 @@ mod tests { .await; let temp_dir = TempDir::new().unwrap(); - let file_path = temp_dir.path().join("snapshot.cbor"); - let downloader = - SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + let downloader = SnapshotDownloader::new( + temp_dir.path().to_str().unwrap().to_string(), + &default_config(), + ) + .unwrap(); + + let snapshot = SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: format!("{}/snapshot.cbor.gz", mock_server.uri()), + }; - let url = format!("{}/snapshot.cbor.gz", mock_server.uri()); - let result = downloader.download_single(&url, file_path.to_str().unwrap()).await; + let result = downloader.download(&snapshot).await; assert!(result.is_ok()); + let file_path = result.unwrap(); assert_eq!(std::fs::read(&file_path).unwrap(), b"snapshot content"); } @@ -192,17 +209,26 @@ mod tests { .await; let temp_dir = TempDir::new().unwrap(); - let file_path = temp_dir.path().join("snapshot.cbor"); - let downloader = - SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + let downloader = SnapshotDownloader::new( + temp_dir.path().to_str().unwrap().to_string(), + &default_config(), + ) + .unwrap(); + + let snapshot = SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: format!("{}/snapshot.cbor.gz", mock_server.uri()), + }; - let url = format!("{}/snapshot.cbor.gz", mock_server.uri()); - let result 
= downloader.download_single(&url, file_path.to_str().unwrap()).await; + let result = downloader.download(&snapshot).await; assert!(matches!( result, Err(DownloadError::InvalidStatusCode(_, _)) )); + + let file_path = temp_dir.path().join("point_500.cbor"); assert!(!file_path.exists()); } @@ -218,14 +244,21 @@ mod tests { .await; let temp_dir = TempDir::new().unwrap(); - let file_path = temp_dir.path().join("nested").join("dir").join("snapshot.cbor"); + let network_dir = temp_dir.path().join("nested").join("dir"); let downloader = - SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + SnapshotDownloader::new(network_dir.to_str().unwrap().to_string(), &default_config()) + .unwrap(); + + let snapshot = SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: format!("{}/snapshot.cbor.gz", mock_server.uri()), + }; - let url = format!("{}/snapshot.cbor.gz", mock_server.uri()); - let result = downloader.download_single(&url, file_path.to_str().unwrap()).await; + let result = downloader.download(&snapshot).await; assert!(result.is_ok()); + let file_path = network_dir.join("point_500.cbor"); assert!(file_path.exists()); } @@ -240,16 +273,56 @@ mod tests { .await; let temp_dir = TempDir::new().unwrap(); - let file_path = temp_dir.path().join("snapshot.cbor"); - let partial_path = temp_dir.path().join("snapshot.partial"); - let downloader = - SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap(); + let downloader = SnapshotDownloader::new( + temp_dir.path().to_str().unwrap().to_string(), + &default_config(), + ) + .unwrap(); + + let snapshot = SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: format!("{}/snapshot.cbor.gz", mock_server.uri()), + }; - let url = format!("{}/snapshot.cbor.gz", mock_server.uri()); - let result = downloader.download_single(&url, file_path.to_str().unwrap()).await; + let result = downloader.download(&snapshot).await; assert!(result.is_err()); + 
let file_path = temp_dir.path().join("point_500.cbor"); + let partial_path = temp_dir.path().join("point_500.partial"); assert!(!file_path.exists()); assert!(!partial_path.exists()); } + + #[tokio::test] + async fn test_downloader_with_custom_config() { + let mock_server = MockServer::start().await; + let compressed = gzip_compress(b"data"); + + Mock::given(method("GET")) + .and(path("/snapshot.cbor.gz")) + .respond_with(ResponseTemplate::new(200).set_body_bytes(compressed)) + .mount(&mock_server) + .await; + + let temp_dir = TempDir::new().unwrap(); + let config = DownloadConfig { + timeout_secs: 600, + connect_timeout_secs: 60, + progress_log_interval: 100, + }; + + let downloader = + SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string(), &config) + .unwrap(); + + let snapshot = SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: format!("{}/snapshot.cbor.gz", mock_server.uri()), + }; + + let result = downloader.download(&snapshot).await; + assert!(result.is_ok()); + } }