diff --git a/Cargo.lock b/Cargo.lock index f34c3e52..2480e61a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -385,10 +385,21 @@ version = "0.1.0" dependencies = [ "acropolis_common", "anyhow", + "async-compression", "caryatid_sdk", "config", + "flate2", + "futures-util", + "hex", + "reqwest 0.12.24", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.17", "tokio", + "tokio-util", "tracing", + "wiremock", ] [[package]] @@ -528,6 +539,7 @@ dependencies = [ "acropolis_module_parameters_state", "acropolis_module_peer_network_interface", "acropolis_module_rest_blockfrost", + "acropolis_module_snapshot_bootstrapper", "acropolis_module_spdd_state", "acropolis_module_spo_state", "acropolis_module_stake_delta_filter", @@ -860,6 +872,16 @@ dependencies = [ "syn 2.0.109", ] +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "async-channel" version = "2.5.0" @@ -1459,9 +1481,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" dependencies = [ "serde", ] @@ -2171,6 +2193,24 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +[[package]] +name = "deadpool" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" +dependencies = [ + "deadpool-runtime", + "lazy_static", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "der" version = "0.7.10" @@ -3014,6 +3054,7 @@ dependencies = [ "http 1.3.1", "http-body 1.0.1", "httparse", + "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -3605,9 +3646,9 @@ dependencies = [ [[package]] name = "minicbor" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f182275033b808ede9427884caa8e05fa7db930801759524ca7925bd8aa7a82" +checksum = "fcdf88f4e6c97939dc99797448e722d618ab7e34f96d2c030d60fb8023793fa0" dependencies = [ "minicbor-derive 0.18.2", ] @@ -3651,7 +3692,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "546cc904f35809921fa57016a84c97e68d9d27c012e87b9dadc28c233705f783" dependencies = [ - "minicbor 2.1.1", + "minicbor 2.1.2", "serde", ] @@ -7435,6 +7476,29 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wiremock" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" +dependencies = [ + "assert-json-diff", + "base64 0.22.1", + "deadpool", + "futures", + "http 1.3.1", + "http-body-util", + "hyper 1.7.0", + "hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + [[package]] name = "wit-bindgen" version = "0.46.0" diff --git 
a/README.md b/README.md index 41cc4182..f28349ec 100644 --- a/README.md +++ b/README.md @@ -26,28 +26,23 @@ to communicate between micro-services. ```mermaid graph TB - subgraph Process A Module1(Module 1) Module2(Module 2) Caryatid1(Caryatid Framework) - Module1 <--> Caryatid1 Module2 <--> Caryatid1 end subgraph Process B Module3(Module 3) - Caryatid2(Caryatid Framework) - Module3 <--> Caryatid2 end RabbitMQ([RabbitMQ Message Bus]) - style RabbitMQ fill:#eff - + style RabbitMQ fill: #eff Caryatid1 <--> RabbitMQ Caryatid2 <--> RabbitMQ ``` @@ -61,6 +56,9 @@ graph TB Fetches a chain snapshot from Mithril and replays all the blocks in it - [Genesis Bootstrapper](modules/genesis_bootstrapper) - reads the Genesis file for a chain and generates initial UTXOs +- [Snapshot Bootstrapper](modules/snapshot_bootstrapper) - downloads ledger state snapshot files for configured epochs, + streams and parses the CBOR data (UTXOs, pools, accounts, DReps, proposals), and publishes completion messages to + signal snapshot readiness to other modules. - [Block Unpacker](modules/block_unpacker) - unpacks received blocks into individual transactions - [Tx Unpacker](modules/tx_unpacker) - parses transactions and generates UTXO @@ -69,7 +67,8 @@ graph TB - [SPO State](modules/spo_state) - matches SPO registrations and retirements - [DRep State](modules/drep_state) - tracks DRep registrations - [Governance State](modules/governance_state) - tracks Governance Actions and voting -- [Stake Delta Filter](modules/stake_delta_filter) - filters out stake address changes and handles stake pointer references +- [Stake Delta Filter](modules/stake_delta_filter) - filters out stake address changes and handles stake pointer + references - [Epochs State](modules/epochs_state) - track fees blocks minted and epochs history - [Accounts State](modules/accounts_state) - stake and reward accounts tracker - [Assets State](modules/assets_state) - tracks native asset supply, metadata, transactions, and addresses diff --git a/common/Cargo.toml b/common/Cargo.toml index 1c23f39c..63473f4e 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -41,9 +41,10 @@ thiserror = "2.0.17" sha2 = "0.10.8" [dev-dependencies] -caryatid_process = { workspace = true } +tempfile = "3.23" config = { workspace = true } -tempfile = "3" +caryatid_process = { workspace = true } + [lib] crate-type = ["rlib"] diff --git a/common/src/snapshot/NOTES.md b/common/src/snapshot/NOTES.md deleted file mode 100644 index 710ebd3f..00000000 --- a/common/src/snapshot/NOTES.md +++ /dev/null @@ -1,92 +0,0 @@ -# Bootstrapping from a Snapshot file - -We can boot an Acropolis node either from geneis and replay all of the blocks up to -some point, or we can boot from a snapshot file. This module provides the components -needed to boot from a snapshot file. -See [snapshot_bootsrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) for the process that -references and runs with these helpers. - -Booting from a snapshot takes minutes instead of the hours it takes to boot from -genesis. It also allows booting from a given epoch which allows one to create tests -that rely only on that epoch of data. We're also skipping some of the problematic -eras and will typically boot from Conway around epoch 305, 306, and 307. It takes -three epochs to have enough context to correctly calculate the rewards. 
- -The required data for boostrapping are: - -- snapshot files (each has an associated epoch number and point) -- nonces -- headers - -## Snapshot Files - -The snapshots come from the Amaru project. In their words, -"the snapshots we generated are different [from a Mithril snapshot]: they're -the actual ledger state; i.e. the in-memory state that is constructed by iterating over each block up to a specific -point. So, it's all the UTxOs, the set of pending governance actions, the account balance, etc. -If you get this from a trusted source, you don't need to do any replay, you can just start up and load this from disk. -The format of these is completely non-standard; we just forked the haskell node and spit out whatever we needed to in -CBOR." - -Snapshot files are referenced by their epoch number in the config.json file below. - -See [Amaru snapshot format](../../../docs/amaru-snapshot-structure.md) - -## Configuration files - -There is a path for each network bootstrap configuration file. Network Should -be one of 'mainnet', 'preprod', 'preview' or 'testnet_' where -`magic` is a 32-bits unsigned value denoting a particular testnet. - -Data structure, e.g. as [Amaru mainnet](https://github.com/pragma-org/amaru/tree/main/data/mainnet) - -The bootstrapper will be given a path to a directory that is expected to contain -the following files: snapshots.json, nonces.json, and headers.json. The path will -be used as a prefix to resolve per-network configuration files -needed for bootstrapping. Given a source directory `data`, and a -a network name of `preview`, the expected layout for configuration files would be: - -* `data/preview/config.json`: a list of epochs to load. -* `data/preview/snapshots.json`: a list of `Snapshot` values (epoch, point, url) -* `data/preview/nonces.json`: a list of `InitialNonces` values, -* `data/preview/headers.json`: a list of `Point`s. - -These files are loaded by [snapshot_bootsrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs) -during bootup. - -## Bootstrapping sequence - -The bootstrapper will be started with an argument that specifies a network, -e.g. "mainnet". From the network, it will build a path to the configuration -and snapshot files as shown above, then load the data contained or described -in those files. config.json holds a list of typically 3 epochs that can be -used to index into snapshots.json to find the corresponding URLs and meta-data -for each of the three snapshot files. Loading occurs in this order: - -* publish `SnapshotMessage::Startup` -* download the snapshots (on demand; may have already been done externally) -* parse each snapshot and publish their data on the message bus -* read nonces and publish -* read headers and publish -* publish `CardanoMessage::GenesisComplete(GenesisCompleteMessage {...})` - -Modules in the system will have subscribed to the Startup message and also -to individual structural data update messages before the -boostrapper runs the above sequence. Upon receiving the `Startup` message, -they will use data messages to populate their state, history (for BlockFrost), -and any other state required to achieve readiness to operate on reception of -the `GenesisCompleteMessage`. - -## Data update messages - -The bootstrapper will publish data as it parses the snapshot files, nonces, and -headers. Snapshot parsing is done while streaming the data to keep the memory -footprint lower. 
As elements of the file are parsed, callbacks provide the data
-to the boostrapper which publishes the data on the message bus.
-
-There are TODO markers in [snapshot_bootsrapper](../../../modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs)
-that show where to add the
-publishing of the parsed snapshot data.
-
-
-
diff --git a/common/src/snapshot/streaming_snapshot.rs b/common/src/snapshot/streaming_snapshot.rs
index 673acb12..e4dc9f48 100644
--- a/common/src/snapshot/streaming_snapshot.rs
+++ b/common/src/snapshot/streaming_snapshot.rs
@@ -1568,7 +1568,7 @@ impl StreamingSnapshotParser {
             if utxo_count.is_multiple_of(1000000) {
                 let buffer_usage = buffer.len();
                 info!(
-                    " Streamed {} UTXOs, buffer: {} MB, max entry: {} bytes",
+                    "Streamed {} UTXOs, buffer: {} MB, max entry: {} bytes",
                     utxo_count,
                     buffer_usage / 1024 / 1024,
                     max_single_entry_size
@@ -1625,20 +1625,17 @@ impl StreamingSnapshotParser {
             }
         }

-        info!(" 🎯 STREAMING RESULTS:");
-        info!(" • UTXOs processed: {}", utxo_count);
+        info!("Streaming results:");
+        info!("  UTXOs processed: {}", utxo_count);
         info!(
-            " • Total data streamed: {:.2} MB",
+            "  Total data streamed: {:.2} MB",
             total_bytes_processed as f64 / 1024.0 / 1024.0
         );
         info!(
-            " • Peak buffer usage: {} MB (vs 2.1GB before!)",
+            "  Peak buffer usage: {} MB",
             PARSE_BUFFER_SIZE / 1024 / 1024
         );
-        info!(
-            " • Largest single entry: {} bytes",
-            max_single_entry_size
-        );
+        info!("  Largest single entry: {} bytes", max_single_entry_size);

         Ok(utxo_count)
     }
diff --git a/modules/README.md b/modules/README.md
index 3d288502..c4ccdf3a 100644
--- a/modules/README.md
+++ b/modules/README.md
@@ -10,6 +10,9 @@ compose the Acropolis Architecture
  Fetches a chain snapshot from Mithril and replays all the blocks in it
* [Genesis Bootstrapper](genesis_bootstrapper) - reads the Genesis file for a chain and generates initial UTXOs
+* [Snapshot Bootstrapper](snapshot_bootstrapper) - downloads ledger state snapshot files for configured epochs,
+  streams and parses the CBOR data (UTXOs, pools, accounts, DReps, proposals), and publishes completion messages to
+  signal snapshot readiness to other modules.
* [Block Unpacker](block_unpacker) - unpacks received blocks into individual transactions
* [Tx Unpacker](tx_unpacker) - parses transactions and generates UTXO
@@ -19,7 +22,6 @@ compose the Acropolis Architecture
* [DRep State](drep_state) - tracks DRep registrations
* [Governance State](governance_state) - tracks Governance Actions and voting
* [Stake Delta Filter](stake_delta_filter) - filters out stake address changes and handles stake pointer references
-* [Epoch Activity Counter](epoch_activity_couinter) - counts fees and block production for rewards
* [Accounts State](accounts_state) - stake and reward accounts tracker

## How to add a new module
@@ -88,7 +90,7 @@ to call `MyModule::register()` in the process `main()`:
 use acropolis_module_my_module::MyModule;

 // in main()...
- MyModule::register(&mut process);
+MyModule::register(&mut process);
```

You also need to mention the module in (e.g.)
`omnibus.toml` to get it created, even if all
diff --git a/modules/mithril_snapshot_fetcher/src/mithril_snapshot_fetcher.rs b/modules/mithril_snapshot_fetcher/src/mithril_snapshot_fetcher.rs
index 5df5cd22..1a1a91e1 100644
--- a/modules/mithril_snapshot_fetcher/src/mithril_snapshot_fetcher.rs
+++ b/modules/mithril_snapshot_fetcher/src/mithril_snapshot_fetcher.rs
@@ -379,7 +379,7 @@ impl MithrilSnapshotFetcher {
         // Send completion message
         if let Some(last_block_info) = last_block_info {
             info!(
-                "Finished shapshot at block {}, epoch {}",
+                "Finished snapshot at block {}, epoch {}",
                 last_block_info.number, last_block_info.epoch
             );
             let message_enum =
diff --git a/modules/snapshot_bootstrapper/Cargo.toml b/modules/snapshot_bootstrapper/Cargo.toml
index 05a3d128..35cb7b84 100644
--- a/modules/snapshot_bootstrapper/Cargo.toml
+++ b/modules/snapshot_bootstrapper/Cargo.toml
@@ -17,6 +17,20 @@ anyhow = { workspace = true }
 config = { workspace = true }
 tokio = { workspace = true }
 tracing = { workspace = true }
+serde = { workspace = true, features = ["rc"] }
+serde_json = { workspace = true }
+thiserror = "2.0.17"
+async-compression = { version = "0.4.32", features = ["tokio", "gzip"] }
+reqwest = { version = "0.12", features = ["stream"] }
+futures-util = "0.3.31"
+tokio-util = "0.7.17"
+hex = "0.4.3"
+
+[dev-dependencies]
+wiremock = "0.6.5"
+flate2 = "1.1.5"
+tempfile = "3.23.0"
+
 [lib]
-path = "src/snapshot_bootstrapper.rs"
+path = "src/bootstrapper.rs"
diff --git a/modules/snapshot_bootstrapper/NOTES.md b/modules/snapshot_bootstrapper/NOTES.md
new file mode 100644
index 00000000..97673b66
--- /dev/null
+++ b/modules/snapshot_bootstrapper/NOTES.md
@@ -0,0 +1,107 @@
+# Bootstrapping from a Snapshot file
+
+We can boot an Acropolis node either from genesis and replay all of the blocks up to
+some point, or we can boot from a snapshot file. This module provides the components
+needed to boot from a snapshot file.
+See [snapshot_bootstrapper](src/bootstrapper.rs) for the process that drives these helpers.
+
+Booting from a snapshot should take minutes instead of the hours it takes to boot from
+genesis. It also allows booting from a given epoch, which makes it possible to create tests
+that rely only on that epoch of data. We're also skipping some of the problematic
+eras and will typically boot from Conway. At the moment, we're confident
+that a single NewEpochState CBOR dump is enough to bootstrap the node.
+
+The required data for bootstrapping are:
+
+- snapshot files (each has an associated epoch number and a point, i.e. slot + block hash)
+- nonces (not implemented yet)
+- headers (not implemented yet)
+
+## Snapshot Files
+
+The snapshot approach comes from the Amaru project. In their words,
+"the snapshots we generated are different from a NewEpochState dump that's requested from an Ogmios GetCBOR endpoint
+with a synchronizing node: they're the actual ledger state; i.e. the in-memory state that is constructed by iterating
+over each block up to a specific point. So, it's all the UTxOs, the set of pending governance actions, the account
+balance, etc.
+If you get this from a trusted source, you don't need to do any replay, you can just start up and load this from the
+disk."
+
+Snapshot files are referenced by their epoch number in the config.json file below.
+
+See [Amaru snapshot format](../../docs/amaru-snapshot-structure.md)
+
+## Configuration files
+
+Each network has its own directory of bootstrap configuration files.
+The network should be one of 'mainnet', 'preprod', 'preview' or 'testnet_<magic>', where
+`magic` is a 32-bit unsigned value denoting a particular testnet.
+
+The data layout follows that of, e.g., [Amaru mainnet](https://github.com/pragma-org/amaru/tree/main/data/mainnet)
+
+The bootstrapper will be given a path to a directory that is expected to contain
+the following files: snapshots.json and config.json. The path will
+be used as a prefix to resolve per-network configuration files
+needed for bootstrapping. Given a source directory `data` and a
+network name of `preview`, the expected layout for configuration files would be:
+
+* `data/preview/config.json`: the list of epochs to load, plus a list of points
+* `data/preview/snapshots.json`: a list of `SnapshotFileMetadata` values (epoch, point, url)
+
+These files are loaded by [snapshot_bootstrapper](src/bootstrapper.rs)
+during bootup.
+
+## Bootstrapping sequence
+
+The bootstrapper will be started with a configuration that specifies a network,
+e.g. "mainnet". From the network, it will build a path to the configuration
+and snapshot files as shown above, then load the data contained or described
+in those files. config.json holds a list of typically 3 epochs that can be
+used to index into snapshots.json to find the corresponding URLs and meta-data
+for each of the three snapshot files. Loading occurs in this order:
+
+1. Wait for `startup-topic` message (typically `cardano.sequence.start`)
+2. Wait for `bootstrapped-topic` message with genesis values (typically `cardano.sequence.bootstrapped`)
+3. Load network configuration from `config.json`
+4. Load snapshot metadata from `snapshots.json`
+5. Filter snapshots based on epochs specified in config.json
+6. Download snapshot files (skipped if already present)
+7. Publish `SnapshotMessage::Startup` to the snapshot topic
+8. Parse each snapshot file sequentially using the [streaming_snapshot](../../common/src/snapshot/streaming_snapshot.rs) parser
+9. Publish `CardanoMessage::SnapshotComplete` with final block info to the completion topic
+
+Modules in the system will have subscribed to the startup and completion topics before the
+bootstrapper runs the above sequence. Upon receiving snapshot data messages,
+they will use the data to populate their state and whatever else they need to be ready to operate.
+
+## Data update messages
+
+The bootstrapper publishes data as it parses the snapshot files using the `SnapshotPublisher`.
+Snapshot parsing is done while streaming the data to keep the memory
+footprint low. As elements of the file are parsed, callbacks provide the data
+to the publisher, which can then publish structured data on the message bus.
+
+The `SnapshotPublisher` implements the streaming snapshot callbacks:
+
+- `UtxoCallback`: Receives individual UTXO entries
+- `PoolCallback`: Receives pool information
+- `StakeCallback`: Receives account/stake information
+- `DRepCallback`: Receives DRep (delegated representative) information
+- `ProposalCallback`: Receives governance proposals
+- `SnapshotCallbacks`: Receives metadata and completion signals
+
+Currently, the publisher just accumulates this data; publishing the corresponding
+message types to downstream modules still needs to be added by implementing the
+appropriate message-bus publishing in the callback methods, as sketched below.
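+
+As a rough illustration only: a drain-and-publish step could hand an accumulated
+batch to the message bus once parsing has finished. The `SnapshotMessage::Pools`
+variant used below is an assumption for the sketch; no such variant exists in the
+message enum yet.
+
+```rust
+impl SnapshotPublisher {
+    /// Hypothetical: drain the pools accumulated by `on_pools` and publish
+    /// them as one message (assumes a `SnapshotMessage::Pools(Vec<PoolInfo>)`
+    /// variant is added to the message enum).
+    pub async fn flush_pools(&mut self) -> Result<()> {
+        let pools = std::mem::take(&mut self.pools);
+        let message = Arc::new(Message::Snapshot(
+            acropolis_common::messages::SnapshotMessage::Pools(pools),
+        ));
+        self.context.publish(&self.snapshot_topic, message).await
+    }
+}
+```
+
+Since the parser callbacks are synchronous and publishing is async, a step like
+this would be called from an async context (e.g. from `process_snapshots` after
+`parser.parse(...)` returns, or between snapshots when batching).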
+ +## Configuration + +The bootstrapper supports the following configuration options: + +- `network`: Network name (default: "mainnet") +- `data-dir`: Base directory for network data (default: "./data") +- `startup-topic`: Topic to wait for startup signal (default: "cardano.sequence.start") +- `snapshot-topic`: Topic to publish snapshot messages (default: "cardano.snapshot") +- `bootstrapped-subscribe-topic`: Topic to receive genesis completion (default: "cardano.sequence.bootstrapped") +- `completion-topic`: Topic to publish completion signal (default: "cardano.snapshot.complete") \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/README.md b/modules/snapshot_bootstrapper/README.md new file mode 100644 index 00000000..8ea90146 --- /dev/null +++ b/modules/snapshot_bootstrapper/README.md @@ -0,0 +1,47 @@ +# Snapshot Bootstrapper Module + +The snapshot bootstrapper module downloads and processes Cardano ledger snapshots to initialize system state before +processing the live chain. + +## Overview + +This module: + +1. Waits for genesis bootstrap completion +2. Downloads compressed snapshot files from configured URLs +3. Streams and publishes snapshot data (UTXOs, pools, accounts, DReps, proposals) +4. Signals completion to allow chain synchronization to begin + +## Messages + +The snapshot bootstrapper: + +- **Subscribes to** `cardano.sequence.start` - Waits for startup signal +- **Subscribes to** `cardano.sequence.bootstrapped` - Waits for genesis completion +- **Publishes to** `cardano.snapshot` - Streams snapshot data during processing +- **Publishes to** `cardano.snapshot.complete` - Signals completion with block info + +## Default Configuration + +```toml +[module.snapshot-bootstrapper] + +# Network and data +network = "mainnet" +data-dir = "./data" + +# Message topics +startup-topic = "cardano.sequence.start" +snapshot-topic = "cardano.snapshot" +bootstrapped-subscribe-topic = "cardano.sequence.bootstrapped" +completion-topic = "cardano.snapshot.complete" +``` + +## Directory Structure + +The module expects the following files in `{data-dir}/{network}/`: + +- **`config.json`** - Network configuration specifying which snapshot epochs to load +- **`snapshots.json`** - Snapshot metadata including download URLs + +Snapshot files are downloaded to `{data-dir}/{network}/{point}.cbor`. \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/config.default.toml b/modules/snapshot_bootstrapper/config.default.toml new file mode 100644 index 00000000..172222c2 --- /dev/null +++ b/modules/snapshot_bootstrapper/config.default.toml @@ -0,0 +1,28 @@ +# Snapshot bootstrapper default configuration + +# The Cardano network to connect to (e.g., "mainnet", "preprod", "preview") +network = "mainnet" + +# Base directory for storing network data, snapshots, and configuration files. +# Network-specific files will be stored in a subdirectory: // +# Expected files: config.json (network config) and snapshots.json (snapshot metadata) +data-dir = "./data" + +# Topic to subscribe to for receiving the initial startup signal. +# The bootstrapper waits for a message on this topic before beginning any work. +startup-topic = "cardano.sequence.start" + +# Topic for publishing snapshot data during processing. +# Publishes a Startup message when snapshot processing begins, followed by +# snapshot content (UTXOs, pools, accounts, DReps, governance proposals). +snapshot-topic = "cardano.snapshot" + +# Topic to subscribe to for genesis bootstrap completion. 
+# The bootstrapper waits for a GenesisComplete message on this topic +# before proceeding with snapshot processing. +bootstrapped-subscribe-topic = "cardano.sequence.bootstrapped" + +# Topic for publishing the final SnapshotComplete message. +# Signals that all snapshots have been fully processed and the +# blockchain state is ready for downstream consumers. +completion-topic = "cardano.snapshot.complete" \ No newline at end of file diff --git a/modules/snapshot_bootstrapper/data/mainnet/config.json b/modules/snapshot_bootstrapper/data/mainnet/config.json new file mode 100644 index 00000000..a1bb266a --- /dev/null +++ b/modules/snapshot_bootstrapper/data/mainnet/config.json @@ -0,0 +1,104 @@ +{ + "snapshots": [ + 507, + 508, + 509 + ], + "points": [ + { + "epoch": 507, + "id": "670ca68c3de580f8469677754a725e86ca72a7be381d3108569f0704a5fca327", + "slot": 134092758 + }, + { + "epoch": 508, + "id": "29011cc1320d03b3da0121236dc66e6bc391feef4bb1d506a7fb20e769d6a494", + "slot": 134524753 + }, + { + "epoch": 509, + "id": "6558deef007ba372a414466e49214368c17c1f8428093193fc187d1c4587053c", + "slot": 134956789 + }, + { + "epoch": 510, + "id": "3fd738bacbcc277d43358a28ed15fa4335977c822fd7546d0de2606d7d2a57aa", + "slot": 135388794 + }, + { + "epoch": 511, + "id": "d02f89d21fe9c80f927eeda31fadb03b589db2ac5c8108d7171c4c319aca2fa1", + "slot": 135820797 + }, + { + "epoch": 512, + "id": "9503c7c669746be68ca34ed4d822d9d0dcccd5c0ef61cb9679a1c1e739534853", + "slot": 136252793 + }, + { + "epoch": 513, + "id": "e2bb0babbc715953ce1edc4e0c817a4b1fa9d36124648b4d21d1a2ccd26be672", + "slot": 136684793 + }, + { + "epoch": 514, + "id": "2c4f7a0a855e76e5d83b9d3e168213711490663dddfc6925e09a37fe46ed62b4", + "slot": 137116798 + }, + { + "epoch": 515, + "id": "66c5229785de3ff7bb2834db69fc8da5d3203a7cdf2d6983b3e9e155ff6ec0fb", + "slot": 137548794 + }, + { + "epoch": 516, + "id": "b934fa686e585636cc74a07555dbd8c10f9680464f80273f1d29806ecbc5e822", + "slot": 137980781 + }, + { + "epoch": 517, + "id": "7c4afb5f4ba5d7182f99fd839e26302bcdca06c9066b825f2f40f4a094d7f0ab", + "slot": 138412701 + }, + { + "epoch": 518, + "id": "a65138e908ccc90014b4ae740382c7908f9636e56c6e9d6ecec38f452b70c93f", + "slot": 138844799 + }, + { + "epoch": 519, + "id": "0cffc5eb77a6885257fcba94b8fd6fdddc80e368bf4ef855f058c6adda4933c1", + "slot": 139276793 + }, + { + "epoch": 520, + "id": "58f198313d00d639814db34f32aad259e22c53089dfa95dae79e0e2e4d93c6f0", + "slot": 139708765 + }, + { + "epoch": 521, + "id": "7e423f52284987b4b358a0a9b6847525c42a818a024dde663101669ab2e8a6ee", + "slot": 140140779 + }, + { + "epoch": 522, + "id": "86f874039f07143ab4d7d5c6ccb27ea33fd1440f81176055fe9e4e6e910800e9", + "slot": 140572798 + }, + { + "epoch": 523, + "id": "96a53046d8bbfa690b6bfbc2c7f99036b3494f99e616e998224bcfcd33b84e7b", + "slot": 141004797 + }, + { + "epoch": 524, + "id": "865267d5b5fe9d497418ea72c9b84058e5aa2a98ace96043d53fec32eebf4fef", + "slot": 141436773 + }, + { + "epoch": 525, + "id": "e4846337e6f87ed65c88e770ab5c1bec39de45cbf3bdde88b249ac1ad2cd2a8a", + "slot": 141868737 + } + ] +} diff --git a/modules/snapshot_bootstrapper/data/mainnet/snapshots.json b/modules/snapshot_bootstrapper/data/mainnet/snapshots.json new file mode 100644 index 00000000..695a55ec --- /dev/null +++ b/modules/snapshot_bootstrapper/data/mainnet/snapshots.json @@ -0,0 +1,17 @@ +[ + { + "epoch": 507, + "point": "134092758.670ca68c3de580f8469677754a725e86ca72a7be381d3108569f0704a5fca327", + "url": 
"https://pub-b844360df4774bb092a2bb2043b888e5.r2.dev/134092758.670ca68c3de580f8469677754a725e86ca72a7be381d3108569f0704a5fca327.cbor.gz" + }, + { + "epoch": 508, + "point": "134524753.29011cc1320d03b3da0121236dc66e6bc391feef4bb1d506a7fb20e769d6a494", + "url": "https://pub-b844360df4774bb092a2bb2043b888e5.r2.dev/134524753.29011cc1320d03b3da0121236dc66e6bc391feef4bb1d506a7fb20e769d6a494.cbor.gz" + }, + { + "epoch": 509, + "point": "134956789.6558deef007ba372a414466e49214368c17c1f8428093193fc187d1c4587053c", + "url": "https://pub-b844360df4774bb092a2bb2043b888e5.r2.dev/134956789.6558deef007ba372a414466e49214368c17c1f8428093193fc187d1c4587053c.cbor.gz" + } +] diff --git a/modules/snapshot_bootstrapper/src/bootstrapper.rs b/modules/snapshot_bootstrapper/src/bootstrapper.rs new file mode 100644 index 00000000..022138cc --- /dev/null +++ b/modules/snapshot_bootstrapper/src/bootstrapper.rs @@ -0,0 +1,240 @@ +mod configuration; +mod downloader; +mod progress_reader; +mod publisher; + +use crate::configuration::{ConfigError, NetworkConfig, SnapshotConfig, SnapshotFileMetadata}; +use crate::downloader::{DownloadError, SnapshotDownloader}; +use crate::publisher::SnapshotPublisher; +use acropolis_common::genesis_values::GenesisValues; +use acropolis_common::snapshot::streaming_snapshot::StreamingSnapshotParser; +use acropolis_common::{ + messages::{CardanoMessage, Message}, + BlockHash, BlockInfo, BlockStatus, Era, +}; +use anyhow::{bail, Result}; +use caryatid_sdk::{module, Context, Subscription}; +use config::Config; +use std::sync::Arc; +use thiserror::Error; +use tokio::time::Instant; +use tracing::{error, info, info_span, Instrument}; + +#[derive(Debug, Error)] +pub enum BootstrapError { + #[error("Configuration error: {0}")] + Config(#[from] ConfigError), + + #[error("Download error: {0}")] + Download(#[from] DownloadError), + + #[error("Snapshot parsing failed: {0}")] + Parse(String), + + #[error(transparent)] + Other(#[from] anyhow::Error), +} + +#[module( + message_type(Message), + name = "snapshot-bootstrapper", + description = "Snapshot Bootstrapper to broadcast state via streaming" +)] +pub struct SnapshotBootstrapper; + +impl SnapshotBootstrapper { + /// Initializes the snapshot bootstrapper. 
+    pub async fn init(&self, context: Arc<Context<Message>>, config: Arc<Config>) -> Result<()> {
+        let cfg = SnapshotConfig::try_load(&config)?;
+
+        info!("Snapshot bootstrapper initializing");
+        info!("  Network: {}", cfg.network);
+        info!("  Data directory: {}", cfg.data_dir);
+        info!("  Publishing on '{}'", cfg.snapshot_topic);
+        info!("  Completing with '{}'", cfg.completion_topic);
+
+        let startup_sub = context.subscribe(&cfg.startup_topic).await?;
+        let bootstrapped_sub = context.subscribe(&cfg.bootstrapped_subscribe_topic).await?;
+
+        context.clone().run(async move {
+            let span = info_span!("snapshot_bootstrapper.handle");
+            async {
+                // Wait for the startup signal
+                if let Err(e) = Self::wait_startup(startup_sub).await {
+                    error!("Failed waiting for startup: {e:#}");
+                    return;
+                }
+
+                // Wait for genesis bootstrap completion
+                if let Err(e) = Self::wait_genesis_completion(bootstrapped_sub).await {
+                    error!("Failed waiting for bootstrapped: {e:#}");
+                    return;
+                }
+
+                info!("Bootstrap prerequisites met, starting snapshot processing");
+
+                // Load network configuration
+                let network_config = match NetworkConfig::read_from_file(&cfg.config_path()) {
+                    Ok(config) => config,
+                    Err(e) => {
+                        error!("Failed to read network config: {e:#}");
+                        return;
+                    }
+                };
+
+                // Load snapshots metadata
+                let all_snapshots =
+                    match SnapshotFileMetadata::read_all_from_file(&cfg.snapshots_path()) {
+                        Ok(snapshots) => snapshots,
+                        Err(e) => {
+                            error!("Failed to read snapshots metadata: {e:#}");
+                            return;
+                        }
+                    };
+
+                // Filter snapshots based on network config
+                let target_snapshots = SnapshotFileMetadata::filter_by_epochs(
+                    &all_snapshots,
+                    &network_config.snapshots,
+                );
+                if target_snapshots.is_empty() {
+                    error!(
+                        "No snapshots found for requested epochs: {:?}",
+                        network_config.snapshots
+                    );
+                    return;
+                }
+
+                info!("Found {} snapshot(s) to process", target_snapshots.len());
+
+                // Create downloader and download all snapshots
+                let downloader = match SnapshotDownloader::new(cfg.network_dir()) {
+                    Ok(d) => d,
+                    Err(e) => {
+                        error!("Failed to create snapshot downloader: {e:#}");
+                        return;
+                    }
+                };
+
+                if let Err(e) = downloader.download_all(&target_snapshots).await {
+                    error!("Failed to download snapshots: {e:#}");
+                    return;
+                }
+
+                // Process snapshots in order
+                if let Err(e) =
+                    Self::process_snapshots(&target_snapshots, &cfg, context.clone()).await
+                {
+                    error!("Failed to process snapshots: {e:#}");
+                    return;
+                }
+
+                info!("Snapshot bootstrap completed successfully");
+            }
+            .instrument(span)
+            .await;
+        });
+
+        Ok(())
+    }
+
+    async fn wait_startup(mut subscription: Box<dyn Subscription<Message>>) -> Result<()> {
+        let (_, _message) = subscription.read().await?;
+        info!("Received startup message");
+        Ok(())
+    }
+
+    async fn wait_genesis_completion(
+        mut subscription: Box<dyn Subscription<Message>>,
+    ) -> Result<()> {
+        let (_, message) = subscription.read().await?;
+        match message.as_ref() {
+            Message::Cardano((_, CardanoMessage::GenesisComplete(_complete))) => {
+                info!("Received genesis complete message");
+                Ok(())
+            }
+            msg => bail!("Unexpected message in bootstrapped topic: {msg:?}"),
+        }
+    }
+
+    async fn process_snapshots(
+        snapshots: &[SnapshotFileMetadata],
+        cfg: &SnapshotConfig,
+        context: Arc<Context<Message>>,
+    ) -> Result<()> {
+        let mut publisher = SnapshotPublisher::new(
+            context,
+            cfg.completion_topic.clone(),
+            cfg.snapshot_topic.clone(),
+        );
+
+        publisher.publish_start().await?;
+
+        for snapshot_meta in snapshots {
+            let file_path = snapshot_meta.file_path(&cfg.network_dir());
+
+            info!(
+                "Processing snapshot for epoch {} from {}",
+                snapshot_meta.epoch, file_path
);
+
+            Self::parse_snapshot(&file_path, &mut publisher).await?;
+        }
+
+        let last_snapshot =
+            snapshots.last().ok_or_else(|| anyhow::anyhow!("No snapshots to process"))?;
+
+        let block_info = build_block_info_from_metadata(last_snapshot).map_err(|e| {
+            BootstrapError::Parse(format!(
+                "Failed to build block info from snapshot metadata: {e}"
+            ))
+        })?;
+
+        publisher.publish_completion(block_info).await?;
+
+        Ok(())
+    }
+
+    async fn parse_snapshot(file_path: &str, publisher: &mut SnapshotPublisher) -> Result<()> {
+        info!("Parsing snapshot: {}", file_path);
+        let start = Instant::now();
+
+        let parser = StreamingSnapshotParser::new(file_path);
+        parser.parse(publisher)?;
+
+        let duration = start.elapsed();
+        info!("Parsed snapshot in {:.2?}", duration);
+
+        Ok(())
+    }
+}
+
+fn build_block_info_from_metadata(metadata: &SnapshotFileMetadata) -> Result<BlockInfo> {
+    let (slot, block_hash_str) = metadata
+        .parse_point()
+        .ok_or_else(|| anyhow::anyhow!("Invalid point format: {}", metadata.point))?;
+
+    let hash = BlockHash::try_from(hex::decode(block_hash_str)?)
+        .map_err(|e| anyhow::anyhow!("Invalid block hash hex: {:?}", e))?;
+
+    let genesis = GenesisValues::mainnet();
+    let epoch_slot = slot - genesis.epoch_to_first_slot(slot);
+    let timestamp = genesis.slot_to_timestamp(slot);
+
+    info!(
+        "Block info built: slot={}, hash={}, epoch={}, slot_in_epoch={}, timestamp={}",
+        slot, hash, metadata.epoch, epoch_slot, timestamp
+    );
+
+    Ok(BlockInfo {
+        status: BlockStatus::Immutable,
+        slot,
+        number: 0,
+        hash,
+        epoch: metadata.epoch,
+        epoch_slot,
+        new_epoch: false,
+        timestamp,
+        era: Era::Conway,
+    })
+}
diff --git a/modules/snapshot_bootstrapper/src/configuration.rs b/modules/snapshot_bootstrapper/src/configuration.rs
new file mode 100644
index 00000000..407ad165
--- /dev/null
+++ b/modules/snapshot_bootstrapper/src/configuration.rs
@@ -0,0 +1,349 @@
+use anyhow::Result;
+use config::Config;
+use serde::{Deserialize, Serialize};
+use std::fs;
+use std::io;
+use std::path::PathBuf;
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum ConfigError {
+    #[error("Cannot read network config file {0}: {1}")]
+    ReadNetworkConfig(PathBuf, io::Error),
+
+    #[error("Cannot read snapshots metadata file {0}: {1}")]
+    ReadSnapshotsFile(PathBuf, io::Error),
+
+    #[error("Failed to parse network config {0}: {1}")]
+    MalformedNetworkConfig(PathBuf, serde_json::Error),
+
+    #[error("Failed to parse snapshots JSON file {0}: {1}")]
+    MalformedSnapshotsFile(PathBuf, serde_json::Error),
+}
+
+/// Configuration for the snapshot bootstrapper
+#[derive(Debug, Clone, Deserialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct SnapshotConfig {
+    pub network: String,
+    pub data_dir: String,
+    pub startup_topic: String,
+    pub snapshot_topic: String,
+    pub bootstrapped_subscribe_topic: String,
+    pub completion_topic: String,
+}
+
+impl SnapshotConfig {
+    pub fn try_load(config: &Config) -> Result<Self> {
+        let full_config = Config::builder()
+            .add_source(config::File::from_str(
+                include_str!("../config.default.toml"),
+                config::FileFormat::Toml,
+            ))
+            .add_source(config.clone())
+            .build()?;
+        Ok(full_config.try_deserialize()?)
+    }
+
+    pub fn network_dir(&self) -> String {
+        format!("{}/{}", self.data_dir, self.network)
+    }
+
+    pub fn config_path(&self) -> String {
+        format!("{}/config.json", self.network_dir())
+    }
+
+    pub fn snapshots_path(&self) -> String {
+        format!("{}/snapshots.json", self.network_dir())
+    }
+}
+
+/// Network configuration file (config.json)
+#[derive(Debug, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct NetworkConfig {
+    pub snapshots: Vec<u64>,
+    pub points: Vec<Point>,
+}
+
+impl NetworkConfig {
+    pub fn read_from_file(path: &str) -> Result<Self, ConfigError> {
+        let path_buf = PathBuf::from(path);
+        let content = fs::read_to_string(&path_buf)
+            .map_err(|e| ConfigError::ReadNetworkConfig(path_buf.clone(), e))?;
+
+        let config: NetworkConfig = serde_json::from_str(&content)
+            .map_err(|e| ConfigError::MalformedNetworkConfig(path_buf, e))?;
+
+        Ok(config)
+    }
+}
+
+/// Point
+#[derive(Debug, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Point {
+    pub epoch: u64,
+    pub id: String,
+    pub slot: u64,
+}
+
+/// Snapshot metadata from snapshots.json
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct SnapshotFileMetadata {
+    pub epoch: u64,
+    pub point: String,
+    pub url: String,
+}
+
+impl SnapshotFileMetadata {
+    pub fn read_all_from_file(path: &str) -> Result<Vec<Self>, ConfigError> {
+        let path_buf = PathBuf::from(path);
+        let content = fs::read_to_string(&path_buf)
+            .map_err(|e| ConfigError::ReadSnapshotsFile(path_buf.clone(), e))?;
+
+        let snapshots: Vec<SnapshotFileMetadata> = serde_json::from_str(&content)
+            .map_err(|e| ConfigError::MalformedSnapshotsFile(path_buf, e))?;
+
+        Ok(snapshots)
+    }
+
+    pub fn parse_point(&self) -> Option<(u64, String)> {
+        let parts: Vec<&str> = self.point.splitn(2, '.').collect();
+        if parts.len() == 2 {
+            let slot = parts[0].parse().ok()?;
+            let hash = parts[1].to_string();
+            Some((slot, hash))
+        } else {
+            None
+        }
+    }
+
+    pub fn file_path(&self, network_dir: &str) -> String {
+        format!("{}/{}.cbor", network_dir, self.point)
+    }
+
+    pub fn filter_by_epochs(snapshots: &[Self], epochs: &[u64]) -> Vec<Self> {
+        snapshots.iter().filter(|s| epochs.contains(&s.epoch)).cloned().collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::Write;
+    use std::path::Path;
+    use tempfile::TempDir;
+
+    fn create_test_network_config(dir: &Path, snapshots: Vec<u64>) -> PathBuf {
+        let config = NetworkConfig {
+            snapshots,
+            points: vec![Point {
+                epoch: 500,
+                id: "test_block_hash".to_string(),
+                slot: 12345678,
+            }],
+        };
+
+        let config_path = dir.join("config.json");
+        let mut file = fs::File::create(&config_path).unwrap();
+        file.write_all(serde_json::to_string_pretty(&config).unwrap().as_bytes()).unwrap();
+        config_path
+    }
+
+    fn create_test_snapshots_metadata(dir: &Path, epochs: Vec<u64>, base_url: &str) -> PathBuf {
+        let snapshots: Vec<SnapshotFileMetadata> = epochs
+            .iter()
+            .map(|epoch| SnapshotFileMetadata {
+                epoch: *epoch,
+                point: format!("point_{}", epoch),
+                url: format!("{}/snapshot_{}.cbor.gz", base_url, epoch),
+            })
+            .collect();
+
+        let snapshots_path = dir.join("snapshots.json");
+        let mut file = fs::File::create(&snapshots_path).unwrap();
+        file.write_all(serde_json::to_string_pretty(&snapshots).unwrap().as_bytes()).unwrap();
+        snapshots_path
+    }
+
+    #[test]
+    fn test_snapshot_config_network_dir() {
+        let config = SnapshotConfig {
+            network: "mainnet".to_string(),
+            data_dir: "./data".to_string(),
+            startup_topic: "startup".to_string(),
+            snapshot_topic: "snapshot".to_string(),
+            bootstrapped_subscribe_topic: "bootstrapped".to_string(),
+            completion_topic:
"completion".to_string(), + }; + + assert_eq!(config.network_dir(), "./data/mainnet"); + } + + #[test] + fn test_snapshot_config_config_path() { + let config = SnapshotConfig { + network: "preprod".to_string(), + data_dir: "/var/data".to_string(), + startup_topic: "startup".to_string(), + snapshot_topic: "snapshot".to_string(), + bootstrapped_subscribe_topic: "bootstrapped".to_string(), + completion_topic: "completion".to_string(), + }; + + assert_eq!(config.config_path(), "/var/data/preprod/config.json"); + } + + #[test] + fn test_snapshot_config_snapshots_path() { + let config = SnapshotConfig { + network: "mainnet".to_string(), + data_dir: "./data".to_string(), + startup_topic: "startup".to_string(), + snapshot_topic: "snapshot".to_string(), + bootstrapped_subscribe_topic: "bootstrapped".to_string(), + completion_topic: "completion".to_string(), + }; + + assert_eq!(config.snapshots_path(), "./data/mainnet/snapshots.json"); + } + + #[test] + fn test_snapshot_file_metadata_file_path() { + let metadata = SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: "https://example.com/snapshot.cbor.gz".to_string(), + }; + + assert_eq!( + metadata.file_path("/data/mainnet"), + "/data/mainnet/point_500.cbor" + ); + } + + #[test] + fn test_filter_by_epochs() { + let all_snapshots = vec![ + SnapshotFileMetadata { + epoch: 500, + point: "point_500".to_string(), + url: "url1".to_string(), + }, + SnapshotFileMetadata { + epoch: 501, + point: "point_501".to_string(), + url: "url2".to_string(), + }, + SnapshotFileMetadata { + epoch: 502, + point: "point_502".to_string(), + url: "url3".to_string(), + }, + ]; + + let filtered = SnapshotFileMetadata::filter_by_epochs(&all_snapshots, &[500, 502]); + + assert_eq!(filtered.len(), 2); + assert_eq!(filtered[0].epoch, 500); + assert_eq!(filtered[1].epoch, 502); + } + + #[test] + fn test_read_network_config_success() { + let temp_dir = TempDir::new().unwrap(); + let config_path = create_test_network_config(temp_dir.path(), vec![500, 501]); + + let result = NetworkConfig::read_from_file(config_path.to_str().unwrap()); + assert!(result.is_ok()); + + let config = result.unwrap(); + assert_eq!(config.snapshots, vec![500, 501]); + assert_eq!(config.points.len(), 1); + } + + #[test] + fn test_read_network_config_missing_file() { + let result = NetworkConfig::read_from_file("/nonexistent/config.json"); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + ConfigError::ReadNetworkConfig(_, _) + )); + } + + #[test] + fn test_read_network_config_malformed_json() { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("config.json"); + let mut file = fs::File::create(&config_path).unwrap(); + file.write_all(b"{ invalid json }").unwrap(); + + let result = NetworkConfig::read_from_file(config_path.to_str().unwrap()); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + ConfigError::MalformedNetworkConfig(_, _) + )); + } + + #[test] + fn test_read_snapshots_metadata_success() { + let temp_dir = TempDir::new().unwrap(); + let snapshots_path = + create_test_snapshots_metadata(temp_dir.path(), vec![500, 501], "https://example.com"); + + let result = SnapshotFileMetadata::read_all_from_file(snapshots_path.to_str().unwrap()); + assert!(result.is_ok()); + + let snapshots = result.unwrap(); + assert_eq!(snapshots.len(), 2); + assert_eq!(snapshots[0].epoch, 500); + assert_eq!(snapshots[1].epoch, 501); + } + + #[test] + fn test_read_snapshots_metadata_missing_file() { + let result = 
SnapshotFileMetadata::read_all_from_file("/nonexistent/snapshots.json");
+        assert!(result.is_err());
+        assert!(matches!(
+            result.unwrap_err(),
+            ConfigError::ReadSnapshotsFile(_, _)
+        ));
+    }
+
+    #[test]
+    fn test_corrupted_config_json_fails_gracefully() {
+        let temp_dir = TempDir::new().unwrap();
+        let config_path = temp_dir.path().join("config.json");
+        let mut file = fs::File::create(&config_path).unwrap();
+        file.write_all(b"{\"snapshots\": [500, 501]").unwrap();
+
+        let result = NetworkConfig::read_from_file(config_path.to_str().unwrap());
+        assert!(result.is_err());
+
+        if let Err(ConfigError::MalformedNetworkConfig(path, _)) = result {
+            assert_eq!(path, config_path);
+        } else {
+            panic!("Expected MalformedNetworkConfig error");
+        }
+    }
+
+    #[test]
+    fn test_corrupted_snapshots_json_fails_gracefully() {
+        let temp_dir = TempDir::new().unwrap();
+        let snapshots_path = temp_dir.path().join("snapshots.json");
+        let mut file = fs::File::create(&snapshots_path).unwrap();
+        file.write_all(b"[{\"epoch\": 500}").unwrap();
+
+        let result = SnapshotFileMetadata::read_all_from_file(snapshots_path.to_str().unwrap());
+        assert!(result.is_err());
+
+        if let Err(ConfigError::MalformedSnapshotsFile(path, _)) = result {
+            assert_eq!(path, snapshots_path);
+        } else {
+            panic!("Expected MalformedSnapshotsFile error");
+        }
+    }
+}
diff --git a/modules/snapshot_bootstrapper/src/downloader.rs b/modules/snapshot_bootstrapper/src/downloader.rs
new file mode 100644
index 00000000..779cff53
--- /dev/null
+++ b/modules/snapshot_bootstrapper/src/downloader.rs
@@ -0,0 +1,255 @@
+use crate::configuration::SnapshotFileMetadata;
+use crate::progress_reader::ProgressReader;
+use async_compression::tokio::bufread::GzipDecoder;
+use futures_util::TryStreamExt;
+use reqwest::Client;
+use std::io;
+use std::path::{Path, PathBuf};
+use thiserror::Error;
+use tokio::fs::File;
+use tokio::io::BufReader;
+use tracing::info;
+
+#[derive(Debug, Error)]
+pub enum DownloadError {
+    #[error("Failed to initialize HTTP client: {0}")]
+    ClientInit(#[from] reqwest::Error),
+
+    #[error("Failed to download snapshot from {0}: {1}")]
+    RequestFailed(String, reqwest::Error),
+
+    #[error("Download failed from {0}: HTTP status {1}")]
+    InvalidStatusCode(String, reqwest::StatusCode),
+
+    #[error("Cannot create directory {0}: {1}")]
+    CreateDirectory(PathBuf, io::Error),
+
+    #[error("I/O error: {0}")]
+    Io(#[from] io::Error),
+}
+
+/// Handles downloading and decompressing snapshot files
+pub struct SnapshotDownloader {
+    client: Client,
+    network_dir: String,
+}
+
+impl SnapshotDownloader {
+    pub fn new(network_dir: String) -> Result<Self, DownloadError> {
+        let client = Client::builder()
+            .timeout(std::time::Duration::from_mins(5))
+            .connect_timeout(std::time::Duration::from_secs(30))
+            .build()?;
+
+        Ok(Self {
+            client,
+            network_dir,
+        })
+    }
+
+    pub async fn download_all(
+        &self,
+        snapshots: &[SnapshotFileMetadata],
+    ) -> Result<(), DownloadError> {
+        for snapshot_meta in snapshots {
+            let file_path = snapshot_meta.file_path(&self.network_dir);
+            self.download_single(&snapshot_meta.url, &file_path).await?;
+        }
+        Ok(())
+    }
+
+    /// Downloads a gzip-compressed snapshot from the given URL, decompresses it on-the-fly,
+    /// and saves the decompressed CBOR data to the specified output path.
+    /// The data is first written to a `.partial` temporary file to ensure atomicity
+    /// and then renamed to the final output path upon successful completion.
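+    /// If the target file already exists, the download is skipped entirely; on
+    /// any failure the `.partial` file is removed so that a retry starts clean.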
+    pub async fn download_single(&self, url: &str, output_path: &str) -> Result<(), DownloadError> {
+        let path = Path::new(output_path);
+
+        if path.exists() {
+            info!("Snapshot already exists, skipping: {}", output_path);
+            return Ok(());
+        }
+
+        info!("Downloading snapshot from {}", url);
+
+        if let Some(parent) = path.parent() {
+            tokio::fs::create_dir_all(parent)
+                .await
+                .map_err(|e| DownloadError::CreateDirectory(parent.to_path_buf(), e))?;
+        }
+
+        let tmp_path = path.with_extension("partial");
+
+        let result = async {
+            let response = self
+                .client
+                .get(url)
+                .send()
+                .await
+                .map_err(|e| DownloadError::RequestFailed(url.to_string(), e))?;
+
+            if !response.status().is_success() {
+                return Err(DownloadError::InvalidStatusCode(
+                    url.to_string(),
+                    response.status(),
+                ));
+            }
+
+            let content_length = response.content_length();
+            let mut file = File::create(&tmp_path).await?;
+
+            let stream = response.bytes_stream().map_err(io::Error::other);
+            let async_read = tokio_util::io::StreamReader::new(stream);
+            let progress_reader = ProgressReader::new(async_read, content_length, 200);
+            let buffered = BufReader::new(progress_reader);
+            let mut decoder = GzipDecoder::new(buffered);
+
+            tokio::io::copy(&mut decoder, &mut file).await?;
+
+            file.sync_all().await?;
+            tokio::fs::rename(&tmp_path, output_path).await?;
+
+            info!("Downloaded and decompressed snapshot to {}", output_path);
+            Ok(())
+        }
+        .await;
+
+        if result.is_err() {
+            let _ = tokio::fs::remove_file(&tmp_path).await;
+        }
+
+        result
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use flate2::write::GzEncoder;
+    use flate2::Compression;
+    use std::io::Write;
+    use tempfile::TempDir;
+    use wiremock::matchers::{method, path};
+    use wiremock::{Mock, MockServer, ResponseTemplate};
+
+    fn gzip_compress(data: &[u8]) -> Vec<u8> {
+        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
+        encoder.write_all(data).unwrap();
+        encoder.finish().unwrap()
+    }
+
+    #[tokio::test]
+    async fn test_downloader_skips_existing_file() {
+        let temp_dir = TempDir::new().unwrap();
+        let file_path = temp_dir.path().join("snapshot.cbor");
+        std::fs::write(&file_path, b"existing data").unwrap();
+
+        let downloader =
+            SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap();
+
+        let result = downloader
+            .download_single(
+                "https://example.com/snapshot.cbor.gz",
+                file_path.to_str().unwrap(),
+            )
+            .await;
+
+        assert!(result.is_ok());
+        assert_eq!(std::fs::read(&file_path).unwrap(), b"existing data");
+    }
+
+    #[tokio::test]
+    async fn test_downloader_downloads_and_decompresses() {
+        let mock_server = MockServer::start().await;
+        let compressed = gzip_compress(b"snapshot content");
+
+        Mock::given(method("GET"))
+            .and(path("/snapshot.cbor.gz"))
+            .respond_with(ResponseTemplate::new(200).set_body_bytes(compressed))
+            .mount(&mock_server)
+            .await;
+
+        let temp_dir = TempDir::new().unwrap();
+        let file_path = temp_dir.path().join("snapshot.cbor");
+        let downloader =
+            SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap();
+
+        let url = format!("{}/snapshot.cbor.gz", mock_server.uri());
+        let result = downloader.download_single(&url, file_path.to_str().unwrap()).await;
+
+        assert!(result.is_ok());
+        assert_eq!(std::fs::read(&file_path).unwrap(), b"snapshot content");
+    }
+
+    #[tokio::test]
+    async fn test_downloader_handles_http_error() {
+        let mock_server = MockServer::start().await;
+
+        Mock::given(method("GET"))
+            .and(path("/snapshot.cbor.gz"))
.respond_with(ResponseTemplate::new(404))
+            .mount(&mock_server)
+            .await;
+
+        let temp_dir = TempDir::new().unwrap();
+        let file_path = temp_dir.path().join("snapshot.cbor");
+        let downloader =
+            SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap();
+
+        let url = format!("{}/snapshot.cbor.gz", mock_server.uri());
+        let result = downloader.download_single(&url, file_path.to_str().unwrap()).await;
+
+        assert!(matches!(
+            result,
+            Err(DownloadError::InvalidStatusCode(_, _))
+        ));
+        assert!(!file_path.exists());
+    }
+
+    #[tokio::test]
+    async fn test_downloader_creates_parent_directories() {
+        let mock_server = MockServer::start().await;
+        let compressed = gzip_compress(b"data");
+
+        Mock::given(method("GET"))
+            .and(path("/snapshot.cbor.gz"))
+            .respond_with(ResponseTemplate::new(200).set_body_bytes(compressed))
+            .mount(&mock_server)
+            .await;
+
+        let temp_dir = TempDir::new().unwrap();
+        let file_path = temp_dir.path().join("nested").join("dir").join("snapshot.cbor");
+        let downloader =
+            SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap();
+
+        let url = format!("{}/snapshot.cbor.gz", mock_server.uri());
+        let result = downloader.download_single(&url, file_path.to_str().unwrap()).await;
+
+        assert!(result.is_ok());
+        assert!(file_path.exists());
+    }
+
+    #[tokio::test]
+    async fn test_downloader_cleans_up_partial_on_failure() {
+        let mock_server = MockServer::start().await;
+
+        Mock::given(method("GET"))
+            .and(path("/snapshot.cbor.gz"))
+            .respond_with(ResponseTemplate::new(200).set_body_bytes(b"not valid gzip"))
+            .mount(&mock_server)
+            .await;
+
+        let temp_dir = TempDir::new().unwrap();
+        let file_path = temp_dir.path().join("snapshot.cbor");
+        let partial_path = temp_dir.path().join("snapshot.partial");
+        let downloader =
+            SnapshotDownloader::new(temp_dir.path().to_str().unwrap().to_string()).unwrap();
+
+        let url = format!("{}/snapshot.cbor.gz", mock_server.uri());
+        let result = downloader.download_single(&url, file_path.to_str().unwrap()).await;
+
+        assert!(result.is_err());
+        assert!(!file_path.exists());
+        assert!(!partial_path.exists());
+    }
+}
diff --git a/modules/snapshot_bootstrapper/src/progress_reader.rs b/modules/snapshot_bootstrapper/src/progress_reader.rs
new file mode 100644
index 00000000..e042d746
--- /dev/null
+++ b/modules/snapshot_bootstrapper/src/progress_reader.rs
@@ -0,0 +1,55 @@
+use std::pin::Pin;
+use std::task::{Context, Poll};
+use tracing::info;
+
+pub struct ProgressReader<R> {
+    inner: R,
+    bytes_read: u64,
+    last_log: u64,
+    log_interval: u64,
+    total_size: Option<u64>,
+}
+
+impl<R> ProgressReader<R> {
+    pub fn new(inner: R, total_size: Option<u64>, log_interval_mb: u64) -> Self {
+        Self {
+            inner,
+            bytes_read: 0,
+            last_log: 0,
+            log_interval: log_interval_mb * 1024 * 1024,
+            total_size,
+        }
+    }
+}
+
+impl<R: tokio::io::AsyncRead + Unpin> tokio::io::AsyncRead for ProgressReader<R> {
+    fn poll_read(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut tokio::io::ReadBuf<'_>,
+    ) -> Poll<std::io::Result<()>> {
+        let before = buf.filled().len();
+        let result = Pin::new(&mut self.inner).poll_read(cx, buf);
+        let after = buf.filled().len();
+        let bytes_read = (after - before) as u64;
+
+        self.bytes_read += bytes_read;
+
+        if self.bytes_read - self.last_log >= self.log_interval {
+            if let Some(total) = self.total_size {
+                let percent = (self.bytes_read as f64 / total as f64) * 100.0;
+                info!(
+                    "Download progress: {:.1}% ({} MB / {} MB)",
+                    percent,
+                    self.bytes_read / (1024 * 1024),
+                    total / (1024 * 1024)
+                );
+            } else {
+                info!("Downloaded {} MB",
self.bytes_read / (1024 * 1024));
+            }
+            self.last_log = self.bytes_read;
+        }
+
+        result
+    }
+}
diff --git a/modules/snapshot_bootstrapper/src/publisher.rs b/modules/snapshot_bootstrapper/src/publisher.rs
new file mode 100644
index 00000000..61a3d904
--- /dev/null
+++ b/modules/snapshot_bootstrapper/src/publisher.rs
@@ -0,0 +1,149 @@
+use acropolis_common::{
+    messages::{CardanoMessage, Message},
+    snapshot::streaming_snapshot::{
+        DRepCallback, DRepInfo, GovernanceProposal, PoolCallback, PoolInfo, ProposalCallback,
+        SnapshotCallbacks, SnapshotMetadata, StakeCallback, UtxoCallback, UtxoEntry,
+    },
+    stake_addresses::AccountState,
+    BlockInfo,
+};
+use anyhow::Result;
+use caryatid_sdk::Context;
+use std::sync::Arc;
+use tracing::info;
+
+/// Handles publishing snapshot data to the message bus
+pub struct SnapshotPublisher {
+    context: Arc<Context<Message>>,
+    completion_topic: String,
+    snapshot_topic: String,
+    metadata: Option<SnapshotMetadata>,
+    utxo_count: u64,
+    pools: Vec<PoolInfo>,
+    accounts: Vec<AccountState>,
+    dreps: Vec<DRepInfo>,
+    proposals: Vec<GovernanceProposal>,
+}
+
+impl SnapshotPublisher {
+    pub fn new(
+        context: Arc<Context<Message>>,
+        completion_topic: String,
+        snapshot_topic: String,
+    ) -> Self {
+        Self {
+            context,
+            completion_topic,
+            snapshot_topic,
+            metadata: None,
+            utxo_count: 0,
+            pools: Vec::new(),
+            accounts: Vec::new(),
+            dreps: Vec::new(),
+            proposals: Vec::new(),
+        }
+    }
+
+    pub async fn publish_start(&self) -> Result<()> {
+        let message = Arc::new(Message::Snapshot(
+            acropolis_common::messages::SnapshotMessage::Startup,
+        ));
+        self.context.publish(&self.snapshot_topic, message).await
+    }
+
+    pub async fn publish_completion(&self, block_info: BlockInfo) -> Result<()> {
+        let message = Arc::new(Message::Cardano((
+            block_info,
+            CardanoMessage::SnapshotComplete,
+        )));
+        self.context.publish(&self.completion_topic, message).await
+    }
+}
+
+impl UtxoCallback for SnapshotPublisher {
+    fn on_utxo(&mut self, _utxo: UtxoEntry) -> Result<()> {
+        self.utxo_count += 1;
+
+        // Log progress every million UTXOs
+        if self.utxo_count.is_multiple_of(1_000_000) {
+            info!("Processed {} UTXOs", self.utxo_count);
+        }
+        // TODO: Accumulate UTXO data if needed or send in chunks to UTXOState processor
+        Ok(())
+    }
+}
+
+impl PoolCallback for SnapshotPublisher {
+    fn on_pools(&mut self, pools: Vec<PoolInfo>) -> Result<()> {
+        info!("Received {} pools", pools.len());
+        self.pools.extend(pools);
+        // TODO: Accumulate pool data if needed or send in chunks to PoolState processor
+        Ok(())
+    }
+}
+
+impl StakeCallback for SnapshotPublisher {
+    fn on_accounts(&mut self, accounts: Vec<AccountState>) -> Result<()> {
+        info!("Received {} accounts", accounts.len());
+        self.accounts.extend(accounts);
+        // TODO: Accumulate account data if needed or send in chunks to AccountState processor
+        Ok(())
+    }
+}
+
+impl DRepCallback for SnapshotPublisher {
+    fn on_dreps(&mut self, dreps: Vec<DRepInfo>) -> Result<()> {
+        info!("Received {} DReps", dreps.len());
+        self.dreps.extend(dreps);
+        // TODO: Accumulate DRep data if needed or send in chunks to DRepState processor
+        Ok(())
+    }
+}
+
+impl ProposalCallback for SnapshotPublisher {
+    fn on_proposals(&mut self, proposals: Vec<GovernanceProposal>) -> Result<()> {
+        info!("Received {} proposals", proposals.len());
+        self.proposals.extend(proposals);
+        // TODO: Accumulate proposal data if needed or send in chunks to ProposalState processor
+        Ok(())
+    }
+}
+
+impl SnapshotCallbacks for SnapshotPublisher {
+    fn on_metadata(&mut self, metadata: SnapshotMetadata) -> Result<()> {
+        info!("Snapshot metadata for epoch {}", metadata.epoch);
+        info!("  UTXOs: {:?}", metadata.utxo_count);
+        info!(
+            " Pot
diff --git a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs b/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs
deleted file mode 100644
index 9f22c356..00000000
--- a/modules/snapshot_bootstrapper/src/snapshot_bootstrapper.rs
+++ /dev/null
@@ -1,305 +0,0 @@
-use std::{str::FromStr, sync::Arc};
-
-use acropolis_common::{
-    genesis_values::GenesisValues,
-    hash::Hash,
-    messages::{CardanoMessage, GenesisCompleteMessage, Message},
-    snapshot::{
-        streaming_snapshot::{
-            DRepCallback, DRepInfo, GovernanceProposal, PoolCallback, PoolInfo, ProposalCallback,
-            SnapshotCallbacks, SnapshotMetadata, StakeCallback, UtxoCallback, UtxoEntry,
-        },
-        StreamingSnapshotParser,
-    },
-    stake_addresses::AccountState,
-    BlockHash, BlockInfo, BlockStatus, Era, GenesisDelegates,
-};
-use anyhow::Result;
-use caryatid_sdk::{module, Context};
-use config::Config;
-use tokio::time::Instant;
-use tracing::{error, info, info_span, Instrument};
-
-const DEFAULT_SNAPSHOT_TOPIC: &str = "cardano.snapshot";
-const DEFAULT_STARTUP_TOPIC: &str = "cardano.sequence.start";
-const DEFAULT_COMPLETION_TOPIC: &str = "cardano.sequence.bootstrapped";
-
-/// Callback handler that accumulates snapshot data and builds state
-struct SnapshotHandler {
-    context: Arc<Context<Message>>,
-    snapshot_topic: String,
-
-    // Accumulated data from callbacks
-    metadata: Option<SnapshotMetadata>,
-    utxo_count: u64,
-    pools: Vec<PoolInfo>,
-    accounts: Vec<AccountState>,
-    dreps: Vec<DRepInfo>,
-    proposals: Vec<GovernanceProposal>,
-}
-
-#[module(
-    message_type(Message),
-    name = "snapshot-bootstrapper",
-    description = "Snapshot Bootstrapper to broadcast state"
-)]
-pub struct SnapshotBootstrapper;
-
-impl SnapshotHandler {
-    fn new(context: Arc<Context<Message>>, snapshot_topic: String) -> Self {
-        Self {
-            context,
-            snapshot_topic,
-            metadata: None,
-            utxo_count: 0,
-            pools: Vec::new(),
-            accounts: Vec::new(),
-            dreps: Vec::new(),
-            proposals: Vec::new(),
-        }
-    }
-
-    /// Build BlockInfo from accumulated metadata
-    fn build_block_info(&self) -> Result<BlockInfo> {
-        let metadata =
-            self.metadata.as_ref().ok_or_else(|| anyhow::anyhow!("No metadata available"))?;
-
-        // Create a synthetic BlockInfo representing the snapshot state
-        // This represents the last block included in the snapshot
-        Ok(BlockInfo {
-            status: BlockStatus::Immutable, // Snapshot blocks are immutable
-            slot: 0,                        // TODO: Extract from snapshot metadata if available
-            number: 0,                      // TODO: Extract from snapshot metadata if available
-            hash: BlockHash::default(),     // TODO: Extract from snapshot metadata if available
-            epoch: metadata.epoch,
-            epoch_slot: 0,    // TODO: Extract from snapshot metadata if available
-            new_epoch: false, // Not necessarily a new epoch
-            timestamp: 0,     // TODO: Extract from snapshot metadata if available
-            era: Era::Conway, // TODO: Determine from snapshot or config
-        })
-    }
-
-    /// Build GenesisValues from snapshot data
-    fn build_genesis_values(&self) -> Result<GenesisValues> {
-        // TODO: These values should ideally come from the snapshot or configuration
-        // For now, using defaults for Conway era
-        Ok(GenesisValues {
-            byron_timestamp: 1506203091, // Byron mainnet genesis timestamp
-            shelley_epoch: 208,          // Shelley started at epoch 208 on mainnet
-            shelley_epoch_len: 432000,   // 5 days in seconds
-            // Shelley mainnet genesis hash (placeholder - should be from config)
-            shelley_genesis_hash: Hash::<32>::from_str(
-                "1a3be38bcbb7911969283716ad7aa550250226b76a61fc51cc9a9a35d9276d81",
-            )
-            .unwrap(),
-            genesis_delegs: GenesisDelegates::try_from(vec![]).unwrap(),
-        })
-    }
-
-    async fn publish_start(&self) -> Result<()> {
-        anyhow::Context::context(
-            self.context
-                .message_bus
-                .publish(
-                    &self.snapshot_topic,
-                    Arc::new(Message::Snapshot(
-                        acropolis_common::messages::SnapshotMessage::Startup,
-                    )),
-                )
-                .await,
-            "Failed to publish start message",
-        )
-    }
-
-    async fn publish_completion(
-        &self,
-        block_info: BlockInfo,
-        genesis_values: GenesisValues,
-    ) -> Result<()> {
-        let message = Message::Cardano((
-            block_info,
-            CardanoMessage::GenesisComplete(GenesisCompleteMessage {
-                values: genesis_values,
-            }),
-        ));
-
-        anyhow::Context::context(
-            self.context.message_bus.publish(&self.snapshot_topic, Arc::new(message)).await,
-            "Failed to publish completion",
-        )
-    }
-}
-
-impl UtxoCallback for SnapshotHandler {
-    fn on_utxo(&mut self, _utxo: UtxoEntry) -> Result<()> {
-        self.utxo_count += 1;
-
-        // Log progress every million UTXOs
-        if self.utxo_count.is_multiple_of(1_000_000) {
-            info!("Processed {} UTXOs", self.utxo_count);
-        }
-        // TODO: Accumulate UTXO data if needed or send in chunks to UTXOState processor
-        Ok(())
-    }
-}
-
-impl PoolCallback for SnapshotHandler {
-    fn on_pools(&mut self, pools: Vec<PoolInfo>) -> Result<()> {
-        info!("Received {} pools", pools.len());
-        self.pools.extend(pools);
-        // TODO: Publish pool data.
-        Ok(())
-    }
-}
-
-impl StakeCallback for SnapshotHandler {
-    fn on_accounts(&mut self, accounts: Vec<AccountState>) -> Result<()> {
-        info!("Received {} accounts", accounts.len());
-        self.accounts.extend(accounts);
-        // TODO: Publish account data.
-        Ok(())
-    }
-}
-
-impl DRepCallback for SnapshotHandler {
-    fn on_dreps(&mut self, dreps: Vec<DRepInfo>) -> Result<()> {
-        info!("Received {} DReps", dreps.len());
-        self.dreps.extend(dreps);
-        // TODO: Publish DRep data.
-
-        Ok(())
-    }
-}
-
-impl ProposalCallback for SnapshotHandler {
-    fn on_proposals(&mut self, proposals: Vec<GovernanceProposal>) -> Result<()> {
-        info!("Received {} proposals", proposals.len());
-        self.proposals.extend(proposals);
-        // TODO: Publish proposal data.
-        Ok(())
-    }
-}
-
-impl SnapshotCallbacks for SnapshotHandler {
-    fn on_metadata(&mut self, metadata: SnapshotMetadata) -> Result<()> {
-        info!("Received snapshot metadata for epoch {}", metadata.epoch);
-        info!(" - UTXOs: {:?}", metadata.utxo_count);
-        info!(
-            " - Pot balances: treasury={}, reserves={}, deposits={}",
-            metadata.pot_balances.treasury,
-            metadata.pot_balances.reserves,
-            metadata.pot_balances.deposits
-        );
-        info!(
-            " - Previous epoch blocks: {}",
-            metadata.blocks_previous_epoch.len()
-        );
-        info!(
-            " - Current epoch blocks: {}",
-            metadata.blocks_current_epoch.len()
-        );
-
-        self.metadata = Some(metadata);
-        Ok(())
-    }
-
-    fn on_complete(&mut self) -> Result<()> {
-        info!("Snapshot parsing completed");
-        info!("Final statistics:");
-        info!("  - UTXOs processed: {}", self.utxo_count);
-        info!("  - Pools: {}", self.pools.len());
-        info!("  - Accounts: {}", self.accounts.len());
-        info!("  - DReps: {}", self.dreps.len());
-        info!("  - Proposals: {}", self.proposals.len());
-
-        // We could send a Resolver reference from here for large data, i.e. the UTXO set,
-        // which could be a file reference. For a file reference, we'd extend the parser to
-        // give us a callback value with the offset into the file; and we'd make the streaming
-        // UTXO parser public and reusable, adding it to the resolver implementation.
-        Ok(())
-    }
-}
-
-impl SnapshotBootstrapper {
-    pub async fn init(&self, context: Arc<Context<Message>>, config: Arc<Config>) -> Result<()> {
-        // TODO: read a config file path, not the snapshot-path; implement TODOs below.
-        let file_path = config
-            .get_string("snapshot-path")
-            .inspect_err(|e| error!("failed to find snapshot-path config: {e}"))?;
-
-        let startup_topic =
-            config.get_string("startup-topic").unwrap_or(DEFAULT_STARTUP_TOPIC.to_string());
-
-        let snapshot_topic =
-            config.get_string("snapshot-topic").unwrap_or(DEFAULT_SNAPSHOT_TOPIC.to_string());
-        info!("Publishing snapshots on '{snapshot_topic}'");
-
-        let completion_topic =
-            config.get_string("completion-topic").unwrap_or(DEFAULT_COMPLETION_TOPIC.to_string());
-        info!("Completing with '{completion_topic}'");
-
-        let mut subscription = context.subscribe(&startup_topic).await?;
-
-        context.clone().run(async move {
-            let Ok(_) = subscription.read().await else {
-                return;
-            };
-            info!("Received startup message");
-
-            // TODO:
-            // Read config file per docs in NOTES.md
-            // read nonces
-            // read headers
-            // read and process ALL of the snapshot files, not just one.
-
-            let span = info_span!("snapshot_bootstrapper.handle");
-            async {
-                if let Err(e) =
-                    Self::process_snapshot(&file_path, context.clone(), &completion_topic).await
-                {
-                    error!("Failed to process snapshot: {}", e);
-                }
-            }
-            .instrument(span)
-            .await;
-        });
-
-        Ok(())
-    }
-
-    async fn process_snapshot(
-        file_path: &str,
-        context: Arc<Context<Message>>,
-        completion_topic: &str,
-    ) -> Result<()> {
-        let parser = StreamingSnapshotParser::new(file_path);
-        let mut callbacks = SnapshotHandler::new(context.clone(), completion_topic.to_string());
-
-        info!(
-            "Starting snapshot parsing and publishing from: {}",
-            file_path
-        );
-        let start = Instant::now();
-
-        callbacks.publish_start().await?;
-
-        // Parse the snapshot with our callback handler
-        parser.parse(&mut callbacks)?;
-
-        let duration = start.elapsed();
-        info!(
-            "✓ Parse and publish completed successfully in {:.2?}",
-            duration
-        );
-
-        // Build the final state from accumulated data
-        let block_info = callbacks.build_block_info()?;
-        let genesis_values = callbacks.build_genesis_values()?;
-
-        // Publish completion message to trigger next phase (e.g., Mithril)
-        callbacks.publish_completion(block_info, genesis_values).await?;
-
-        info!("Snapshot bootstrap completed successfully");
-        Ok(())
-    }
-}
diff --git a/processes/omnibus/Cargo.toml b/processes/omnibus/Cargo.toml
index 9afa646f..bb171711 100644
--- a/processes/omnibus/Cargo.toml
+++ b/processes/omnibus/Cargo.toml
@@ -33,6 +33,7 @@ acropolis_module_historical_accounts_state = { path = "../../modules/historical_
 acropolis_module_historical_epochs_state = { path = "../../modules/historical_epochs_state" }
 acropolis_module_block_vrf_validator = { path = "../../modules/block_vrf_validator" }
 acropolis_module_block_kes_validator = { path = "../../modules/block_kes_validator" }
+acropolis_module_snapshot_bootstrapper = { path = "../../modules/snapshot_bootstrapper" }
 
 caryatid_process = { workspace = true }
 caryatid_module_clock = { workspace = true }
diff --git a/processes/omnibus/omnibus.toml b/processes/omnibus/omnibus.toml
index 0548466d..4b25d34e 100644
--- a/processes/omnibus/omnibus.toml
+++ b/processes/omnibus/omnibus.toml
@@ -1,5 +1,15 @@
 # Top-level configuration for Acropolis omnibus process
 
+# ============================================================================
+# Startup Configuration
+# ============================================================================
+[startup]
+method = "mithril" # Options: "mithril" | "snapshot"
+topic = "cardano.sequence.start"
+
+# ============================================================================
+# Bootstrap Module Configurations
+# ============================================================================
 [module.genesis-bootstrapper]
 
 [module.mithril-snapshot-fetcher]
@@ -10,6 +20,13 @@ download-max-age = "never"
 # Pause constraint E.g. "epoch:100", "block:1200"
 pause = "none"
 
+[module.snapshot-bootstrapper]
+network = "mainnet"
+data-dir = "../../modules/snapshot_bootstrapper/data"
+
+# ============================================================================
+# Core Module Configurations
+# ============================================================================
 [module.peer-network-interface]
 sync-point = "snapshot"
 node-addresses = [
@@ -170,9 +187,9 @@ port = 4340
 
 # Enable for message spying
 #topic = "cardano.#"
 
-[startup]
-topic = "cardano.sequence.start"
-
+# ============================================================================
+# Message Bus Configuration
+# ============================================================================
 [message-bus.external]
 class = "rabbit-mq"
 url = "amqp://127.0.0.1:5672/%2f"
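The new `[module.snapshot-bootstrapper]` block above carries the keys the module reads at init. A minimal sketch of how they might be consumed (assuming, as in the deleted `init` above, that each module receives a `Config` scoped to its own section; the mainnet fallback shown here is an illustrative assumption, not necessarily the module's behaviour):

```rust
use config::Config;

// Illustrative helper: read the [module.snapshot-bootstrapper] settings.
fn read_bootstrapper_settings(config: &Config) -> anyhow::Result<(String, String)> {
    // Network to bootstrap; assumed to default to mainnet.
    let network = config.get_string("network").unwrap_or_else(|_| "mainnet".to_string());
    // Directory holding downloaded snapshot files; required.
    let data_dir = config.get_string("data-dir")?;
    Ok((network, data_dir))
}
```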
"epoch:100", "block:1200" pause = "none" +[module.snapshot-bootstrapper] +network = "mainnet" +data-dir = "../../modules/snapshot_bootstrapper/data" + +# ============================================================================ +# Core Module Configurations +# ============================================================================ [module.peer-network-interface] sync-point = "snapshot" node-addresses = [ @@ -170,9 +187,9 @@ port = 4340 # Enable for message spying #topic = "cardano.#" -[startup] -topic = "cardano.sequence.start" - +# ============================================================================ +# Message Bus Configuration +# ============================================================================ [message-bus.external] class = "rabbit-mq" url = "amqp://127.0.0.1:5672/%2f" diff --git a/processes/omnibus/src/main.rs b/processes/omnibus/src/main.rs index c27df335..49f2cea3 100644 --- a/processes/omnibus/src/main.rs +++ b/processes/omnibus/src/main.rs @@ -27,6 +27,7 @@ use acropolis_module_mithril_snapshot_fetcher::MithrilSnapshotFetcher; use acropolis_module_parameters_state::ParametersState; use acropolis_module_peer_network_interface::PeerNetworkInterface; use acropolis_module_rest_blockfrost::BlockfrostREST; +use acropolis_module_snapshot_bootstrapper::SnapshotBootstrapper; use acropolis_module_spdd_state::SPDDState; use acropolis_module_spo_state::SPOState; use acropolis_module_stake_delta_filter::StakeDeltaFilter; @@ -44,6 +45,10 @@ use tracing_opentelemetry::OpenTelemetryLayer; use tracing_subscriber::prelude::*; use tracing_subscriber::{filter, fmt, EnvFilter, Registry}; +const STARTUP_METHOD_MITHRIL: &str = "mithril"; +const STARTUP_METHOD_SNAPSHOT: &str = "snapshot"; +const CONFIG_KEY_STARTUP_METHOD: &str = "startup.method"; + #[cfg(not(target_env = "msvc"))] use tikv_jemallocator::Jemalloc; #[cfg(not(target_env = "msvc"))] @@ -93,16 +98,39 @@ pub async fn main() -> Result<()> { Config::builder() .add_source(File::with_name(&args.config)) .add_source(Environment::with_prefix("ACROPOLIS")) - .build() - .unwrap(), + .build()?, ); // Create the process - let mut process = Process::::create(config).await; + let mut process = Process::::create(config.clone()).await; + + // Get startup method from config + let startup_method = config + .get_string(CONFIG_KEY_STARTUP_METHOD) + .unwrap_or_else(|_| STARTUP_METHOD_MITHRIL.to_string()); + + info!("Using startup method: {}", startup_method); + + // Register bootstrap modules based on the startup method + match startup_method.as_str() { + STARTUP_METHOD_MITHRIL => { + info!("Registering MithrilSnapshotFetcher"); + MithrilSnapshotFetcher::register(&mut process); + } + STARTUP_METHOD_SNAPSHOT => { + info!("Registering SnapshotBootstrapper"); + SnapshotBootstrapper::register(&mut process); + } + _ => { + panic!( + "Invalid startup method: {}. Must be one of: mithril, snapshot", + startup_method + ); + } + } // Register modules GenesisBootstrapper::register(&mut process); - MithrilSnapshotFetcher::register(&mut process); BlockUnpacker::register(&mut process); PeerNetworkInterface::register(&mut process); TxUnpacker::register(&mut process);