diff --git a/Cargo.lock b/Cargo.lock
index 5ffbd2d1..f34c3e52 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -295,6 +295,18 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "acropolis_module_indexer"
+version = "0.1.0"
+dependencies = [
+ "acropolis_common",
+ "anyhow",
+ "caryatid_sdk",
+ "config",
+ "serde",
+ "tracing",
+]
+
 [[package]]
 name = "acropolis_module_mithril_snapshot_fetcher"
 version = "0.1.0"
@@ -475,6 +487,23 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "acropolis_process_indexer"
+version = "0.1.0"
+dependencies = [
+ "acropolis_common",
+ "acropolis_module_block_unpacker",
+ "acropolis_module_genesis_bootstrapper",
+ "acropolis_module_indexer",
+ "acropolis_module_peer_network_interface",
+ "anyhow",
+ "caryatid_process",
+ "clap 4.5.51",
+ "config",
+ "tokio",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "acropolis_process_omnibus"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 736a8233..0ad4e54a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,6 +35,7 @@ members = [
     "processes/replayer",         # All-inclusive process to replay messages
     "processes/golden_tests",     # All-inclusive golden tests process
     "processes/tx_submitter_cli", # CLI wrapper for TX submitter
+    "processes/indexer",          # Minimal example indexer
 ]
 
 resolver = "2"
diff --git a/common/Cargo.toml b/common/Cargo.toml
index fc5e948d..1c23f39c 100644
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -29,7 +29,6 @@ regex = "1"
 serde = { workspace = true, features = ["rc"] }
 serde_json = { workspace = true }
 serde_with = { workspace = true, features = ["base64"] }
-tempfile = "3"
 tokio = { workspace = true }
 tracing = { workspace = true }
 futures = "0.3.31"
@@ -40,8 +39,11 @@ rayon = "1.11.0"
 cryptoxide = "0.5.1"
 thiserror = "2.0.17"
 sha2 = "0.10.8"
+
+[dev-dependencies]
 caryatid_process = { workspace = true }
 config = { workspace = true }
+tempfile = "3"
 
 [lib]
 crate-type = ["rlib"]
diff --git a/common/src/commands/chain_sync.rs b/common/src/commands/chain_sync.rs
new file mode 100644
index 00000000..77a55f12
--- /dev/null
+++ b/common/src/commands/chain_sync.rs
@@ -0,0 +1,6 @@
+use crate::{BlockHash, Slot};
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub enum ChainSyncCommand {
+    FindIntersect { slot: Slot, hash: BlockHash },
+}
diff --git a/common/src/commands/mod.rs b/common/src/commands/mod.rs
index 0824d7a9..5747153a 100644
--- a/common/src/commands/mod.rs
+++ b/common/src/commands/mod.rs
@@ -1 +1,2 @@
+pub mod chain_sync;
 pub mod transactions;
diff --git a/common/src/messages.rs b/common/src/messages.rs
index 674187dc..61b14e49 100644
--- a/common/src/messages.rs
+++ b/common/src/messages.rs
@@ -3,6 +3,7 @@
 // We don't use these messages in the acropolis_common crate itself
 #![allow(dead_code)]
 
+use crate::commands::chain_sync::ChainSyncCommand;
 use crate::commands::transactions::{TransactionsCommand, TransactionsCommandResponse};
 use crate::genesis_values::GenesisValues;
 use crate::ledger_state::SPOState;
@@ -453,6 +454,7 @@ pub enum StateQueryResponse {
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
 pub enum Command {
     Transactions(TransactionsCommand),
+    ChainSync(ChainSyncCommand),
 }
 
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
diff --git a/modules/indexer/Cargo.toml b/modules/indexer/Cargo.toml
new file mode 100644
index 00000000..aef2fcd7
--- /dev/null
+++ b/modules/indexer/Cargo.toml
@@ -0,0 +1,22 @@
+# Acropolis indexer module
+
+[package]
+name = "acropolis_module_indexer"
+version = "0.1.0"
+edition = "2021"
+authors = ["William Hankins "]
Hankins "] +description = "Core indexer logic" +license = "Apache-2.0" + +[dependencies] +acropolis_common = { path = "../../common" } + +caryatid_sdk = { workspace = true } + +anyhow = { workspace = true } +config = { workspace = true } +serde = { workspace = true, features = ["rc"] } +tracing = { workspace = true } + +[lib] +path = "src/indexer.rs" diff --git a/modules/indexer/config.default.toml b/modules/indexer/config.default.toml new file mode 100644 index 00000000..e8549e76 --- /dev/null +++ b/modules/indexer/config.default.toml @@ -0,0 +1,2 @@ +# The topic to publish sync commands on +sync-command-topic = "cardano.sync.command" \ No newline at end of file diff --git a/modules/indexer/src/configuration.rs b/modules/indexer/src/configuration.rs new file mode 100644 index 00000000..6b2b5731 --- /dev/null +++ b/modules/indexer/src/configuration.rs @@ -0,0 +1,21 @@ +use anyhow::Result; +use config::Config; + +#[derive(serde::Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct IndexerConfig { + pub sync_command_topic: String, +} + +impl IndexerConfig { + pub fn try_load(config: &Config) -> Result { + let full_config = Config::builder() + .add_source(config::File::from_str( + include_str!("../config.default.toml"), + config::FileFormat::Toml, + )) + .add_source(config.clone()) + .build()?; + Ok(full_config.try_deserialize()?) + } +} diff --git a/modules/indexer/src/indexer.rs b/modules/indexer/src/indexer.rs new file mode 100644 index 00000000..8877c6d5 --- /dev/null +++ b/modules/indexer/src/indexer.rs @@ -0,0 +1,67 @@ +//! Acropolis indexer module for Caryatid +mod configuration; + +use acropolis_common::{ + commands::chain_sync::ChainSyncCommand, + hash::Hash, + messages::{Command, Message}, +}; +use anyhow::Result; +use caryatid_sdk::{module, Context}; +use config::Config; +use std::{str::FromStr, sync::Arc}; +use tracing::info; + +use crate::configuration::IndexerConfig; + +/// Indexer module +#[module( + message_type(Message), + name = "indexer", + description = "Core indexer module for indexer process" +)] +pub struct Indexer; + +impl Indexer { + /// Async initialisation + pub async fn init(&self, context: Arc>, config: Arc) -> Result<()> { + let cfg = IndexerConfig::try_load(&config)?; + info!( + "Creating sync command publisher on '{}'", + cfg.sync_command_topic + ); + + let ctx = context.clone(); + + // This is a placeholder to test dynamic sync + context.run(async move { + let example = ChainSyncCommand::FindIntersect { + slot: 4492799, + hash: Hash::from_str( + "f8084c61b6a238acec985b59310b6ecec49c0ab8352249afd7268da5cff2a457", + ) + .expect("Valid hash"), + }; + + // Initial sync message (This will be read from config for first sync and from DB on subsequent runs) + ctx.message_bus + .publish( + &cfg.sync_command_topic, + Arc::new(Message::Command(Command::ChainSync(example.clone()))), + ) + .await + .unwrap(); + + // Simulate a later sync command to reset sync point to where we started + + ctx.message_bus + .publish( + &cfg.sync_command_topic, + Arc::new(Message::Command(Command::ChainSync(example))), + ) + .await + .unwrap(); + }); + Ok(()) + } +} diff --git a/modules/peer_network_interface/config.default.toml b/modules/peer_network_interface/config.default.toml index c48d02dc..5babba18 100644 --- a/modules/peer_network_interface/config.default.toml +++ b/modules/peer_network_interface/config.default.toml @@ -4,6 +4,8 @@ block-topic = "cardano.block.available" snapshot-completion-topic = "cardano.snapshot.complete" # The topic to wait for when listening for 
 genesis-completion-topic = "cardano.sequence.bootstrapped"
+# The topic to listen on for runtime sync commands
+sync-command-topic = "cardano.sync.command"
 
 # Upstream node connections
 node-addresses = [
@@ -19,6 +21,7 @@ magic-number = 764824073
 # - "tip": sync from the very end of the chain
 # - "cache": replay messages from a local filesystem cache, then sync from the point right after that cache.
 # - "snapshot": wait for another module to restore from a snapshot, then sync from the point right after that snapshot.
+# - "dynamic": wait for a sync command before fetching blocks; the sync point can also be changed at runtime.
 sync-point = "snapshot"
 # The cache dir to use when sync-point is "cache"
 cache-dir = "upstream-cache"
\ No newline at end of file
diff --git a/modules/peer_network_interface/src/chain_state.rs b/modules/peer_network_interface/src/chain_state.rs
index b432d5fb..5c86b430 100644
--- a/modules/peer_network_interface/src/chain_state.rs
+++ b/modules/peer_network_interface/src/chain_state.rs
@@ -397,6 +397,67 @@ mod tests {
         state.handle_block_published();
     }
 
+    #[test]
+    fn should_ignore_irrelevant_block_fetch_after_rollback() {
+        let mut state = ChainState::new();
+        let p1 = PeerId(0);
+        state.handle_new_preferred_upstream(p1);
+
+        let (h1, b1) = make_block(0, "first block");
+        let (h2a, b2a) = make_block(1, "second block pre-rollback");
+        let (h3a, b3a) = make_block(2, "third block pre-rollback");
+        let (h2b, b2b) = make_block(1, "second block post-rollback");
+        let (h3b, b3b) = make_block(1, "third block post-rollback");
+
+        // publish the first block
+        assert_eq!(state.handle_roll_forward(p1, h1.clone()), vec![p1]);
+        state.handle_body_fetched(h1.slot, h1.hash, b1.clone());
+        assert_eq!(
+            state.next_unpublished_block(),
+            Some((&h1, b1.as_slice(), false))
+        );
+        state.handle_block_published();
+
+        // publish the second block
+        assert_eq!(state.handle_roll_forward(p1, h2a.clone()), vec![p1]);
+        state.handle_body_fetched(h2a.slot, h2a.hash, b2a.clone());
+        assert_eq!(
+            state.next_unpublished_block(),
+            Some((&h2a, b2a.as_slice(), false))
+        );
+        state.handle_block_published();
+        assert_eq!(state.next_unpublished_block(), None);
+
+        // roll forward to the third block, but don't receive the body yet
+        assert_eq!(state.handle_roll_forward(p1, h3a.clone()), vec![p1]);
+
+        // now, roll the chain back to the first block
+        state.handle_roll_backward(p1, Point::Specific(h1.slot, h1.hash.to_vec()));
+        assert_eq!(state.next_unpublished_block(), None);
+
+        // and when we advance to the new second block, the system should report it as a rollback
+        assert_eq!(state.handle_roll_forward(p1, h2b.clone()), vec![p1]);
+        state.handle_body_fetched(h2b.slot, h2b.hash, b2b.clone());
+        assert_eq!(
+            state.next_unpublished_block(),
+            Some((&h2b, b2b.as_slice(), true))
+        );
+        state.handle_block_published();
+
+        // we should not take any action on receiving the original third block
+        state.handle_body_fetched(h3a.slot, h3a.hash, b3a);
+        assert_eq!(state.next_unpublished_block(), None);
+
+        // and the new third block should not be a rollback
+        assert_eq!(state.handle_roll_forward(p1, h3b.clone()), vec![p1]);
+        state.handle_body_fetched(h3b.slot, h3b.hash, b3b.clone());
+        assert_eq!(
+            state.next_unpublished_block(),
+            Some((&h3b, b3b.as_slice(), false))
+        );
+        state.handle_block_published();
+    }
+
     #[test]
     fn should_not_report_rollback_for_unpublished_portion_of_chain() {
         let mut state = ChainState::new();
diff --git a/modules/peer_network_interface/src/configuration.rs b/modules/peer_network_interface/src/configuration.rs
index 9f02ebb0..40461a45 100644
--- a/modules/peer_network_interface/src/configuration.rs
+++ b/modules/peer_network_interface/src/configuration.rs
@@ -11,6 +11,7 @@ pub enum SyncPoint {
     Tip,
     Cache,
     Snapshot,
+    Dynamic,
 }
 
 #[derive(serde::Deserialize)]
@@ -20,6 +21,7 @@ pub struct InterfaceConfig {
     pub sync_point: SyncPoint,
     pub snapshot_completion_topic: String,
     pub genesis_completion_topic: String,
+    pub sync_command_topic: String,
     pub node_addresses: Vec<String>,
     pub magic_number: u64,
     pub cache_dir: PathBuf,
diff --git a/modules/peer_network_interface/src/network.rs b/modules/peer_network_interface/src/network.rs
index 450eaa97..f6c4039e 100644
--- a/modules/peer_network_interface/src/network.rs
+++ b/modules/peer_network_interface/src/network.rs
@@ -78,14 +78,35 @@ impl NetworkManager {
 
     pub async fn run(mut self) -> Result<()> {
         while let Some(event) = self.events.recv().await {
-            match event {
-                NetworkEvent::PeerUpdate { peer, event } => {
-                    self.handle_peer_update(peer, event);
-                    self.publish_blocks().await?;
+            self.on_network_event(event).await?;
+        }
+
+        Ok(())
+    }
+
+    async fn on_network_event(&mut self, event: NetworkEvent) -> Result<()> {
+        match event {
+            NetworkEvent::PeerUpdate { peer, event } => {
+                self.handle_peer_update(peer, event);
+                self.publish_blocks().await?;
+            }
+            NetworkEvent::SyncPointUpdate { point } => {
+                self.chain = ChainState::new();
+
+                for peer in self.peers.values_mut() {
+                    peer.reqs.clear();
+                }
+
+                if let Point::Specific(slot, _) = point {
+                    let (epoch, _) = self.block_sink.genesis_values.slot_to_epoch(slot);
+                    self.block_sink.last_epoch = Some(epoch);
                 }
+
+                self.sync_to_point(point);
             }
         }
-        bail!("event sink closed")
+
+        Ok(())
     }
 
     pub fn handle_new_connection(&mut self, address: String, delay: Duration) {
@@ -235,6 +256,7 @@
 
 pub enum NetworkEvent {
     PeerUpdate { peer: PeerId, event: PeerEvent },
+    SyncPointUpdate { point: Point },
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
diff --git a/modules/peer_network_interface/src/peer_network_interface.rs b/modules/peer_network_interface/src/peer_network_interface.rs
index bcb86683..7456565c 100644
--- a/modules/peer_network_interface/src/peer_network_interface.rs
+++ b/modules/peer_network_interface/src/peer_network_interface.rs
@@ -5,8 +5,9 @@ mod network;
 
 use acropolis_common::{
     BlockInfo, BlockStatus,
+    commands::chain_sync::ChainSyncCommand,
     genesis_values::GenesisValues,
-    messages::{CardanoMessage, Message, RawBlockMessage},
+    messages::{CardanoMessage, Command, Message, RawBlockMessage},
     upstream_cache::{UpstreamCache, UpstreamCacheRecord},
 };
 use anyhow::{Result, bail};
@@ -21,7 +22,7 @@ use std::{path::Path, sync::Arc, time::Duration};
 use crate::{
     configuration::{InterfaceConfig, SyncPoint},
     connection::Header,
-    network::NetworkManager,
+    network::{NetworkEvent, NetworkManager},
 };
 
 #[module(
@@ -43,6 +44,7 @@ impl PeerNetworkInterface {
             SyncPoint::Snapshot => Some(context.subscribe(&cfg.snapshot_completion_topic).await?),
             _ => None,
         };
+        let command_subscription = context.subscribe(&cfg.sync_command_topic).await?;
 
         context.clone().run(async move {
             let genesis_values = if let Some(mut sub) = genesis_complete {
@@ -82,12 +84,12 @@
 
             let manager = match cfg.sync_point {
                 SyncPoint::Origin => {
-                    let mut manager = Self::init_manager(cfg, sink);
+                    let mut manager = Self::init_manager(cfg, sink, command_subscription);
                     manager.sync_to_point(Point::Origin);
                     manager
                 }
                 SyncPoint::Tip => {
-                    let mut manager = Self::init_manager(cfg, sink);
+                    let mut manager = Self::init_manager(cfg, sink, command_subscription);
                     if let Err(error) = manager.sync_to_tip().await {
                         warn!("could not sync to tip: {error:#}");
                         return;
@@ -95,7 +97,7 @@
                     manager
                 }
                 SyncPoint::Cache => {
-                    let mut manager = Self::init_manager(cfg, sink);
+                    let mut manager = Self::init_manager(cfg, sink, command_subscription);
                     manager.sync_to_point(cache_sync_point);
                     manager
                 }
@@ -108,7 +110,7 @@
                         let (epoch, _) = sink.genesis_values.slot_to_epoch(slot);
                         sink.last_epoch = Some(epoch);
                     }
-                    let mut manager = Self::init_manager(cfg, sink);
+                    let mut manager = Self::init_manager(cfg, sink, command_subscription);
                     manager.sync_to_point(point);
                     manager
                 }
@@ -118,6 +120,7 @@
                         }
                     }
                 }
+                SyncPoint::Dynamic => Self::init_manager(cfg, sink, command_subscription),
             };
 
             if let Err(err) = manager.run().await {
@@ -128,8 +131,16 @@
         Ok(())
     }
 
-    fn init_manager(cfg: InterfaceConfig, sink: BlockSink) -> NetworkManager {
+    fn init_manager(
+        cfg: InterfaceConfig,
+        sink: BlockSink,
+        command_subscription: Box<dyn Subscription<Message>>,
+    ) -> NetworkManager {
         let (events_sender, events) = mpsc::channel(1024);
+        tokio::spawn(Self::forward_commands_to_events(
+            command_subscription,
+            events_sender.clone(),
+        ));
         let mut manager = NetworkManager::new(cfg.magic_number, events, events_sender, sink);
         for address in cfg.node_addresses {
             manager.handle_new_connection(address, Duration::ZERO);
@@ -137,6 +148,27 @@
         manager
     }
 
+    async fn forward_commands_to_events(
+        mut subscription: Box<dyn Subscription<Message>>,
+        events_sender: mpsc::Sender<NetworkEvent>,
+    ) -> Result<()> {
+        while let Ok((_, msg)) = subscription.read().await {
+            if let Message::Command(Command::ChainSync(ChainSyncCommand::FindIntersect {
+                slot,
+                hash,
+            })) = msg.as_ref()
+            {
+                let point = Point::new(*slot, hash.to_vec());
+
+                if events_sender.send(NetworkEvent::SyncPointUpdate { point }).await.is_err() {
+                    bail!("event channel closed");
+                }
+            }
+        }
+
+        bail!("subscription closed");
+    }
+
     async fn init_cache(
         cache_dir: &Path,
         block_topic: &str,
diff --git a/processes/indexer/Cargo.toml b/processes/indexer/Cargo.toml
new file mode 100644
index 00000000..3a368efb
--- /dev/null
+++ b/processes/indexer/Cargo.toml
@@ -0,0 +1,24 @@
+# Acropolis indexer process
+[package]
+name = "acropolis_process_indexer"
+version = "0.1.0"
+edition = "2021"
+authors = ["William Hankins "]
+description = "Acropolis indexer process containing core modules"
+license = "Apache-2.0"
+
+[dependencies]
+acropolis_common = { path = "../../common" }
+acropolis_module_genesis_bootstrapper = { path = "../../modules/genesis_bootstrapper" }
+acropolis_module_peer_network_interface = { path = "../../modules/peer_network_interface" }
+acropolis_module_block_unpacker = { path = "../../modules/block_unpacker" }
+acropolis_module_indexer = { path = "../../modules/indexer" }
+
+caryatid_process = { workspace = true }
+
+anyhow = { workspace = true }
+clap = { workspace = true }
+config = { workspace = true }
+tracing-subscriber = { version = "0.3.20", features = ["registry", "env-filter"] }
+tokio = { workspace = true }
+
diff --git a/processes/indexer/indexer.toml b/processes/indexer/indexer.toml
new file mode 100644
index 00000000..013c8dac
--- /dev/null
+++ b/processes/indexer/indexer.toml
@@ -0,0 +1,32 @@
+# Top-level configuration for Acropolis indexer process
+
+[module.genesis-bootstrapper]
+
+[module.peer-network-interface]
+sync-point = "dynamic"
"dynamic" +node-addresses = [ + "backbone.cardano.iog.io:3001", + "backbone.mainnet.cardanofoundation.org:3001", + "backbone.mainnet.emurgornd.com:3001", +] +magic-number = 764824073 + +[module.block-unpacker] + +[module.indexer] + +[startup] +topic = "cardano.sequence.start" + +[message-bus.internal] +class = "in-memory" +workers = 50 +dispatch-queue-size = 1000 +worker-queue-size = 100 +bulk-block-capacity = 50 +bulk-resume-capacity = 75 + +# Message routing +[[message-router.route]] # Everything is internal only +pattern = "#" +bus = "internal" diff --git a/processes/indexer/src/main.rs b/processes/indexer/src/main.rs new file mode 100644 index 00000000..50a3b458 --- /dev/null +++ b/processes/indexer/src/main.rs @@ -0,0 +1,42 @@ +use acropolis_common::messages::Message; +use acropolis_module_indexer::Indexer; +use anyhow::Result; +use caryatid_process::Process; +use clap::Parser; +use config::{Config, Environment, File}; +use std::sync::Arc; + +use acropolis_module_block_unpacker::BlockUnpacker; +use acropolis_module_genesis_bootstrapper::GenesisBootstrapper; +use acropolis_module_peer_network_interface::PeerNetworkInterface; + +#[derive(Debug, clap::Parser)] +struct Args { + #[arg(long, value_name = "PATH", default_value = "indexer.toml")] + config: String, +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + + tracing_subscriber::fmt().with_env_filter("info").init(); + + let config = Arc::new( + Config::builder() + .add_source(File::with_name(&args.config)) + .add_source(Environment::with_prefix("ACROPOLIS")) + .build() + .unwrap(), + ); + + let mut process = Process::::create(config).await; + + GenesisBootstrapper::register(&mut process); + BlockUnpacker::register(&mut process); + PeerNetworkInterface::register(&mut process); + Indexer::register(&mut process); + + process.run().await?; + Ok(()) +}