From 9a05205bb2d201a9ab8e527b2596a445cbf9e6f6 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Mon, 3 Nov 2025 17:22:22 -0300 Subject: [PATCH 01/26] almost ready --- Cargo.lock | 129 +++++---------- crates/rbuilder-operator/Cargo.toml | 3 +- crates/rbuilder-operator/src/clickhouse.rs | 74 +++++++++ crates/rbuilder-operator/src/lib.rs | 1 + crates/rbuilder-utils/Cargo.toml | 3 +- .../src/clickhouse/backup/macros.rs | 54 ------ .../src/clickhouse/backup/mod.rs | 156 ++++++++++-------- .../src/clickhouse/backup/primitives.rs | 16 +- .../rbuilder-utils/src/clickhouse/indexer.rs | 71 ++++++-- crates/rbuilder-utils/src/clickhouse/mod.rs | 68 +++++++- crates/rbuilder-utils/src/clickhouse/serde.rs | 82 +++++++++ crates/rbuilder-utils/src/lib.rs | 1 + crates/rbuilder-utils/src/serde/mod.rs | 1 + 13 files changed, 416 insertions(+), 243 deletions(-) create mode 100644 crates/rbuilder-operator/src/clickhouse.rs delete mode 100644 crates/rbuilder-utils/src/clickhouse/backup/macros.rs create mode 100644 crates/rbuilder-utils/src/clickhouse/serde.rs create mode 100644 crates/rbuilder-utils/src/serde/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 4f355c3ec..bcfc36b20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1809,7 +1809,7 @@ dependencies = [ "bitflags 2.9.4", "cexpr", "clang-sys", - "itertools 0.11.0", + "itertools 0.13.0", "proc-macro2 1.0.101", "quote 1.0.41", "regex", @@ -1827,7 +1827,7 @@ dependencies = [ "bitflags 2.9.4", "cexpr", "clang-sys", - "itertools 0.11.0", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2 1.0.101", @@ -2473,41 +2473,14 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "clickhouse" -version = "0.12.2" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3093f817c4f81c8bd174ed8dd30eac785821a8a7eef27a7dcb7f8cd0d0f6548" +checksum = "52d6ac02411e84914fdf4e0565bfe02fc4bebdf375bd1fc58168bad74b3707a2" dependencies = [ "bstr", "bytes", 
"cityhash-rs", - "clickhouse-derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "futures-channel", - "http-body-util", - "hyper 1.7.0", - "hyper-tls 0.6.0", - "hyper-util", - "lz4_flex", - "replace_with", - "sealed 0.5.0", - "serde", - "static_assertions", - "thiserror 1.0.69", - "time", - "tokio", - "url", - "uuid", -] - -[[package]] -name = "clickhouse" -version = "0.13.3" -source = "git+https://github.com/ClickHouse/clickhouse-rs?rev=8cf3d2e138dd121367fa10e875d3f91374b075b2#8cf3d2e138dd121367fa10e875d3f91374b075b2" -dependencies = [ - "bstr", - "bytes", - "cityhash-rs", - "clickhouse-derive 0.2.0 (git+https://github.com/ClickHouse/clickhouse-rs?rev=8cf3d2e138dd121367fa10e875d3f91374b075b2)", + "clickhouse-macros", "clickhouse-types", "futures-channel", "futures-util", @@ -2518,7 +2491,7 @@ dependencies = [ "lz4_flex", "quanta", "replace_with", - "sealed 0.6.0", + "sealed", "serde", "static_assertions", "thiserror 2.0.17", @@ -2541,9 +2514,10 @@ dependencies = [ ] [[package]] -name = "clickhouse-derive" -version = "0.2.0" -source = "git+https://github.com/ClickHouse/clickhouse-rs?rev=8cf3d2e138dd121367fa10e875d3f91374b075b2#8cf3d2e138dd121367fa10e875d3f91374b075b2" +name = "clickhouse-macros" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff6669899e23cb87b43daf7996f0ea3b9c07d0fb933d745bb7b815b052515ae3" dependencies = [ "proc-macro2 1.0.101", "quote 1.0.41", @@ -2554,7 +2528,8 @@ dependencies = [ [[package]] name = "clickhouse-types" version = "0.1.0" -source = "git+https://github.com/ClickHouse/clickhouse-rs?rev=8cf3d2e138dd121367fa10e875d3f91374b075b2#8cf3d2e138dd121367fa10e875d3f91374b075b2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235f72141cfbe1d2d930d8156a34814c8a3d60491febb9af64cc52a203444764" dependencies = [ "bytes", "thiserror 2.0.17", @@ -3001,7 +2976,7 @@ dependencies = [ "cssparser-macros", "dtoa-short", "itoa", - "phf 0.10.1", + 
"phf 0.11.3", "smallvec", ] @@ -3429,7 +3404,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3803,7 +3778,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5326,7 +5301,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.57.0", + "windows-core 0.62.2", ] [[package]] @@ -7458,7 +7433,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7585,7 +7560,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" dependencies = [ - "proc-macro-crate 1.1.3", + "proc-macro-crate 3.4.0", "proc-macro2 1.0.101", "quote 1.0.41", "syn 2.0.106", @@ -7984,9 +7959,7 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" dependencies = [ - "phf_macros 0.10.0", "phf_shared 0.10.0", - "proc-macro-hack", ] [[package]] @@ -7995,7 +7968,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "phf_macros 0.11.3", + "phf_macros", "phf_shared 0.11.3", "serde", ] @@ -8030,20 +8003,6 @@ dependencies = [ "rand 0.8.5", ] -[[package]] -name = "phf_macros" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58fdf3184dd560f160dd73922bea2d5cd6e8f064bf4b13110abd81b03697b4e0" -dependencies = [ - "phf_generator 0.10.0", - "phf_shared 
0.10.0", - "proc-macro-hack", - "proc-macro2 1.0.101", - "quote 1.0.41", - "syn 1.0.109", -] - [[package]] name = "phf_macros" version = "0.11.3" @@ -8650,12 +8609,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" - [[package]] name = "proc-macro2" version = "0.4.30" @@ -8803,8 +8756,8 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ - "heck 0.4.1", - "itertools 0.11.0", + "heck 0.5.0", + "itertools 0.14.0", "log", "multimap", "once_cell", @@ -8824,7 +8777,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.14.0", "proc-macro2 1.0.101", "quote 1.0.41", "syn 2.0.106", @@ -8950,7 +8903,7 @@ dependencies = [ "once_cell", "socket2 0.6.1", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -9412,7 +9365,7 @@ dependencies = [ "alloy-transport-http", "bid-scraper", "built", - "clickhouse 0.12.2", + "clickhouse", "ctor", "derivative", "exponential-backoff", @@ -9432,6 +9385,7 @@ dependencies = [ "rbuilder", "rbuilder-config", "rbuilder-primitives", + "rbuilder-utils", "redis", "reqwest 0.12.24", "reth-primitives", @@ -9529,8 +9483,8 @@ dependencies = [ "ahash", "alloy-primitives 1.4.1", "auto_impl", - "clickhouse 0.13.3", - "clickhouse-derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "clickhouse", + "clickhouse-derive", "derivative", "derive_more 2.0.1", "dyn-clone", @@ -9544,6 +9498,7 @@ dependencies = [ "reqwest 0.12.24", "reth-tasks 1.8.2", "serde", + "serde_bytes", "serde_json", "serde_with", "sha2 0.10.9", @@ -12958,7 +12913,7 @@ 
dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -13225,18 +13180,6 @@ dependencies = [ "untrusted 0.9.0", ] -[[package]] -name = "sealed" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a8caec23b7800fb97971a1c6ae365b6239aaeddfb934d6265f8505e795699d" -dependencies = [ - "heck 0.4.1", - "proc-macro2 1.0.101", - "quote 1.0.41", - "syn 2.0.106", -] - [[package]] name = "sealed" version = "0.6.0" @@ -13429,6 +13372,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" +dependencies = [ + "serde", + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -14553,7 +14506,7 @@ dependencies = [ "getrandom 0.3.3", "once_cell", "rustix 1.1.2", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -15989,7 +15942,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/crates/rbuilder-operator/Cargo.toml b/crates/rbuilder-operator/Cargo.toml index 030c1b2de..995c6d1ee 100644 --- a/crates/rbuilder-operator/Cargo.toml +++ b/crates/rbuilder-operator/Cargo.toml @@ -25,6 +25,7 @@ systemd-units = { enable = false, start = false, unit-name = "rbuilder-operator" [dependencies] rbuilder-primitives.workspace = true +rbuilder-utils.workspace = true rbuilder-config.workspace = true rbuilder.workspace = true metrics_macros.workspace = true @@ -70,7 +71,7 @@ iceoryx2 = "0.7.0" iceoryx2-bb-container = "0.7.0" prometheus.workspace = true ctor.workspace = true -clickhouse = { version = "0.12.2", features = ["time", "uuid", "native-tls"] } +clickhouse = { 
version = "0.14.0", features = ["time", "uuid", "native-tls"] } futures-util.workspace = true parking_lot.workspace = true lazy_static.workspace = true diff --git a/crates/rbuilder-operator/src/clickhouse.rs b/crates/rbuilder-operator/src/clickhouse.rs new file mode 100644 index 000000000..720d04f36 --- /dev/null +++ b/crates/rbuilder-operator/src/clickhouse.rs @@ -0,0 +1,74 @@ +//! Clickhouse integration to save all the blocks we build and submit to relays. + +use alloy_primitives::U256; +use clickhouse::Row; +use rbuilder_utils::clickhouse::{ + backup::primitives::{ClickhouseIndexableData, ClickhouseRowExt}, + serde::{option_u256, vec_u256}, +}; +use serde::{Deserialize, Serialize}; + +/// BlockRow to insert in clickhouse and also as entry type for the indexer since the BlockRow is made from a few &objects so it makes no sense to have a Block type and copy all the fields. +#[derive(Debug, Clone, Serialize, Deserialize, Row)] +pub struct BlockRow { + pub block_number: u64, + pub profit: String, + pub slot: u64, + pub hash: String, + pub gas_limit: u64, + pub gas_used: u64, + pub base_fee: u64, + pub parent_hash: String, + pub proposer_pubkey: String, + pub proposer_fee_recipient: String, + pub builder_pubkey: String, + pub timestamp: u64, + pub timestamp_datetime: i64, + pub orders_closed_at: i64, + pub sealed_at: i64, + pub algorithm: String, + + #[serde(with = "option_u256")] + pub true_value: Option, + #[serde(with = "option_u256")] + pub best_relay_value: Option, + #[serde(with = "option_u256")] + pub block_value: Option, + + pub used_bundle_hashes: Vec, + pub used_bundle_uuids: Vec, + pub used_sbundles_hashes: Vec, + pub delayed_payment_sources: Vec, + + #[serde(with = "vec_u256")] + pub delayed_payment_values: Vec, + + pub delayed_payment_addresses: Vec, +} + +impl ClickhouseRowExt for BlockRow { + type TraceId = String; + const TABLE_NAME: &'static str = "blocks"; + + fn trace_id(&self) -> String { + self.hash.clone() + } + + fn to_row_ref(row: &Self) -> 
&::Value<'_> { + row + } +} + +impl ClickhouseIndexableData for BlockRow { + type ClickhouseRowType = BlockRow; + + const DATA_NAME: &'static str = ::TABLE_NAME; + + fn trace_id(&self) -> String { + self.hash.clone() + } + + fn to_row(self, _builder_name: String) -> Self::ClickhouseRowType { + self + } +} diff --git a/crates/rbuilder-operator/src/lib.rs b/crates/rbuilder-operator/src/lib.rs index 180b9cb4b..6b3cd56c7 100644 --- a/crates/rbuilder-operator/src/lib.rs +++ b/crates/rbuilder-operator/src/lib.rs @@ -1,6 +1,7 @@ pub mod bidding_service_wrapper; pub mod blocks_processor; pub mod build_info; +pub mod clickhouse; pub mod flashbots_config; pub mod flashbots_signer; pub mod metrics; diff --git a/crates/rbuilder-utils/Cargo.toml b/crates/rbuilder-utils/Cargo.toml index af8576010..12e9ca869 100644 --- a/crates/rbuilder-utils/Cargo.toml +++ b/crates/rbuilder-utils/Cargo.toml @@ -20,6 +20,7 @@ governor = "0.6.3" ahash.workspace = true reqwest = { workspace = true, features = ["blocking"] } serde_with = { workspace = true, features = ["time_0_3"] } +serde_bytes = "0.11" toml.workspace = true tracing.workspace = true time.workspace = true @@ -43,7 +44,7 @@ tokio = { version = "1.40.0", default-features = false, features = [ "test-util" ] } -clickhouse = { git = "https://github.com/ClickHouse/clickhouse-rs", rev = "8cf3d2e138dd121367fa10e875d3f91374b075b2", features = [ +clickhouse = { version = "0.14.0", features = [ "inserter", "time", "uuid", diff --git a/crates/rbuilder-utils/src/clickhouse/backup/macros.rs b/crates/rbuilder-utils/src/clickhouse/backup/macros.rs deleted file mode 100644 index 5b08591c4..000000000 --- a/crates/rbuilder-utils/src/clickhouse/backup/macros.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Helpful macros spawning clickhouse indexer tasks. - -// Rationale: a simple text-replacement macro was much more effective compared to fighting the -// compiler with additional trait bounds on the [`clickhouse::Row`] trait. - -#[macro_export] -macro_rules! 
spawn_clickhouse_inserter { - ($executor:ident, $runner:ident, $name:expr, $target:expr) => {{ - $executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { - let mut shutdown_guard = None; - tokio::select! { - _ = $runner.run_loop() => { - tracing::info!(target: $target, "clickhouse {} indexer channel closed", $name); - } - guard = shutdown => { - tracing::info!(target: $target, "Received shutdown for {} indexer, performing cleanup", $name); - shutdown_guard = Some(guard); - }, - } - - match $runner.end().await { - Ok(quantities) => { - tracing::info!(target: $target, ?quantities, "finalized clickhouse {} inserter", $name); - } - Err(e) => { - tracing::error!(target: $target, ?e, "failed to write end insertion of {} to indexer", $name); - } - } - - drop(shutdown_guard); - }); - }}; -} - -#[macro_export] -macro_rules! spawn_clickhouse_backup { - ($executor:ident, $backup:ident, $name: expr, $target:expr) => {{ - $executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { - let mut shutdown_guard = None; - tokio::select! 
{ - _ = $backup.run() => { - tracing::info!(target: $target, "clickhouse {} backup channel closed", $name); - } - guard = shutdown => { - tracing::info!(target: $target, "Received shutdown for {} backup, performing cleanup", $name); - shutdown_guard = Some(guard); - }, - } - - $backup.end().await; - drop(shutdown_guard); - }); - }}; -} diff --git a/crates/rbuilder-utils/src/clickhouse/backup/mod.rs b/crates/rbuilder-utils/src/clickhouse/backup/mod.rs index 67d800bd3..9d22006f2 100644 --- a/crates/rbuilder-utils/src/clickhouse/backup/mod.rs +++ b/crates/rbuilder-utils/src/clickhouse/backup/mod.rs @@ -1,10 +1,8 @@ -pub mod macros; pub mod metrics; pub mod primitives; use std::{ collections::VecDeque, - marker::PhantomData, path::PathBuf, sync::{Arc, RwLock}, time::{Duration, Instant, SystemTime, UNIX_EPOCH}, @@ -21,7 +19,7 @@ use crate::{ clickhouse::{ backup::{ metrics::Metrics, - primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}, + primitives::{ClickhouseIndexableData, ClickhouseRowExt}, }, indexer::{ default_disk_backup_database_path, MAX_DISK_BACKUP_SIZE_BYTES, @@ -72,7 +70,7 @@ impl FailedCommit { } } -impl Default for FailedCommit { +impl Default for FailedCommit { fn default() -> Self { Self { rows: Vec::new(), @@ -230,15 +228,13 @@ pub(crate) enum DiskBackupError { /// A disk backup for failed commits. This handle to a database allows to write only to one table /// for scoped access. If you want to write to another table, clone it using /// [`Self::clone_with_table`]. 
-#[derive(Debug)] -pub struct DiskBackup { +#[derive(Debug, Clone)] +pub struct DiskBackup { db: Arc>, config: DiskBackupConfig, - - _marker: PhantomData, } -impl DiskBackup { +impl DiskBackup { pub fn new( config: DiskBackupConfig, task_executor: &TaskExecutor, @@ -253,7 +249,6 @@ impl DiskBackup { let disk_backup = Self { db: Arc::new(RwLock::new(db)), config, - _marker: Default::default(), }; task_executor.spawn({ @@ -265,32 +260,16 @@ impl DiskBackup { Ok(disk_backup) } - - /// Like `clone`, but allows to change the type parameter `U`. - pub fn clone_to(&self) -> DiskBackup { - DiskBackup { - db: self.db.clone(), - config: self.config.clone(), - _marker: Default::default(), - } - } -} - -impl Clone for DiskBackup { - fn clone(&self) -> Self { - Self { - db: self.db.clone(), - config: self.config.clone(), - _marker: Default::default(), - } - } } -impl DiskBackup { +impl DiskBackup { /// Saves a new failed commit to disk. `commit_immediately` indicates whether to force /// durability on write. - fn save(&mut self, data: &FailedCommit) -> Result { - let table_def = Table::new(T::ORDER); + fn save( + &mut self, + data: &FailedCommit, + ) -> Result { + let table_def = Table::new(T::TABLE_NAME); // NOTE: not efficient, but we don't expect to store a lot of data here. let bytes = serde_json::to_vec(&data)?; @@ -314,10 +293,10 @@ impl DiskBackup { } /// Retrieves the oldest failed commit from disk, if any. - fn retrieve_oldest( + fn retrieve_oldest( &mut self, ) -> Result>>, DiskBackupError> { - let table_def = Table::new(T::ORDER); + let table_def = Table::new(T::TABLE_NAME); let reader = self.db.read().expect("not poisoned").begin_read()?; let table = match reader.open_table(table_def) { @@ -353,8 +332,11 @@ impl DiskBackup { } /// Deletes the failed commit with the given key from disk. 
- fn delete(&mut self, key: DiskBackupKey) -> Result { - let table_def = Table::new(T::ORDER); + fn delete( + &mut self, + key: DiskBackupKey, + ) -> Result { + let table_def = Table::new(T::TABLE_NAME); let mut writer = self.db.write().expect("not poisoned").begin_write()?; writer.set_durability(redb::Durability::Immediate)?; @@ -508,7 +490,7 @@ pub struct Backup { /// By sending backup data less often, we give time gaps for these operation to be performed. rx: mpsc::Receiver>, /// The disk cache of failed commits. - disk_backup: DiskBackup, + disk_backup: DiskBackup, /// The in-memory cache of failed commits. memory_backup: MemoryBackup, /// A clickhouse inserter for committing again the data. @@ -529,7 +511,7 @@ impl Backup { pub fn new( rx: mpsc::Receiver>, inserter: Inserter, - disk_backup: DiskBackup, + disk_backup: DiskBackup, ) -> Self { Self { rx, @@ -553,7 +535,7 @@ impl Backup { /// Backs up a failed commit, first trying to write to disk, then to memory. fn backup(&mut self, failed_commit: FailedCommit) { let quantities = failed_commit.quantities; - tracing::debug!(target: TARGET, order = T::ORDER, bytes = ?quantities.bytes, rows = ?quantities.rows, "backing up failed commit"); + tracing::debug!(target: TARGET, order = T::TABLE_NAME, bytes = ?quantities.bytes, rows = ?quantities.rows, "backing up failed commit"); #[cfg(any(test, feature = "test-utils"))] if self.use_only_memory_backup { @@ -568,23 +550,27 @@ impl Backup { let start = Instant::now(); match self.disk_backup.save(&failed_commit) { Ok(stats) => { - tracing::debug!(target: TARGET, order = T::ORDER, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "saved failed commit to disk"); - MetricsType::set_disk_backup_size(stats.size_bytes, stats.total_batches, T::ORDER); + tracing::debug!(target: TARGET, order = T::TABLE_NAME, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "saved failed commit to disk"); + MetricsType::set_disk_backup_size( + 
stats.size_bytes, + stats.total_batches, + T::TABLE_NAME, + ); return; } Err(e) => { - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write commit, trying in-memory"); - MetricsType::increment_backup_disk_errors(T::ORDER, e.as_ref()); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "failed to write commit, trying in-memory"); + MetricsType::increment_backup_disk_errors(T::TABLE_NAME, e.as_ref()); } }; let stats = self.memory_backup.save(failed_commit); - MetricsType::set_memory_backup_size(stats.size_bytes, stats.total_batches, T::ORDER); - tracing::debug!(target: TARGET, order = T::ORDER, bytes = ?quantities.bytes, rows = ?quantities.rows, ?stats, "saved failed commit in-memory"); + MetricsType::set_memory_backup_size(stats.size_bytes, stats.total_batches, T::TABLE_NAME); + tracing::debug!(target: TARGET, order = T::TABLE_NAME, bytes = ?quantities.bytes, rows = ?quantities.rows, ?stats, "saved failed commit in-memory"); if let Some((stats, oldest_quantities)) = self.memory_backup.drop_excess() { - tracing::warn!(target: TARGET, order = T::ORDER, ?stats, "failed commits exceeded max memory backup size, dropping oldest"); + tracing::warn!(target: TARGET, order = T::TABLE_NAME, ?stats, "failed commits exceeded max memory backup size, dropping oldest"); MetricsType::process_backup_data_lost_quantities(&oldest_quantities); // Clear the cached last commit if it was from memory and we just dropped it. self.last_cached = self @@ -597,12 +583,12 @@ impl Backup { /// Retrieves the oldest failed commit, first trying from memory, then from disk. 
fn retrieve_oldest(&mut self) -> Option> { if let Some(cached) = self.last_cached.take() { - tracing::debug!(target: TARGET, order = T::ORDER, rows = cached.commit.rows.len(), "retrieved last cached failed commit"); + tracing::debug!(target: TARGET, order = T::TABLE_NAME, rows = cached.commit.rows.len(), "retrieved last cached failed commit"); return Some(cached); } if let Some(commit) = self.memory_backup.retrieve_oldest() { - tracing::debug!(target: TARGET, order = T::ORDER, rows = commit.rows.len(), "retrieved oldest failed commit from memory"); + tracing::debug!(target: TARGET, order = T::TABLE_NAME, rows = commit.rows.len(), "retrieved oldest failed commit from memory"); return Some(RetrievedFailedCommit { source: BackupSource::Memory, commit, @@ -612,7 +598,7 @@ impl Backup { match self.disk_backup.retrieve_oldest() { Ok(maybe_commit) => { maybe_commit.inspect(|data| { - tracing::debug!(target: TARGET, order = T::ORDER, rows = data.stats.total_batches, "retrieved oldest failed commit from disk"); + tracing::debug!(target: TARGET, order = T::TABLE_NAME, rows = data.stats.total_batches, "retrieved oldest failed commit from disk"); }) .map(|data| RetrievedFailedCommit { source: BackupSource::Disk(data.key), @@ -620,8 +606,8 @@ impl Backup { }) } Err(e) => { - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to retrieve oldest failed commit from disk"); - MetricsType::increment_backup_disk_errors(T::ORDER, e.as_ref()); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "failed to retrieve oldest failed commit from disk"); + MetricsType::increment_backup_disk_errors(T::TABLE_NAME, e.as_ref()); None } } @@ -634,7 +620,7 @@ impl Backup { if let Err(e) = self.inserter.write(value_ref).await { MetricsType::increment_write_failures(e.to_string()); - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write to backup inserter"); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "failed to write to backup inserter"); 
continue; } } @@ -644,32 +630,32 @@ impl Backup { async fn purge_commit(&mut self, retrieved: &RetrievedFailedCommit) { if let BackupSource::Disk(key) = retrieved.source { let start = Instant::now(); - match self.disk_backup.delete(key) { + match self.disk_backup.delete::(key) { Ok(stats) => { - tracing::debug!(target: TARGET, order = T::ORDER, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "deleted failed commit from disk"); + tracing::debug!(target: TARGET, order = T::TABLE_NAME, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "deleted failed commit from disk"); MetricsType::set_disk_backup_size( stats.size_bytes, stats.total_batches, - T::ORDER, + T::TABLE_NAME, ); } Err(e) => { - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to purge failed commit from disk"); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "failed to purge failed commit from disk"); } } - tracing::debug!(target: TARGET, order = T::ORDER, "purged committed failed commit from disk"); + tracing::debug!(target: TARGET, order = T::TABLE_NAME, "purged committed failed commit from disk"); } } /// Run the backup actor until it is possible to receive messages. /// /// If some data were stored on disk previously, they will be retried first. - pub async fn run(&mut self) { + async fn run(&mut self) { loop { tokio::select! { maybe_failed_commit = self.rx.recv() => { let Some(failed_commit) = maybe_failed_commit else { - tracing::error!(target: TARGET, order = T::ORDER, "backup channel closed"); + tracing::error!(target: TARGET, order = T::TABLE_NAME, "backup channel closed"); break; }; @@ -678,7 +664,7 @@ impl Backup { _ = self.interval.tick() => { let Some(oldest) = self.retrieve_oldest() else { self.interval.reset(); - MetricsType::set_backup_empty_size(T::ORDER); + MetricsType::set_backup_empty_size(T::TABLE_NAME); continue // Nothing to do! 
}; @@ -687,14 +673,14 @@ impl Backup { let start = Instant::now(); match self.inserter.force_commit().await { Ok(quantities) => { - tracing::info!(target: TARGET, order = T::ORDER, ?quantities, "successfully backed up"); + tracing::info!(target: TARGET, order = T::TABLE_NAME, ?quantities, "successfully backed up"); MetricsType::process_backup_data_quantities(&quantities.into()); MetricsType::record_batch_commit_time(start.elapsed()); self.interval.reset(); self.purge_commit(&oldest).await; } Err(e) => { - tracing::error!(target: TARGET, order = T::ORDER, ?e, quantities = ?oldest.commit.quantities, "failed to commit bundle to clickhouse from backup"); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, quantities = ?oldest.commit.quantities, "failed to commit bundle to clickhouse from backup"); MetricsType::increment_commit_failures(e.to_string()); self.last_cached = Some(oldest); continue; @@ -707,41 +693,63 @@ impl Backup { /// To call on shutdown, tries make a last-resort attempt to post back to Clickhouse all /// in-memory data. - pub async fn end(mut self) { + async fn end(mut self) { for failed_commit in self.memory_backup.failed_commits.drain(..) 
{ for row in &failed_commit.rows { let value_ref = T::to_row_ref(row); if let Err(e) = self.inserter.write(value_ref).await { - tracing::error!( target: TARGET, order = T::ORDER, ?e, "failed to write to backup inserter during shutdown"); + tracing::error!( target: TARGET, order = T::TABLE_NAME, ?e, "failed to write to backup inserter during shutdown"); MetricsType::increment_write_failures(e.to_string()); continue; } } if let Err(e) = self.inserter.force_commit().await { - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to commit backup to CH during shutdown, trying disk"); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "failed to commit backup to CH during shutdown, trying disk"); MetricsType::increment_commit_failures(e.to_string()); } if let Err(e) = self.disk_backup.save(&failed_commit) { - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write commit to disk backup during shutdown"); - MetricsType::increment_backup_disk_errors(T::ORDER, e.as_ref()); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "failed to write commit to disk backup during shutdown"); + MetricsType::increment_backup_disk_errors(T::TABLE_NAME, e.as_ref()); } } if let Err(e) = self.disk_backup.flush().await { - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to flush disk backup during shutdown"); - MetricsType::increment_backup_disk_errors(T::ORDER, e.as_ref()); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "failed to flush disk backup during shutdown"); + MetricsType::increment_backup_disk_errors(T::TABLE_NAME, e.as_ref()); } else { - tracing::info!(target: TARGET, order = T::ORDER, "flushed disk backup during shutdown"); + tracing::info!(target: TARGET, order = T::TABLE_NAME, "flushed disk backup during shutdown"); } if let Err(e) = self.inserter.end().await { - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to end backup inserter during shutdown"); + tracing::error!(target: TARGET, order = 
T::TABLE_NAME, ?e, "failed to end backup inserter during shutdown"); } else { - tracing::info!(target: TARGET, order = T::ORDER, "successfully ended backup inserter during shutdown"); + tracing::info!(target: TARGET, order = T::TABLE_NAME, "successfully ended backup inserter during shutdown"); } } + + /// Spawns the inserter runner on the given task executor. + pub fn spawn(mut self, task_executor: &TaskExecutor, name: String, target: &'static str) + where + MetricsType: Send + Sync + 'static, + for<'a> ::Value<'a>: Sync, + { + task_executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { + let mut shutdown_guard = None; + tokio::select! { + _ = self.run() => { + tracing::info!(target, "clickhouse {} backup channel closed", name); + } + guard = shutdown => { + tracing::info!(target, "Received shutdown for {} backup, performing cleanup", name); + shutdown_guard = Some(guard); + }, + } + self.end().await; + drop(shutdown_guard); + }); + } } #[cfg(any(test, feature = "test-utils"))] @@ -749,9 +757,11 @@ impl Backup { pub fn new_test( rx: mpsc::Receiver>, inserter: Inserter, - disk_backup: DiskBackup, + disk_backup: DiskBackup, use_only_memory_backup: bool, ) -> Self { + use std::marker::PhantomData; + Self { rx, inserter, diff --git a/crates/rbuilder-utils/src/clickhouse/backup/primitives.rs b/crates/rbuilder-utils/src/clickhouse/backup/primitives.rs index 9bc53031b..bb1434a39 100644 --- a/crates/rbuilder-utils/src/clickhouse/backup/primitives.rs +++ b/crates/rbuilder-utils/src/clickhouse/backup/primitives.rs @@ -1,16 +1,18 @@ -use alloy_primitives::B256; use clickhouse::{Row, RowWrite}; use serde::{de::DeserializeOwned, Serialize}; pub trait ClickhouseRowExt: Row + RowWrite + Serialize + DeserializeOwned + Sync + Send + 'static { + /// Type of + type TraceId: std::fmt::Display + Send + Sync; + /// The type of such row, e.g. "bundles" or "bundle_receipts". Used as backup db table name and /// for informational purposes. 
- const ORDER: &'static str; + const TABLE_NAME: &'static str; /// An identifier of such row. - fn hash(&self) -> B256; + fn trace_id(&self) -> Self::TraceId; /// Internal function that takes the inner row types and extracts the reference needed for /// Clickhouse inserter functions like `Inserter::write`. While a default implementation is not @@ -19,15 +21,15 @@ pub trait ClickhouseRowExt: } /// An high-level order type that can be indexed in clickhouse. -pub trait ClickhouseIndexableOrder: Sized { +pub trait ClickhouseIndexableData: Sized { /// The associated inner row type that can be serialized into Clickhouse data. type ClickhouseRowType: ClickhouseRowExt; /// The type of such order, e.g. "bundles" or "transactions". For informational purposes. - const ORDER: &'static str; + const DATA_NAME: &'static str; - /// An identifier of such order. - fn hash(&self) -> B256; + /// An identifier of such element for when we need to trace it. + fn trace_id(&self) -> ::TraceId; /// Converts such order into the associated Clickhouse row type. 
fn to_row(self, builder_name: String) -> Self::ClickhouseRowType; diff --git a/crates/rbuilder-utils/src/clickhouse/indexer.rs b/crates/rbuilder-utils/src/clickhouse/indexer.rs index bfc732ed8..296da120d 100644 --- a/crates/rbuilder-utils/src/clickhouse/indexer.rs +++ b/crates/rbuilder-utils/src/clickhouse/indexer.rs @@ -11,13 +11,14 @@ const TARGET: &str = "indexer"; use clickhouse::{ error::Result as ClickhouseResult, inserter::Inserter, Client as ClickhouseClient, Row, }; +use reth_tasks::TaskExecutor; use tokio::sync::mpsc; use crate::{ clickhouse::{ backup::{ metrics::Metrics, - primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}, + primitives::{ClickhouseIndexableData, ClickhouseRowExt}, FailedCommit, }, Quantities, @@ -78,7 +79,9 @@ pub struct ClickhouseInserter { _metrics_phantom: std::marker::PhantomData, } -impl ClickhouseInserter { +impl + ClickhouseInserter +{ pub fn new(inner: Inserter, backup_tx: mpsc::Sender>) -> Self { let rows_backup = Vec::new(); Self { @@ -91,12 +94,12 @@ impl ClickhouseInserter ClickhouseInserter { if quantities == Quantities::ZERO.into() { - tracing::trace!(target: TARGET, order = T::ORDER, "committed to inserter"); + tracing::trace!(target: TARGET, table = T::TABLE_NAME, "committed to inserter"); } else { - tracing::debug!(target: TARGET, order = T::ORDER, ?quantities, "inserted batch to clickhouse"); + tracing::debug!(target: TARGET, table = T::TABLE_NAME, ?quantities, "inserted batch to clickhouse"); MetricsType::process_quantities(&quantities.into()); MetricsType::record_batch_commit_time(start.elapsed()); // Clear the backup rows. 
@@ -125,13 +128,13 @@ impl ClickhouseInserter { MetricsType::increment_commit_failures(e.to_string()); - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to commit bundle to clickhouse"); + tracing::error!(target: TARGET, table = T::TABLE_NAME, ?e, "failed to commit bundle to clickhouse"); let rows = std::mem::take(&mut self.rows_backup); let failed_commit = FailedCommit::new(rows, pending); if let Err(e) = self.backup_tx.try_send(failed_commit) { - tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to send rows backup"); + tracing::error!(target: TARGET, table = T::TABLE_NAME, ?e, "failed to send rows backup"); } } } @@ -146,7 +149,7 @@ impl ClickhouseInserter std::fmt::Debug for ClickhouseInserter { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ClickhouseInserter") - .field("inserter", &T::ORDER.to_string()) + .field("inserter", &T::TABLE_NAME.to_string()) .field("rows_backup_len", &self.rows_backup.len()) .finish() } @@ -154,7 +157,7 @@ impl std::fmt::Debug for ClickhouseInserter { +pub struct InserterRunner { /// The channel from which we can receive new orders to index. rx: mpsc::Receiver, /// The underlying Clickhouse inserter. @@ -163,18 +166,18 @@ pub struct InserterRunner { builder_name: String, } -impl std::fmt::Debug +impl std::fmt::Debug for InserterRunner { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("InserterRunner") - .field("inserter", &T::ORDER.to_string()) + .field("inserter", &T::DATA_NAME.to_string()) .field("rx", &self.rx) .finish() } } -impl InserterRunner { +impl InserterRunner { pub fn new( rx: mpsc::Receiver, inserter: ClickhouseInserter, @@ -188,27 +191,59 @@ impl InserterRunner ClickhouseResult { + async fn end(self) -> ClickhouseResult { self.inserter.end().await } + + /// Spawns the inserter runner on the given task executor. 
+ pub fn spawn(mut self, task_executor: &TaskExecutor, name: String, target: &'static str) + where + T: Send + Sync + 'static, + MetricsType: Send + Sync + 'static, + for<'a> ::Value<'a>: Sync, + { + task_executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { + let mut shutdown_guard = None; + tokio::select! { + _ = self.run_loop() => { + tracing::info!(target, "clickhouse {name} indexer channel closed"); + } + guard = shutdown => { + tracing::info!(target, "Received shutdown for {name} indexer, performing cleanup"); + shutdown_guard = Some(guard); + }, + } + + match self.end().await { + Ok(quantities) => { + tracing::info!(target, ?quantities, "finalized clickhouse {} inserter", name); + } + Err(e) => { + tracing::error!(target, ?e, "failed to write end insertion of {} to indexer", name); + } + } + drop(shutdown_guard); + + }); + } } /// The configuration used in a [`ClickhouseClient`]. diff --git a/crates/rbuilder-utils/src/clickhouse/mod.rs b/crates/rbuilder-utils/src/clickhouse/mod.rs index 0176f1472..faa04805b 100644 --- a/crates/rbuilder-utils/src/clickhouse/mod.rs +++ b/crates/rbuilder-utils/src/clickhouse/mod.rs @@ -1,6 +1,21 @@ pub mod backup; pub mod indexer; -use serde::{Deserialize, Serialize}; +pub mod serde; +use std::{path::PathBuf, time::Duration}; + +use ::serde::{Deserialize, Serialize}; +use clickhouse::Client; +use reth_tasks::TaskExecutor; +use tokio::sync::mpsc; + +use crate::clickhouse::{ + backup::{ + metrics::Metrics, + primitives::{ClickhouseIndexableData, ClickhouseRowExt}, + Backup, DiskBackup, DiskBackupConfig, MemoryBackupConfig, + }, + indexer::{default_inserter, ClickhouseInserter, InserterRunner}, +}; /// Equilalent of `clickhouse::inserter::Quantities` with more traits derived. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] @@ -38,3 +53,54 @@ impl From for clickhouse::inserter::Quantities { } } } + +/// Size of the channel buffer for the backup input channel. 
+/// If we get more than this number of failed commits queued the inserter thread will block. +const BACKUP_INPUT_CHANNEL_BUFFER_SIZE: usize = 128; +const CLICKHOUSE_INSERT_TIMEOUT: Duration = Duration::from_secs(2); +const CLICKHOUSE_END_TIMEOUT: Duration = Duration::from_secs(4); + +/// Main func to spawn the clickhouse inserter and backup tasks. +pub fn spawn_clickhouse_inserter_and_backup< + DataType: ClickhouseIndexableData + Send + Sync + 'static, + RowType: ClickhouseRowExt, + MetricsType: Metrics + Send + Sync + 'static, +>( + client: &Client, + data_rx: mpsc::Receiver, + task_executor: &TaskExecutor, + clickhouse_table_name: String, + builder_name: String, + disk_database_path: Option>, + disk_max_size_bytes: Option, + memory_max_size_bytes: u64, + tracing_target: &'static str, +) where + for<'a> ::Value<'a>: Sync, +{ + let backup_table_name = RowType::TABLE_NAME.to_string(); + let disk_backup = DiskBackup::new( + DiskBackupConfig::new() + .with_path(disk_database_path) + .with_max_size_bytes(disk_max_size_bytes), // 1 GiB + task_executor, + ) + .expect("could not create disk backup"); + let (failed_commit_tx, failed_commit_rx) = mpsc::channel(BACKUP_INPUT_CHANNEL_BUFFER_SIZE); + let inserter = default_inserter(&client, &clickhouse_table_name); + let inserter = ClickhouseInserter::<_, MetricsType>::new(inserter, failed_commit_tx); + // Node name is not used for Blocks. 
+ let inserter_runner = InserterRunner::new(data_rx, inserter, builder_name); + + let backup = Backup::<_, MetricsType>::new( + failed_commit_rx, + client.inserter(&clickhouse_table_name).with_timeouts( + Some(CLICKHOUSE_INSERT_TIMEOUT), + Some(CLICKHOUSE_END_TIMEOUT), + ), + disk_backup.clone(), + ) + .with_memory_backup_config(MemoryBackupConfig::new(memory_max_size_bytes)); + inserter_runner.spawn(&task_executor, backup_table_name.clone(), tracing_target); + backup.spawn(&task_executor, backup_table_name, tracing_target); +} diff --git a/crates/rbuilder-utils/src/clickhouse/serde.rs b/crates/rbuilder-utils/src/clickhouse/serde.rs new file mode 100644 index 000000000..ab45efb98 --- /dev/null +++ b/crates/rbuilder-utils/src/clickhouse/serde.rs @@ -0,0 +1,82 @@ +//! Serde helpers for Clickhouse. +pub mod u256 { + use alloy_primitives::U256; + use serde::{de::Deserializer, ser::Serializer, Deserialize, Serialize as _}; + + /// EVM U256 is represented in big-endian, but ClickHouse expects little-endian. + pub fn serialize(u256: &U256, serializer: S) -> Result { + let buf: [u8; 32] = u256.to_le_bytes(); + buf.serialize(serializer) + } + + /// Deserialize U256 following ClickHouse RowBinary format. + /// + /// ClickHouse stores U256 in little-endian, we have to convert it back to big-endian. 
+ pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let buf: [u8; 32] = Deserialize::deserialize(deserializer)?; + Ok(U256::from_le_bytes(buf)) + } +} + +pub mod option_u256 { + use alloy_primitives::U256; + use serde::{de::Deserializer, ser::Serializer, Deserialize}; + + pub fn serialize( + maybe_u256: &Option, + serializer: S, + ) -> Result { + if let Some(u256) = maybe_u256 { + let buf: [u8; 32] = u256.to_le_bytes(); + serializer.serialize_some(&buf) + } else { + serializer.serialize_none() + } + } + pub fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + let option: Option<[u8; 32]> = Deserialize::deserialize(deserializer)?; + Ok(option.map(U256::from_le_bytes)) + } +} + +pub mod vec_u256 { + use alloy_primitives::U256; + use serde::{ + de::Deserializer, + ser::{SerializeSeq, Serializer}, + Deserialize, + }; + + /// Serialize Vec following ClickHouse RowBinary format. + /// + /// EVM U256 is represented in big-endian, but ClickHouse expects little-endian. + pub fn serialize(u256es: &[U256], serializer: S) -> Result { + // It consists of a LEB128 length prefix followed by the raw bytes of each U256 in + // little-endian order. + + // + let mut seq = serializer.serialize_seq(Some(u256es.len()))?; + for u256 in u256es { + let buf: [u8; 32] = u256.to_le_bytes(); + seq.serialize_element(&buf)?; + } + seq.end() + } + + /// Deserialize Vec following ClickHouse RowBinary format. + /// + /// ClickHouse stores U256 in little-endian, we have to convert it back to big-endian. 
+ pub fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + let vec: Vec<[u8; 32]> = Deserialize::deserialize(deserializer)?; + Ok(vec.into_iter().map(U256::from_le_bytes).collect()) + } +} diff --git a/crates/rbuilder-utils/src/lib.rs b/crates/rbuilder-utils/src/lib.rs index 3de0c1df0..7badcf364 100644 --- a/crates/rbuilder-utils/src/lib.rs +++ b/crates/rbuilder-utils/src/lib.rs @@ -2,6 +2,7 @@ pub mod backoff; pub mod clickhouse; pub mod format; pub mod metrics; +pub mod serde; pub mod tasks { pub use reth_tasks::*; } diff --git a/crates/rbuilder-utils/src/serde/mod.rs b/crates/rbuilder-utils/src/serde/mod.rs new file mode 100644 index 000000000..c25f0c207 --- /dev/null +++ b/crates/rbuilder-utils/src/serde/mod.rs @@ -0,0 +1 @@ +//! Non specific serde helpers. From a1ab32810bd63882b567aa93351a3c3cec5afe72 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Tue, 4 Nov 2025 16:31:24 -0300 Subject: [PATCH 02/26] building the block --- crates/rbuilder-operator/src/clickhouse.rs | 127 ++++++++++++++++++++- 1 file changed, 124 insertions(+), 3 deletions(-) diff --git a/crates/rbuilder-operator/src/clickhouse.rs b/crates/rbuilder-operator/src/clickhouse.rs index 720d04f36..bea37b909 100644 --- a/crates/rbuilder-operator/src/clickhouse.rs +++ b/crates/rbuilder-operator/src/clickhouse.rs @@ -1,12 +1,30 @@ //! Clickhouse integration to save all the blocks we build and submit to relays. 
-use alloy_primitives::U256; -use clickhouse::Row; +use std::time::Duration; + +use alloy_primitives::{utils::format_ether, U256}; +use clickhouse::{Client, Row}; +use rbuilder::{ + building::BuiltBlockTrace, + live_builder::{ + block_output::bidding_service_interface::BidObserver, payload_events::MevBoostSlotData, + }, +}; +use rbuilder_primitives::mev_boost::SubmitBlockRequest; use rbuilder_utils::clickhouse::{ - backup::primitives::{ClickhouseIndexableData, ClickhouseRowExt}, + backup::{ + metrics::NullMetrics, + primitives::{ClickhouseIndexableData, ClickhouseRowExt}, + }, serde::{option_u256, vec_u256}, + spawn_clickhouse_inserter_and_backup, }; use serde::{Deserialize, Serialize}; +use tokio::sync::mpsc; +use tokio_util::sync::CancellationToken; +use tracing::error; + +use crate::flashbots_config::BuiltBlocksClickhouseConfig; /// BlockRow to insert in clickhouse and also as entry type for the indexer since the BlockRow is made from a few &objects so it makes no sense to have a Block type and copy all the fields. #[derive(Debug, Clone, Serialize, Deserialize, Row)] @@ -72,3 +90,106 @@ impl ClickhouseIndexableData for BlockRow { self } } + +const KILO: u64 = 1024; +const MEGA: u64 = KILO * KILO; + +// Super worst scenario we submit 500 blocks per second so we have 2 seconds of buffer. +const BUILT_BLOCKS_CHANNEL_SIZE: usize = 1024; +const BLOCKS_TABLE_NAME: &str = "blocks"; +const DEFAULT_MAX_DISK_SIZE_MB: u64 = 10 * KILO; +const DEFAULT_MAX_MEMORY_SIZE_MB: u64 = 1 * KILO; +#[derive(Debug)] +pub struct BuiltBlocksWriter { + blocks_tx: mpsc::Sender, +} + +impl BuiltBlocksWriter { + pub fn new(config: BuiltBlocksClickhouseConfig, cancellation_token: CancellationToken) -> Self { + let client = Client::default() + .with_url(config.host) + .with_database(config.database) + .with_user(config.username) + .with_password(config.password) + .with_validation(false); // CRITICAL for U256 serialization. 
+ + let task_manager = rbuilder_utils::tasks::TaskManager::current(); + let task_executor = task_manager.executor(); + + let (block_tx, block_rx) = mpsc::channel::(BUILT_BLOCKS_CHANNEL_SIZE); + spawn_clickhouse_inserter_and_backup::( + &client, + block_rx, + &task_executor, + BLOCKS_TABLE_NAME.to_string(), + "".to_string(), // No buildername used in blocks table. + Some(config.disk_database_path), + Some(config.disk_max_size_mb.unwrap_or(DEFAULT_MAX_DISK_SIZE_MB) * MEGA), + config + .memory_max_size_mb + .unwrap_or(DEFAULT_MAX_MEMORY_SIZE_MB) + * MEGA, + BLOCKS_TABLE_NAME, + ); + // Task to forward the cancellation to the task_manager. + tokio::spawn(async move { + cancellation_token.cancelled().await; + // @Pending: Needed to avoid losing blocks but we should try to avoid this. + tokio::time::sleep(Duration::from_secs(1)).await; + task_manager.graceful_shutdown_with_timeout(Duration::from_secs(5)); + }); + Self { + blocks_tx: block_tx, + } + } +} + +impl BidObserver for BuiltBlocksWriter { + fn block_submitted( + &self, + slot_data: &MevBoostSlotData, + submit_block_request: &SubmitBlockRequest, + built_block_trace: &BuiltBlockTrace, + builder_name: String, + best_bid_value: U256, + ) { + let submit_trace = submit_block_request.bid_trace(); + let execution_payload_v1 = submit_block_request.execution_payload_v1(); + let block_row = BlockRow { + block_number: slot_data.block(), + profit: format_ether(built_block_trace.true_bid_value), + slot: slot_data.slot(), + hash: execution_payload_v1.block_hash.to_string(), + gas_limit: submit_trace.gas_limit, + gas_used: submit_trace.gas_used, + base_fee: execution_payload_v1 + .base_fee_per_gas + .try_into() + .unwrap_or_default(), + parent_hash: submit_trace.parent_hash.to_string(), + proposer_pubkey: "0x123...".to_string(), + proposer_fee_recipient: "0x456...".to_string(), + builder_pubkey: "0x789...".to_string(), + timestamp: 1699999999, + timestamp_datetime: 1699999999000000, + orders_closed_at: 1699999998000000, + 
sealed_at: 1699999998500000, + algorithm: "greedy".to_string(), + true_value: Some(U256::from(123u64)), + best_relay_value: Some(U256::from(1234u64)), + block_value: None, + used_bundle_hashes: vec!["0xbundle1".to_string()], + used_bundle_uuids: vec!["uuid-1".to_string()], + used_sbundles_hashes: vec!["0xsbundle1".to_string()], + delayed_payment_sources: vec!["relay1".to_string()], + delayed_payment_values: vec![U256::from(123456u64), U256::from(1234567u64)], + delayed_payment_addresses: vec!["0xaddr1".to_string()], + }; + let blocks_tx = self.blocks_tx.clone(); + tokio::spawn(async move { + if let Err(error) = blocks_tx.send(block_row).await { + error!(?error, "Failed to send block to clickhouse"); + } + }); + } +} From 4927a00b1be3befc15903004d9bea11b434cdf8d Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Wed, 5 Nov 2025 10:21:07 -0300 Subject: [PATCH 03/26] new cfg --- .../rbuilder-operator/src/flashbots_config.rs | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/crates/rbuilder-operator/src/flashbots_config.rs b/crates/rbuilder-operator/src/flashbots_config.rs index 6363fa399..3ebbb1127 100644 --- a/crates/rbuilder-operator/src/flashbots_config.rs +++ b/crates/rbuilder-operator/src/flashbots_config.rs @@ -43,11 +43,12 @@ use crate::{ SIGNED_BLOCK_CONSUME_BUILT_BLOCK_METHOD, }, build_info::rbuilder_version, + clickhouse::BuiltBlocksWriter, true_block_value_push::best_true_value_observer::BestTrueValueObserver, }; use clickhouse::Client; -use std::sync::Arc; +use std::{path::PathBuf, sync::Arc}; #[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)] pub struct ClickhouseConfig { @@ -70,6 +71,20 @@ struct TBVPushRedisConfig { pub channel: String, } +/// Config used to record built blocks to clickhouse using a local +/// storage on errors. 
/// Config used to record built blocks to clickhouse using a local
/// storage on errors.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
pub struct BuiltBlocksClickhouseConfig {
    /// clickhouse host url (starts with http/https)
    pub host: String,
    // Target database holding the blocks table.
    pub database: String,
    // Credentials for the clickhouse HTTP client.
    pub username: String,
    pub password: String,
    // Path of the local (on-disk) backup database used when inserts fail.
    pub disk_database_path: PathBuf,
    // Max size of the on-disk backup in megabytes; the writer applies a default when None.
    pub disk_max_size_mb: Option<u64>,
    // Max size of the in-memory backup in megabytes; the writer applies a default when None.
    pub memory_max_size_mb: Option<u64>,
}
self.create_tbv_pusher(block_processor_key, cancellation_token)?, }; Ok(Box::new(bid_observer)) From b540ce9bd61ce3842edaeb0959f078c03f6a8578 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Wed, 5 Nov 2025 17:23:12 -0300 Subject: [PATCH 04/26] working --- crates/rbuilder-operator/src/clickhouse.rs | 164 +++++++++++++++----- crates/rbuilder-utils/src/clickhouse/mod.rs | 7 +- 2 files changed, 129 insertions(+), 42 deletions(-) diff --git a/crates/rbuilder-operator/src/clickhouse.rs b/crates/rbuilder-operator/src/clickhouse.rs index bea37b909..4ecfc87be 100644 --- a/crates/rbuilder-operator/src/clickhouse.rs +++ b/crates/rbuilder-operator/src/clickhouse.rs @@ -2,7 +2,7 @@ use std::time::Duration; -use alloy_primitives::{utils::format_ether, U256}; +use alloy_primitives::{utils::format_ether, Address, U256}; use clickhouse::{Client, Row}; use rbuilder::{ building::BuiltBlockTrace, @@ -10,7 +10,7 @@ use rbuilder::{ block_output::bidding_service_interface::BidObserver, payload_events::MevBoostSlotData, }, }; -use rbuilder_primitives::mev_boost::SubmitBlockRequest; +use rbuilder_primitives::{mev_boost::SubmitBlockRequest, Order, OrderId}; use rbuilder_utils::clickhouse::{ backup::{ metrics::NullMetrics, @@ -20,6 +20,7 @@ use rbuilder_utils::clickhouse::{ spawn_clickhouse_inserter_and_backup, }; use serde::{Deserialize, Serialize}; +use time::OffsetDateTime; use tokio::sync::mpsc; use tokio_util::sync::CancellationToken; use tracing::error; @@ -94,11 +95,12 @@ impl ClickhouseIndexableData for BlockRow { const KILO: u64 = 1024; const MEGA: u64 = KILO * KILO; -// Super worst scenario we submit 500 blocks per second so we have 2 seconds of buffer. -const BUILT_BLOCKS_CHANNEL_SIZE: usize = 1024; +// Super worst scenario we submit 500 blocks per second so we have 10 seconds of buffer. +// After this having this queued blocks we will start to drop. BlockRow is small enough (in the order of 10K, only hashes/ids, not full orders) so 5K BlockRows is not too much memory. 
+const BUILT_BLOCKS_CHANNEL_SIZE: usize = 5 * 1024; const BLOCKS_TABLE_NAME: &str = "blocks"; const DEFAULT_MAX_DISK_SIZE_MB: u64 = 10 * KILO; -const DEFAULT_MAX_MEMORY_SIZE_MB: u64 = 1 * KILO; +const DEFAULT_MAX_MEMORY_SIZE_MB: u64 = KILO; #[derive(Debug)] pub struct BuiltBlocksWriter { blocks_tx: mpsc::Sender, @@ -144,6 +146,69 @@ impl BuiltBlocksWriter { } } +fn offset_date_to_clickhouse_timestamp(date: OffsetDateTime) -> i64 { + (date.unix_timestamp_nanos() / 1000) as i64 +} + +fn get_used_sbundles_hashes(built_block_trace: &BuiltBlockTrace) -> Vec { + built_block_trace + .included_orders + .iter() + .flat_map(|exec_result| { + if let Order::ShareBundle(sbundle) = &exec_result.order { + // don't like having special cases (merged vs not merged), can we improve this? + if sbundle.is_merged_order() { + exec_result + .original_order_ids + .iter() + .map(|id| id.to_string()) + .collect() + } else if exec_result.tx_infos.is_empty() { + // non merged empty execution sbundle + vec![] + } else { + // non merged non empty execution sbundle + vec![exec_result.order.id().to_string()] + } + } else { + Vec::new() + } + }) + .collect() +} + +const MEV_VIRTUAL_BLOCKER_SOURCE: &str = "mev_blocker"; +const MEV_VIRTUAL_ADDRESS: Address = Address::ZERO; + +/// (sources, values, addresses) +fn get_delayed_payments( + built_block_trace: &BuiltBlockTrace, +) -> (Vec, Vec, Vec
) { + let mut sources = Vec::new(); + let mut values = Vec::new(); + let mut addresses = Vec::new(); + for res in &built_block_trace.included_orders { + if let Some(delayed_kickback) = &res.delayed_kickback { + if !delayed_kickback.should_pay_in_block { + match res.order.id() { + OrderId::Bundle(uuid) => { + sources.push(uuid.to_string()); + values.push(delayed_kickback.payout_value); + addresses.push(delayed_kickback.recipient); + } + _ => { + error!(order = ?res.order.id(), "Delayed kickback is found for non-bundle"); + } + } + } + } + } + sources.push(MEV_VIRTUAL_BLOCKER_SOURCE.into()); + values.push(built_block_trace.mev_blocker_price); + addresses.push(MEV_VIRTUAL_ADDRESS); + (sources, values, addresses) +} + impl BidObserver for BuiltBlocksWriter { fn block_submitted( &self, @@ -153,42 +218,63 @@ impl BidObserver for BuiltBlocksWriter { builder_name: String, best_bid_value: U256, ) { - let submit_trace = submit_block_request.bid_trace(); - let execution_payload_v1 = submit_block_request.execution_payload_v1(); - let block_row = BlockRow { - block_number: slot_data.block(), - profit: format_ether(built_block_trace.true_bid_value), - slot: slot_data.slot(), - hash: execution_payload_v1.block_hash.to_string(), - gas_limit: submit_trace.gas_limit, - gas_used: submit_trace.gas_used, - base_fee: execution_payload_v1 - .base_fee_per_gas - .try_into() - .unwrap_or_default(), - parent_hash: submit_trace.parent_hash.to_string(), - proposer_pubkey: "0x123...".to_string(), - proposer_fee_recipient: "0x456...".to_string(), - builder_pubkey: "0x789...".to_string(), - timestamp: 1699999999, - timestamp_datetime: 1699999999000000, - orders_closed_at: 1699999998000000, - sealed_at: 1699999998500000, - algorithm: "greedy".to_string(), - true_value: Some(U256::from(123u64)), - best_relay_value: Some(U256::from(1234u64)), - block_value: None, - used_bundle_hashes: vec!["0xbundle1".to_string()], - used_bundle_uuids: vec!["uuid-1".to_string()], - used_sbundles_hashes: 
vec!["0xsbundle1".to_string()], - delayed_payment_sources: vec!["relay1".to_string()], - delayed_payment_values: vec![U256::from(123456u64), U256::from(1234567u64)], - delayed_payment_addresses: vec!["0xaddr1".to_string()], - }; + let submit_block_request = submit_block_request.clone(); + let built_block_trace = built_block_trace.clone(); + let slot_data = slot_data.clone(); let blocks_tx = self.blocks_tx.clone(); tokio::spawn(async move { - if let Err(error) = blocks_tx.send(block_row).await { - error!(?error, "Failed to send block to clickhouse"); + let submit_trace = submit_block_request.bid_trace(); + let execution_payload_v1 = submit_block_request.execution_payload_v1(); + let mut used_bundle_hashes = Vec::new(); + let mut used_bundle_uuids = Vec::new(); + for res in &built_block_trace.included_orders { + if let Order::Bundle(bundle) = &res.order { + used_bundle_hashes + .push(bundle.external_hash.unwrap_or(bundle.hash).to_string()); + used_bundle_uuids.push(bundle.uuid.to_string()); + } + } + let used_sbundles_hashes = get_used_sbundles_hashes(&built_block_trace); + let (delayed_payment_sources, delayed_payment_values, delayed_payment_addresses) = + get_delayed_payments(&built_block_trace); + let delayed_payment_addresses = delayed_payment_addresses + .iter() + .map(|address| address.to_string()) + .collect(); + let block_row = BlockRow { + block_number: slot_data.block(), + profit: format_ether(built_block_trace.true_bid_value), + slot: slot_data.slot(), + hash: execution_payload_v1.block_hash.to_string(), + gas_limit: submit_trace.gas_limit, + gas_used: submit_trace.gas_used, + base_fee: execution_payload_v1 + .base_fee_per_gas + .try_into() + .unwrap_or_default(), + parent_hash: submit_trace.parent_hash.to_string(), + proposer_pubkey: submit_trace.proposer_pubkey.to_string(), + proposer_fee_recipient: submit_trace.proposer_fee_recipient.to_string(), + builder_pubkey: submit_trace.builder_pubkey.to_string(), + timestamp: execution_payload_v1.timestamp, + 
timestamp_datetime: execution_payload_v1.timestamp as i64 * 1_000_000, + orders_closed_at: offset_date_to_clickhouse_timestamp( + built_block_trace.orders_closed_at, + ), + sealed_at: offset_date_to_clickhouse_timestamp(built_block_trace.orders_sealed_at), + algorithm: builder_name, + true_value: Some(built_block_trace.true_bid_value), + best_relay_value: Some(best_bid_value), + block_value: Some(submit_trace.value), + used_bundle_hashes, + used_bundle_uuids, + used_sbundles_hashes, + delayed_payment_sources, + delayed_payment_values, + delayed_payment_addresses, + }; + if let Err(err) = blocks_tx.try_send(block_row) { + error!(?err, "Failed to send block to clickhouse"); } }); } diff --git a/crates/rbuilder-utils/src/clickhouse/mod.rs b/crates/rbuilder-utils/src/clickhouse/mod.rs index faa04805b..c4580d982 100644 --- a/crates/rbuilder-utils/src/clickhouse/mod.rs +++ b/crates/rbuilder-utils/src/clickhouse/mod.rs @@ -61,6 +61,7 @@ const CLICKHOUSE_INSERT_TIMEOUT: Duration = Duration::from_secs(2); const CLICKHOUSE_END_TIMEOUT: Duration = Duration::from_secs(4); /// Main func to spawn the clickhouse inserter and backup tasks. +#[allow(clippy::too_many_arguments)] pub fn spawn_clickhouse_inserter_and_backup< DataType: ClickhouseIndexableData + Send + Sync + 'static, RowType: ClickhouseRowExt, @@ -87,7 +88,7 @@ pub fn spawn_clickhouse_inserter_and_backup< ) .expect("could not create disk backup"); let (failed_commit_tx, failed_commit_rx) = mpsc::channel(BACKUP_INPUT_CHANNEL_BUFFER_SIZE); - let inserter = default_inserter(&client, &clickhouse_table_name); + let inserter = default_inserter(client, &clickhouse_table_name); let inserter = ClickhouseInserter::<_, MetricsType>::new(inserter, failed_commit_tx); // Node name is not used for Blocks. 
let inserter_runner = InserterRunner::new(data_rx, inserter, builder_name); @@ -101,6 +102,6 @@ pub fn spawn_clickhouse_inserter_and_backup< disk_backup.clone(), ) .with_memory_backup_config(MemoryBackupConfig::new(memory_max_size_bytes)); - inserter_runner.spawn(&task_executor, backup_table_name.clone(), tracing_target); - backup.spawn(&task_executor, backup_table_name, tracing_target); + inserter_runner.spawn(task_executor, backup_table_name.clone(), tracing_target); + backup.spawn(task_executor, backup_table_name, tracing_target); } From 2ad0dccb363278787df244ef440a5fd19c9ad0f7 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Wed, 5 Nov 2025 17:45:08 -0300 Subject: [PATCH 05/26] merge fixes --- crates/rbuilder-operator/src/clickhouse.rs | 33 +++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/crates/rbuilder-operator/src/clickhouse.rs b/crates/rbuilder-operator/src/clickhouse.rs index 4ecfc87be..d1fc0be87 100644 --- a/crates/rbuilder-operator/src/clickhouse.rs +++ b/crates/rbuilder-operator/src/clickhouse.rs @@ -1,8 +1,9 @@ //! Clickhouse integration to save all the blocks we build and submit to relays. 
-use std::time::Duration; +use std::{sync::Arc, time::Duration}; use alloy_primitives::{utils::format_ether, Address, U256}; +use alloy_rpc_types_beacon::relay::SubmitBlockRequest as AlloySubmitBlockRequest; use clickhouse::{Client, Row}; use rbuilder::{ building::BuiltBlockTrace, @@ -10,7 +11,7 @@ use rbuilder::{ block_output::bidding_service_interface::BidObserver, payload_events::MevBoostSlotData, }, }; -use rbuilder_primitives::{mev_boost::SubmitBlockRequest, Order, OrderId}; +use rbuilder_primitives::{Order, OrderId}; use rbuilder_utils::clickhouse::{ backup::{ metrics::NullMetrics, @@ -213,18 +214,30 @@ impl BidObserver for BuiltBlocksWriter { fn block_submitted( &self, slot_data: &MevBoostSlotData, - submit_block_request: &SubmitBlockRequest, - built_block_trace: &BuiltBlockTrace, + submit_block_request: Arc, + built_block_trace: Arc, builder_name: String, best_bid_value: U256, ) { - let submit_block_request = submit_block_request.clone(); - let built_block_trace = built_block_trace.clone(); - let slot_data = slot_data.clone(); + let slot = slot_data.slot(); + let block_number = slot_data.block(); let blocks_tx = self.blocks_tx.clone(); tokio::spawn(async move { let submit_trace = submit_block_request.bid_trace(); - let execution_payload_v1 = submit_block_request.execution_payload_v1(); + let execution_payload_v1 = match submit_block_request.as_ref() { + AlloySubmitBlockRequest::Capella(request) => { + &request.execution_payload.payload_inner + } + AlloySubmitBlockRequest::Deneb(request) => { + &request.execution_payload.payload_inner.payload_inner + } + AlloySubmitBlockRequest::Electra(request) => { + &request.execution_payload.payload_inner.payload_inner + } + AlloySubmitBlockRequest::Fulu(request) => { + &request.execution_payload.payload_inner.payload_inner + } + }; let mut used_bundle_hashes = Vec::new(); let mut used_bundle_uuids = Vec::new(); for res in &built_block_trace.included_orders { @@ -242,9 +255,9 @@ impl BidObserver for BuiltBlocksWriter { 
.map(|address| address.to_string()) .collect(); let block_row = BlockRow { - block_number: slot_data.block(), + block_number, profit: format_ether(built_block_trace.true_bid_value), - slot: slot_data.slot(), + slot, hash: execution_payload_v1.block_hash.to_string(), gas_limit: submit_trace.gas_limit, gas_used: submit_trace.gas_used, From 5fcccb324b144b1313519be12f2cd5b88b07a561 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Wed, 12 Nov 2025 10:31:45 -0300 Subject: [PATCH 06/26] fields fixed --- crates/rbuilder-operator/src/clickhouse.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/rbuilder-operator/src/clickhouse.rs b/crates/rbuilder-operator/src/clickhouse.rs index d1fc0be87..648f8badf 100644 --- a/crates/rbuilder-operator/src/clickhouse.rs +++ b/crates/rbuilder-operator/src/clickhouse.rs @@ -252,11 +252,11 @@ impl BidObserver for BuiltBlocksWriter { get_delayed_payments(&built_block_trace); let delayed_payment_addresses = delayed_payment_addresses .iter() - .map(|address| address.to_string()) + .map(|address| address.to_string().to_lowercase()) .collect(); let block_row = BlockRow { block_number, - profit: format_ether(built_block_trace.true_bid_value), + profit: format_ether(submit_trace.value), slot, hash: execution_payload_v1.block_hash.to_string(), gas_limit: submit_trace.gas_limit, From b0e392a3d2b7dfd4b8d40ced07e42d281c2d14b2 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Thu, 13 Nov 2025 12:44:54 -0300 Subject: [PATCH 07/26] killed blockprocessor --- .../rbuilder-operator/src/blocks_processor.rs | 502 ------------------ crates/rbuilder-operator/src/clickhouse.rs | 4 +- .../rbuilder-operator/src/flashbots_config.rs | 27 +- crates/rbuilder-operator/src/lib.rs | 1 - 4 files changed, 4 insertions(+), 530 deletions(-) delete mode 100644 crates/rbuilder-operator/src/blocks_processor.rs diff --git a/crates/rbuilder-operator/src/blocks_processor.rs b/crates/rbuilder-operator/src/blocks_processor.rs deleted file mode 100644 
index 59180d211..000000000 --- a/crates/rbuilder-operator/src/blocks_processor.rs +++ /dev/null @@ -1,502 +0,0 @@ -use alloy_primitives::{Address, BlockHash, B256, U256}; -use alloy_rpc_types_beacon::relay::SubmitBlockRequest as AlloySubmitBlockRequest; -use exponential_backoff::Backoff; -use jsonrpsee::core::{client::ClientT, traits::ToRpcParams}; -use rbuilder::{ - building::BuiltBlockTrace, - live_builder::{ - block_output::bidding_service_interface::{BidObserver, RelaySet}, - payload_events::MevBoostSlotData, - }, - utils::error_storage::store_error_event, -}; -use rbuilder_primitives::{ - serialize::{RawBundle, RawShareBundle}, - Bundle, Order, OrderId, -}; -use serde::{Deserialize, Serialize}; -use serde_json::value::RawValue; -use serde_with::{serde_as, DisplayFromStr}; -use std::{sync::Arc, time::Duration}; -use time::format_description::well_known; -use tracing::{error, warn, Span}; - -use crate::metrics::inc_submit_block_errors; - -const BLOCK_PROCESSOR_ERROR_CATEGORY: &str = "block_processor"; -pub const SIGNED_BLOCK_CONSUME_BUILT_BLOCK_METHOD: &str = "flashbots_consumeBuiltBlockV3"; - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -struct UsedSbundle { - bundle: RawShareBundle, - success: bool, -} - -#[serde_as] -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -struct UsedBundle { - #[serde_as(as = "DisplayFromStr")] - mev_gas_price: U256, - #[serde_as(as = "DisplayFromStr")] - total_eth: U256, - #[serde_as(as = "DisplayFromStr")] - eth_send_to_coinbase: U256, - #[serde_as(as = "DisplayFromStr")] - total_gas_used: u64, - original_bundle: RawBundle, - #[serde_as(as = "DisplayFromStr")] - bundle_hash: B256, -} - -/// Header used by block_consumeBuiltBlockV2. 
Since docs are not up to date I copied RbuilderHeader from block-processor/ports/models.go (commit b341b35) -/// Based on alloy_primitives::Block -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)] -#[serde(rename_all = "camelCase")] -struct BlocksProcessorHeader { - pub hash: BlockHash, - pub gas_limit: U256, - pub gas_used: U256, - #[serde(skip_serializing_if = "Option::is_none")] - pub base_fee_per_gas: Option, - pub parent_hash: BlockHash, - pub timestamp: U256, - pub number: Option, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)] -#[serde(rename_all = "camelCase")] -pub struct BlockProcessorDelayedPayments { - pub source: String, - pub value: U256, - pub address: Address, -} - -type ConsumeBuiltBlockRequest = ( - BlocksProcessorHeader, - // ordersClosedAt - String, - // sealedAt - String, - // commitedBundles - Vec, - // allBundles - Vec, - // usedSbundles - Vec, - alloy_rpc_types_beacon::relay::BidTrace, - // builderName - String, - // trueBidValue - U256, - // bestBidValue - U256, - Vec, - // Relays - Vec, -); - -/// Struct to avoid copying ConsumeBuiltBlockRequest since HttpClient::request eats the parameter. -#[derive(Clone)] -struct ConsumeBuiltBlockRequestArc { - inner: Arc, -} - -impl ConsumeBuiltBlockRequestArc { - fn new(request: ConsumeBuiltBlockRequest) -> Self { - Self { - inner: Arc::new(request), - } - } - fn as_ref(&self) -> &ConsumeBuiltBlockRequest { - self.inner.as_ref() - } -} - -impl ToRpcParams for ConsumeBuiltBlockRequestArc { - fn to_rpc_params(self) -> Result>, jsonrpsee::core::Error> { - let json = serde_json::to_string(self.inner.as_ref()) - .map_err(jsonrpsee::core::Error::ParseError)?; - RawValue::from_string(json) - .map(Some) - .map_err(jsonrpsee::core::Error::ParseError) - } -} - -#[derive(Debug, Clone)] -pub struct BlocksProcessorClient { - client: HttpClientType, - consume_built_block_method: &'static str, -} - -/// RawBundle::encode_no_blobs but more compatible. 
-fn encode_bundle_for_blocks_processor(mut bundle: Bundle) -> RawBundle { - // set to 0 when none - bundle.block = bundle.block.or(Some(0)); - RawBundle::encode_no_blobs(bundle.clone()) -} - -impl BlocksProcessorClient { - pub fn new(client: HttpClientType, consume_built_block_method: &'static str) -> Self { - Self { - client, - consume_built_block_method, - } - } - pub async fn submit_built_block( - &self, - submit_block_request: &AlloySubmitBlockRequest, - built_block_trace: &BuiltBlockTrace, - builder_name: String, - best_bid_value: U256, - relays: RelaySet, - ) -> eyre::Result<()> { - let execution_payload_v1 = match submit_block_request { - AlloySubmitBlockRequest::Capella(request) => &request.execution_payload.payload_inner, - AlloySubmitBlockRequest::Deneb(request) => { - &request.execution_payload.payload_inner.payload_inner - } - AlloySubmitBlockRequest::Electra(request) => { - &request.execution_payload.payload_inner.payload_inner - } - AlloySubmitBlockRequest::Fulu(request) => { - &request.execution_payload.payload_inner.payload_inner - } - }; - let header = BlocksProcessorHeader { - hash: execution_payload_v1.block_hash, - gas_limit: U256::from(execution_payload_v1.gas_limit), - gas_used: U256::from(execution_payload_v1.gas_used), - base_fee_per_gas: Some(execution_payload_v1.base_fee_per_gas), - parent_hash: execution_payload_v1.parent_hash, - timestamp: U256::from(execution_payload_v1.timestamp), - number: Some(U256::from(execution_payload_v1.block_number)), - }; - let closed_at = built_block_trace - .orders_closed_at - .format(&well_known::Iso8601::DEFAULT)?; - let sealed_at = built_block_trace - .orders_sealed_at - .format(&well_known::Iso8601::DEFAULT)?; - - let committed_bundles = built_block_trace - .included_orders - .iter() - .filter_map(|res| { - if let Order::Bundle(bundle) = &res.order { - Some(UsedBundle { - mev_gas_price: res.inplace_sim.full_profit_info().mev_gas_price(), - total_eth: res.inplace_sim.full_profit_info().coinbase_profit(), 
- eth_send_to_coinbase: U256::ZERO, - total_gas_used: res.inplace_sim.gas_used(), - original_bundle: encode_bundle_for_blocks_processor(bundle.clone()), - bundle_hash: bundle.external_hash.unwrap_or(bundle.hash), - }) - } else { - None - } - }) - .collect::>(); - - let used_share_bundles = Self::get_used_sbundles(built_block_trace); - - let mut delayed_payments: Vec<_> = built_block_trace - .included_orders - .iter() - .filter_map(|res| { - let delayed_kickback = if let Some(k) = &res.delayed_kickback { - k - } else { - return None; - }; - - if delayed_kickback.should_pay_in_block { - return None; - } - - let bundle_uuid = match res.order.id() { - OrderId::Bundle(uuid) => uuid, - _ => { - error!(order = ?res.order.id(), "Delayed kickback is found for non-bundle"); - return None; - } - }; - - Some(BlockProcessorDelayedPayments { - source: bundle_uuid.to_string(), - value: delayed_kickback.payout_value, - address: delayed_kickback.recipient, - }) - }) - .collect(); - delayed_payments.push(BlockProcessorDelayedPayments { - source: "mev_blocker".into(), - value: built_block_trace.mev_blocker_price, - address: Address::ZERO, - }); - - let params: ConsumeBuiltBlockRequest = ( - header, - closed_at, - sealed_at, - committed_bundles, - Vec::::new(), - used_share_bundles, - submit_block_request.bid_trace().clone(), - builder_name, - built_block_trace.true_bid_value, - best_bid_value, - delayed_payments, - relays - .relays() - .iter() - .map(|relay| relay.to_string()) - .collect(), - ); - let request = ConsumeBuiltBlockRequestArc::new(params); - let backoff = backoff(); - let mut backoff_iter = backoff.iter(); - loop { - let sleep_time = backoff_iter.next(); - match self - .client - .request(self.consume_built_block_method, request.clone()) - .await - { - Ok(()) => { - return Ok(()); - } - Err(err) => match sleep_time { - Some(time) => { - warn!(?err, "Block processor returned error, retrying."); - tokio::time::sleep(time).await; - } - None => { - 
Self::handle_rpc_error(&err, request.as_ref()); - return Err(err.into()); - } - }, - } - } - } - - fn handle_rpc_error(err: &jsonrpsee::core::Error, request: &ConsumeBuiltBlockRequest) { - const RPC_ERROR_TEXT: &str = "Block processor RPC"; - match err { - jsonrpsee::core::Error::Call(error_object) => { - error!(err = ?error_object, kind = "error_returned", RPC_ERROR_TEXT); - store_error_event(BLOCK_PROCESSOR_ERROR_CATEGORY, &err.to_string(), request); - } - jsonrpsee::core::Error::Transport(_) => { - error!(err = ?err, kind = "transport", RPC_ERROR_TEXT); - store_error_event(BLOCK_PROCESSOR_ERROR_CATEGORY, &err.to_string(), request); - } - jsonrpsee::core::Error::ParseError(error) => { - error!(err = ?err, kind = "deserialize", RPC_ERROR_TEXT); - let error_txt = error.to_string(); - if !(error_txt.contains("504 Gateway Time-out") - || error_txt.contains("502 Bad Gateway")) - { - store_error_event(BLOCK_PROCESSOR_ERROR_CATEGORY, &err.to_string(), request); - } - } - _ => { - error!(err = ?err, kind = "other", RPC_ERROR_TEXT); - } - } - } - - /// Gets the UsedSbundle carefully considering virtual orders formed by other original orders. - fn get_used_sbundles(built_block_trace: &BuiltBlockTrace) -> Vec { - built_block_trace - .included_orders - .iter() - .flat_map(|exec_result| { - if let Order::ShareBundle(sbundle) = &exec_result.order { - // don't like having special cases (merged vs not merged), can we improve this? - let filtered_sbundles = if sbundle.is_merged_order() { - // We include only original orders that are contained in original_order_ids. - // If not contained in original_order_ids then the sub sbundle failed or was an empty execution. 
- sbundle - .original_orders - .iter() - .filter_map(|sub_order| { - if let Order::ShareBundle(sbundle) = sub_order { - if exec_result.original_order_ids.contains(&sub_order.id()) { - Some(sbundle) - } else { - None - } - } else { - None - } - }) - .collect() - } else if exec_result.tx_infos.is_empty() { - // non merged empty execution sbundle - vec![] - } else { - // non merged non empty execution sbundle - vec![sbundle] - }; - filtered_sbundles - .into_iter() - .map(|sbundle| UsedSbundle { - bundle: RawShareBundle::encode_no_blobs(sbundle.clone()), - success: true, - }) - .collect() - } else { - Vec::new() - } - }) - .collect::>() - } -} - -/// BidObserver sending all data to a BlocksProcessorClient -#[derive(Debug)] -pub struct BlocksProcessorClientBidObserver { - client: BlocksProcessorClient, -} - -impl BlocksProcessorClientBidObserver { - pub fn new(client: BlocksProcessorClient) -> Self { - Self { client } - } -} - -impl BidObserver - for BlocksProcessorClientBidObserver -{ - fn block_submitted( - &self, - _slot_data: &MevBoostSlotData, - submit_block_request: Arc, - built_block_trace: Arc, - builder_name: String, - best_bid_value: U256, - relays: &RelaySet, - ) { - let client = self.client.clone(); - let parent_span = Span::current(); - let relays = relays.clone(); - tokio::spawn(async move { - let block_processor_result = client - .submit_built_block( - &submit_block_request, - &built_block_trace, - builder_name, - best_bid_value, - relays, - ) - .await; - if let Err(err) = block_processor_result { - inc_submit_block_errors(); - warn!(parent: &parent_span, ?err, "Failed to submit block to the blocks api"); - } - }); - } -} - -// backoff is around 1 minute and total number of requests per payload will be 4 -// assuming 200 blocks per slot and if API is down we will max at around 1k of blocks in memory -fn backoff() -> Backoff { - let mut backoff = Backoff::new(3, Duration::from_secs(5), None); - backoff.set_factor(2); - backoff.set_jitter(0.1); - backoff -} 
- -#[cfg(test)] -mod tests { - use alloy_primitives::fixed_bytes; - use rbuilder_primitives::serialize::RawBundleMetadata; - use uuid::Uuid; - - use super::*; - - #[test] - fn backoff_total_time_assert() { - let mut requests = 0; - let mut total_sleep_time = Duration::default(); - let backoff = backoff(); - let backoff_iter = backoff.iter(); - for duration in backoff_iter { - requests += 1; - total_sleep_time += duration; - } - assert_eq!(requests, 4); - let total_sleep_time = total_sleep_time.as_secs(); - dbg!(total_sleep_time); - assert!(total_sleep_time > 40 && total_sleep_time < 90); - } - - #[test] - fn test_delayed_payment_serialize() { - let value = BlockProcessorDelayedPayments { - address: alloy_primitives::address!("93Ea7cB31f76B982601321b2A0d93Ec9A948236D"), - value: U256::from(16), - source: Uuid::try_parse("ff7b2232-b30d-4889-9258-c3632ba4bfc0") - .unwrap() - .to_string(), - }; - - let value_str = serde_json::to_string(&value).unwrap(); - - let expected_str = r#"{"source":"ff7b2232-b30d-4889-9258-c3632ba4bfc0","value":"0x10","address":"0x93ea7cb31f76b982601321b2a0d93ec9a948236d"}"#; - - assert_eq!(value_str, expected_str); - - let value = BlockProcessorDelayedPayments { - address: Address::ZERO, - value: U256::from(16), - source: "mev_blocker".into(), - }; - - let value_str = serde_json::to_string(&value).unwrap(); - - let expected_str = r#"{"source":"mev_blocker","value":"0x10","address":"0x0000000000000000000000000000000000000000"}"#; - - assert_eq!(value_str, expected_str); - } - - #[test] - fn test_used_bundle_serialize() { - let value = UsedBundle { - mev_gas_price: U256::from(100), - total_eth: U256::from(200), - eth_send_to_coinbase: U256::from(300), - total_gas_used: 21000, - original_bundle: RawBundle { - metadata: RawBundleMetadata { - version: None, - block_number: None, - reverting_tx_hashes: Vec::new(), - dropping_tx_hashes: Vec::new(), - replacement_uuid: None, - uuid: None, - signing_address: None, - refund_identity: None, - 
min_timestamp: None, - max_timestamp: None, - replacement_nonce: None, - refund_percent: None, - refund_recipient: None, - refund_tx_hashes: None, - delayed_refund: None, - bundle_hash: None, - }, - txs: Vec::new(), - }, - bundle_hash: fixed_bytes!( - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - ), - }; - let value_str = serde_json::to_string(&value).unwrap(); - - let expected_str = r#"{"mevGasPrice":"100","totalEth":"200","ethSendToCoinbase":"300","totalGasUsed":"21000","originalBundle":{"version":null,"blockNumber":null,"revertingTxHashes":[],"droppingTxHashes":[],"txs":[]},"bundleHash":"0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}"#; - assert_eq!(value_str, expected_str); - } -} diff --git a/crates/rbuilder-operator/src/clickhouse.rs b/crates/rbuilder-operator/src/clickhouse.rs index 648f8badf..39f95d17f 100644 --- a/crates/rbuilder-operator/src/clickhouse.rs +++ b/crates/rbuilder-operator/src/clickhouse.rs @@ -8,7 +8,8 @@ use clickhouse::{Client, Row}; use rbuilder::{ building::BuiltBlockTrace, live_builder::{ - block_output::bidding_service_interface::BidObserver, payload_events::MevBoostSlotData, + block_output::bidding_service_interface::{BidObserver, RelaySet}, + payload_events::MevBoostSlotData, }, }; use rbuilder_primitives::{Order, OrderId}; @@ -218,6 +219,7 @@ impl BidObserver for BuiltBlocksWriter { built_block_trace: Arc, builder_name: String, best_bid_value: U256, + _relays: &RelaySet, ) { let slot = slot_data.slot(); let block_number = slot_data.block(); diff --git a/crates/rbuilder-operator/src/flashbots_config.rs b/crates/rbuilder-operator/src/flashbots_config.rs index 6583abeb5..8e2c3fbfc 100644 --- a/crates/rbuilder-operator/src/flashbots_config.rs +++ b/crates/rbuilder-operator/src/flashbots_config.rs @@ -40,12 +40,7 @@ use url::Url; use crate::{ bidding_service_wrapper::client::bidding_service_client_adapter::BiddingServiceClientAdapter, - blocks_processor::{ - BlocksProcessorClient, 
BlocksProcessorClientBidObserver, - SIGNED_BLOCK_CONSUME_BUILT_BLOCK_METHOD, - }, - build_info::rbuilder_version, - clickhouse::BuiltBlocksWriter, + build_info::rbuilder_version, clickhouse::BuiltBlocksWriter, true_block_value_push::best_true_value_observer::BestTrueValueObserver, }; @@ -317,26 +312,6 @@ impl FlashbotsConfig { cancellation_token.clone(), ); return Ok(Some(Box::new(writer))); - } - - if let Some(url) = &self.blocks_processor_url { - let bid_observer: Box = if let Some( - block_processor_key, - ) = block_processor_key - { - let client = crate::signed_http_client::create_client( - url, - block_processor_key, - self.blocks_processor_max_request_size_bytes, - self.blocks_processor_max_concurrent_requests, - )?; - let block_processor = - BlocksProcessorClient::new(client, SIGNED_BLOCK_CONSUME_BUILT_BLOCK_METHOD); - Box::new(BlocksProcessorClientBidObserver::new(block_processor)) - } else { - eyre::bail!("Unsigned block processing is not supported: if blocks_processor_url is set, a block_processor_key must also be provided"); - }; - Ok(Some(bid_observer)) } else { if block_processor_key.is_some() { return Self::bail_blocks_processor_url_not_set(); diff --git a/crates/rbuilder-operator/src/lib.rs b/crates/rbuilder-operator/src/lib.rs index 6b3cd56c7..264bd7e90 100644 --- a/crates/rbuilder-operator/src/lib.rs +++ b/crates/rbuilder-operator/src/lib.rs @@ -1,5 +1,4 @@ pub mod bidding_service_wrapper; -pub mod blocks_processor; pub mod build_info; pub mod clickhouse; pub mod flashbots_config; From c4cc68aca36c4eb17f2f8a2195f461fdf718be3f Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Thu, 13 Nov 2025 16:32:07 -0300 Subject: [PATCH 08/26] metrics --- crates/rbuilder-operator/src/clickhouse.rs | 6 +- crates/rbuilder-operator/src/metrics.rs | 118 ++++++++++++++++++++- 2 files changed, 119 insertions(+), 5 deletions(-) diff --git a/crates/rbuilder-operator/src/clickhouse.rs b/crates/rbuilder-operator/src/clickhouse.rs index 39f95d17f..495c04629 100644 --- 
a/crates/rbuilder-operator/src/clickhouse.rs +++ b/crates/rbuilder-operator/src/clickhouse.rs @@ -19,7 +19,7 @@ use rbuilder_utils::clickhouse::{ primitives::{ClickhouseIndexableData, ClickhouseRowExt}, }, serde::{option_u256, vec_u256}, - spawn_clickhouse_inserter_and_backup, + spawn_clickhouse_inserter_and_backup, Quantities, }; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; @@ -27,7 +27,7 @@ use tokio::sync::mpsc; use tokio_util::sync::CancellationToken; use tracing::error; -use crate::flashbots_config::BuiltBlocksClickhouseConfig; +use crate::{flashbots_config::BuiltBlocksClickhouseConfig, metrics::ClickhouseMetrics}; /// BlockRow to insert in clickhouse and also as entry type for the indexer since the BlockRow is made from a few &objects so it makes no sense to have a Block type and copy all the fields. #[derive(Debug, Clone, Serialize, Deserialize, Row)] @@ -121,7 +121,7 @@ impl BuiltBlocksWriter { let task_executor = task_manager.executor(); let (block_tx, block_rx) = mpsc::channel::(BUILT_BLOCKS_CHANNEL_SIZE); - spawn_clickhouse_inserter_and_backup::( + spawn_clickhouse_inserter_and_backup::( &client, block_rx, &task_executor, diff --git a/crates/rbuilder-operator/src/metrics.rs b/crates/rbuilder-operator/src/metrics.rs index 40f63ac43..bfd26a836 100644 --- a/crates/rbuilder-operator/src/metrics.rs +++ b/crates/rbuilder-operator/src/metrics.rs @@ -1,10 +1,18 @@ #![allow(unexpected_cfgs)] +use std::time::Duration; + use ctor::ctor; use lazy_static::lazy_static; use metrics_macros::register_metrics; -use prometheus::{IntCounterVec, IntGaugeVec, Opts}; -use rbuilder::{telemetry::REGISTRY, utils::build_info::Version}; +use prometheus::{ + HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, Opts, +}; +use rbuilder::{ + telemetry::{exponential_buckets_range, REGISTRY}, + utils::{self, build_info::Version}, +}; +use rbuilder_utils::clickhouse::Quantities; register_metrics! 
{ pub static BLOCK_API_ERRORS: IntCounterVec = IntCounterVec::new( @@ -19,8 +27,62 @@ register_metrics! { ) .unwrap(); + pub static CLICKHOUSE_WRITE_FAILURES: IntCounter = IntCounter::new("clickhouse_write_failures", "Clickhouse write failures for built blocks") + .unwrap(); + pub static CLICKHOUSE_ROWS_COMMITTED: IntCounter = IntCounter::new("clickhouse_rows_committed", "Clickhouse built blocks commited directly to clickhouse (no backup involved)") + .unwrap(); + pub static CLICKHOUSE_BYTES_COMMITTED: IntCounter = IntCounter::new("clickhouse_bytes_committed", "Clickhouse built blocks bytes commited directly to clickhouse (no backup involved)") + .unwrap(); + pub static CLICKHOUSE_BATCHES_COMMITTED: IntCounter = IntCounter::new("clickhouse_batches_committed", "Clickhouse built blocks batches commited directly to clickhouse (no backup involved)") + .unwrap(); + pub static CLICKHOUSE_ROWS_COMMITTED_FROM_BACKUP: IntCounter = IntCounter::new("clickhouse_rows_committed_from_backup", "Clickhouse built blocks commited to clickhouse from the local backup") + .unwrap(); + pub static CLICKHOUSE_BYTES_COMMITTED_FROM_BACKUP: IntCounter = IntCounter::new("clickhouse_bytes_committed_from_backup", "Clickhouse built blocks bytes commited to clickhouse from the local backup") + .unwrap(); + + pub static CLICKHOUSE_ROWS_LOST: IntCounter = IntCounter::new("clickhouse_rows_lost", "") + .unwrap(); + pub static CLICKHOUSE_BYTES_LOST: IntCounter = IntCounter::new("clickhouse_bytes_lost", "") + .unwrap(); + + + + pub static CLICKHOUSE_COMMIT_FAILURES: IntCounter = IntCounter::new("clickhouse_commit_failures", "Clickhouse built blocks batches commited failures") + .unwrap(); + pub static CLICKHOUSE_BACKUP_DISK_ERRORS: IntCounter = IntCounter::new("clickhouse_backup_disk_errors", "Any problem related to the disk backup, it can be reading, writing, etc.") + .unwrap(); + pub static CLICKHOUSE_BATCH_COMMIT_TIME: HistogramVec = HistogramVec::new( + 
HistogramOpts::new("clickhouse_batch_commit_time","Time to commit a block batch to Clickhouse (ms)") + .buckets(exponential_buckets_range(0.5, 3000.0, 50)), + &[] + ) + .unwrap(); + pub static CLICKHOUSE_QUEUE_SIZE: IntGauge = + IntGauge::new("clickhouse_queue_size", "Size of the queue of the task that is inserting into clickhouse").unwrap(); + pub static CLICKHOUSE_DISK_BACKUP_SIZE_BYTES: IntGauge = + IntGauge::new("clickhouse_disk_backup_size_bytes", "Space used in bytes by the local DB for failed commit batches.").unwrap(); + pub static CLICKHOUSE_DISK_BACKUP_SIZE_BATCHES: IntGauge = + IntGauge::new("clickhouse_disk_backup_size_batches", "Amount of batches in local DB for failed commit batches.").unwrap(); + pub static CLICKHOUSE_MEMORY_BACKUP_SIZE_BYTES: IntGauge = + IntGauge::new("clickhouse_memory_backup_size_bytes", "Space used in bytes by the in memory DB for failed commit batches.").unwrap(); + pub static CLICKHOUSE_MEMORY_BACKUP_SIZE_BATCHES: IntGauge = + IntGauge::new("clickhouse_memory_backup_size_batches", "Amount of batches in in memory DB for failed commit batches.").unwrap(); + + + + + } +/* + /// Space used by the local DB for failed commit batches. + fn set_disk_backup_size(size_bytes: u64, batches: usize, order: &'static str); + fn increment_backup_disk_errors(order: &'static str, error: &str); + /// Space used in memory for failed commit batches. 
+ fn set_memory_backup_size(size_bytes: u64, batches: usize, order: &'static str); + +*/ + pub fn inc_submit_block_errors() { BLOCK_API_ERRORS.with_label_values(&["submit_block"]).inc() } @@ -38,3 +100,55 @@ pub(super) fn set_bidding_service_version(version: Version) { ]) .set(1); } + +pub(crate) struct ClickhouseMetrics {} + +impl rbuilder_utils::clickhouse::backup::metrics::Metrics for ClickhouseMetrics { + fn increment_write_failures(_err: String) { + CLICKHOUSE_WRITE_FAILURES.inc(); + } + + fn process_quantities(quantities: &Quantities) { + CLICKHOUSE_ROWS_COMMITTED.inc_by(quantities.rows); + CLICKHOUSE_BYTES_COMMITTED.inc_by(quantities.bytes); + CLICKHOUSE_BATCHES_COMMITTED.inc(); + } + + fn record_batch_commit_time(duration: Duration) { + CLICKHOUSE_BATCH_COMMIT_TIME + .with_label_values(&[]) + .observe(utils::duration_ms(duration)); + } + + fn increment_commit_failures(_err: String) { + CLICKHOUSE_COMMIT_FAILURES.inc(); + } + + fn set_queue_size(size: usize, _order: &'static str) { + CLICKHOUSE_QUEUE_SIZE.set(size as i64); + } + + fn set_disk_backup_size(size_bytes: u64, batches: usize, _order: &'static str) { + CLICKHOUSE_DISK_BACKUP_SIZE_BYTES.set(size_bytes as i64); + CLICKHOUSE_DISK_BACKUP_SIZE_BATCHES.set(batches as i64); + } + + fn increment_backup_disk_errors(_order: &'static str, _error: &str) { + CLICKHOUSE_BACKUP_DISK_ERRORS.inc(); + } + + fn set_memory_backup_size(size_bytes: u64, batches: usize, _order: &'static str) { + CLICKHOUSE_MEMORY_BACKUP_SIZE_BYTES.set(size_bytes as i64); + CLICKHOUSE_MEMORY_BACKUP_SIZE_BATCHES.set(batches as i64); + } + + fn process_backup_data_lost_quantities(quantities: &Quantities) { + CLICKHOUSE_ROWS_LOST.inc_by(quantities.rows); + CLICKHOUSE_BYTES_LOST.inc_by(quantities.bytes); + } + + fn process_backup_data_quantities(quantities: &Quantities) { + CLICKHOUSE_ROWS_COMMITTED_FROM_BACKUP.inc_by(quantities.rows); + CLICKHOUSE_BYTES_COMMITTED_FROM_BACKUP.inc_by(quantities.bytes); + } +} From 
ee36b62e1118426e1085377abdd5354fc9785135 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Thu, 13 Nov 2025 17:35:36 -0300 Subject: [PATCH 09/26] polishing metrics --- crates/rbuilder-operator/src/metrics.rs | 5 ++--- .../src/clickhouse/backup/metrics.rs | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/crates/rbuilder-operator/src/metrics.rs b/crates/rbuilder-operator/src/metrics.rs index bfd26a836..3830b6dc6 100644 --- a/crates/rbuilder-operator/src/metrics.rs +++ b/crates/rbuilder-operator/src/metrics.rs @@ -40,13 +40,12 @@ register_metrics! { pub static CLICKHOUSE_BYTES_COMMITTED_FROM_BACKUP: IntCounter = IntCounter::new("clickhouse_bytes_committed_from_backup", "Clickhouse built blocks bytes commited to clickhouse from the local backup") .unwrap(); - pub static CLICKHOUSE_ROWS_LOST: IntCounter = IntCounter::new("clickhouse_rows_lost", "") + pub static CLICKHOUSE_ROWS_LOST: IntCounter = IntCounter::new("clickhouse_rows_lost", "clickhouse_rows_lost") .unwrap(); - pub static CLICKHOUSE_BYTES_LOST: IntCounter = IntCounter::new("clickhouse_bytes_lost", "") + pub static CLICKHOUSE_BYTES_LOST: IntCounter = IntCounter::new("clickhouse_bytes_lost", "clickhouse_bytes_lost") .unwrap(); - pub static CLICKHOUSE_COMMIT_FAILURES: IntCounter = IntCounter::new("clickhouse_commit_failures", "Clickhouse built blocks batches commited failures") .unwrap(); pub static CLICKHOUSE_BACKUP_DISK_ERRORS: IntCounter = IntCounter::new("clickhouse_backup_disk_errors", "Any problem related to the disk backup, it can be reading, writing, etc.") diff --git a/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs b/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs index 03293fb5a..01e461219 100644 --- a/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs +++ b/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs @@ -3,17 +3,31 @@ use std::time::Duration; /// Metrics updated by the clickhouse_with_backup mod. 
pub trait Metrics { + /// Failed to write the data to clickhouse either on the first try or from backup. fn increment_write_failures(err: String); + /// Quantities of the data that was inserted into clickhouse on the first try, no backup involved. fn process_quantities(quantities: &Quantities); + /// Time taken to commit the data to clickhouse either on the first try or from backup. fn record_batch_commit_time(duration: Duration); + /// Failed to commit the data to clickhouse either on the first try or from backup. fn increment_commit_failures(err: String); + /// Size of the queue of the task that is inserting into clickhouse. fn set_queue_size(size: usize, order: &'static str); + /// Space used by the local DB for failed commit batches. fn set_disk_backup_size(size_bytes: u64, batches: usize, order: &'static str); + /// Any problem related to the disk backup, it can be reading, writing, etc. fn increment_backup_disk_errors(order: &'static str, error: &str); + /// Space used in memory for failed commit batches. fn set_memory_backup_size(size_bytes: u64, batches: usize, order: &'static str); + /// Backed-up data that could not be committed to clickhouse and was dropped (permanently lost). fn process_backup_data_lost_quantities(quantities: &Quantities); + /// Some rows were inserted into clickhouse from the local backup DB. fn process_backup_data_quantities(quantities: &Quantities); - fn set_backup_empty_size(order: &'static str); + /// Backup was emptied. No more unsaved data to commit. Equivalent to set_disk_backup_size(0,0,order)+set_memory_backup_size(0,0,order) + fn set_backup_empty_size(order: &'static str) { + Self::set_memory_backup_size(0, 0, order); + Self::set_disk_backup_size(0, 0, order); + } } /// Feeling lazy? Grafana is too expensive for you? 
From ee8b2a357cc22004fc3e824cc86f29bb185f739d Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Thu, 13 Nov 2025 17:37:14 -0300 Subject: [PATCH 10/26] lint --- crates/rbuilder-operator/src/clickhouse.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/crates/rbuilder-operator/src/clickhouse.rs b/crates/rbuilder-operator/src/clickhouse.rs index 495c04629..d1d1af5f3 100644 --- a/crates/rbuilder-operator/src/clickhouse.rs +++ b/crates/rbuilder-operator/src/clickhouse.rs @@ -14,12 +14,9 @@ use rbuilder::{ }; use rbuilder_primitives::{Order, OrderId}; use rbuilder_utils::clickhouse::{ - backup::{ - metrics::NullMetrics, - primitives::{ClickhouseIndexableData, ClickhouseRowExt}, - }, + backup::primitives::{ClickhouseIndexableData, ClickhouseRowExt}, serde::{option_u256, vec_u256}, - spawn_clickhouse_inserter_and_backup, Quantities, + spawn_clickhouse_inserter_and_backup, }; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; From 5c867415da2e2960295b832c5a7fcb099050c01c Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Thu, 13 Nov 2025 17:38:30 -0300 Subject: [PATCH 11/26] slightly better kill process --- .../client/bidding_service_client_adapter.rs | 40 +++++++++++++---- .../rbuilder-operator/src/flashbots_config.rs | 5 ++- .../rbuilder/src/live_builder/base_config.rs | 6 +-- crates/rbuilder/src/live_builder/mod.rs | 43 +++++++++++++++++++ crates/rbuilder/src/live_builder/watchdog.rs | 20 ++++----- 5 files changed, 91 insertions(+), 23 deletions(-) diff --git a/crates/rbuilder-operator/src/bidding_service_wrapper/client/bidding_service_client_adapter.rs b/crates/rbuilder-operator/src/bidding_service_wrapper/client/bidding_service_client_adapter.rs index 59f94283c..5d35cb75c 100644 --- a/crates/rbuilder-operator/src/bidding_service_wrapper/client/bidding_service_client_adapter.rs +++ b/crates/rbuilder-operator/src/bidding_service_wrapper/client/bidding_service_client_adapter.rs @@ -2,9 +2,13 @@ use futures_util::FutureExt; use 
hyper_util::rt::TokioIo; use parking_lot::Mutex; use rbuilder::{ - live_builder::block_output::bidding_service_interface::{ - BiddingService, BlockSealInterfaceForSlotBidder, LandedBlockInfo as RealLandedBlockInfo, - RelaySet, ScrapedRelayBlockBidWithStats, SlotBidder, SlotBlockId, + live_builder::{ + block_output::bidding_service_interface::{ + BiddingService, BlockSealInterfaceForSlotBidder, + LandedBlockInfo as RealLandedBlockInfo, RelaySet, ScrapedRelayBlockBidWithStats, + SlotBidder, SlotBlockId, + }, + ProcessKiller, }, utils::build_info::Version, }; @@ -109,6 +113,7 @@ impl BiddingServiceClientAdapter { landed_blocks_history: &[RealLandedBlockInfo], all_relay_ids: RelaySet, cancellation_token: CancellationToken, + process_killer: ProcessKiller, ) -> Result { let session_id_to_slot_bidder = Arc::new(Mutex::new(HashMap::new())); let (commands_sender, relay_sets) = Self::init_sender_task( @@ -116,6 +121,7 @@ impl BiddingServiceClientAdapter { landed_blocks_history, all_relay_ids, session_id_to_slot_bidder.clone(), + process_killer, ) .await?; spawn_slot_bidder_seal_bid_command_subscriber( @@ -146,6 +152,7 @@ impl BiddingServiceClientAdapter { session_id_to_slot_bidder: Arc< Mutex>>, >, + process_killer: ProcessKiller, ) -> Result<( mpsc::UnboundedSender, Vec, @@ -185,7 +192,7 @@ impl BiddingServiceClientAdapter { build_time_utc: bidding_service_version.build_time_utc, }); let (commands_sender, rx) = mpsc::unbounded_channel::(); - Self::spawn_sender_loop_task(rx, client, session_id_to_slot_bidder); + Self::spawn_sender_loop_task(rx, client, session_id_to_slot_bidder, process_killer); Ok((commands_sender, relay_sets)) } @@ -197,6 +204,7 @@ impl BiddingServiceClientAdapter { session_id_to_slot_bidder: Arc< Mutex>>, >, + process_killer: ProcessKiller, ) { // Spawn a task to execute received futures tokio::spawn(async move { @@ -207,25 +215,34 @@ impl BiddingServiceClientAdapter { &mut client, create_slot_data, session_id_to_slot_bidder.clone(), + &process_killer, ) 
.await; } BiddingServiceClientCommand::MustWinBlock(must_win_block_params) => { - Self::handle_error(client.must_win_block(must_win_block_params).await); + Self::handle_error( + client.must_win_block(must_win_block_params).await, + &process_killer, + ); } BiddingServiceClientCommand::UpdateNewLandedBlocksDetected(params) => { - Self::handle_error(client.update_new_landed_blocks_detected(params).await); + Self::handle_error( + client.update_new_landed_blocks_detected(params).await, + &process_killer, + ); } BiddingServiceClientCommand::UpdateFailedReadingNewLandedBlocks => { Self::handle_error( client .update_failed_reading_new_landed_blocks(Empty {}) .await, + &process_killer, ); } BiddingServiceClientCommand::DestroySlotBidder(destroy_slot_bidder_params) => { Self::handle_error( client.destroy_slot_bidder(destroy_slot_bidder_params).await, + &process_killer, ); session_id_to_slot_bidder .lock() @@ -243,6 +260,7 @@ impl BiddingServiceClientAdapter { session_id_to_slot_bidder: Arc< Mutex>>, >, + process_killer: &ProcessKiller, ) { let session_id = create_slot_bidder_data.params.session_id; session_id_to_slot_bidder @@ -253,16 +271,20 @@ impl BiddingServiceClientAdapter { .await { session_id_to_slot_bidder.lock().remove(&session_id); - Self::handle_error(Err(err)); + Self::handle_error(Err(err), process_killer); }; } /// If error logs it. 
/// return result is error - fn handle_error(result: tonic::Result>) -> bool { + fn handle_error( + result: tonic::Result>, + process_killer: &ProcessKiller, + ) -> bool { if let Err(error) = &result { error!(error=?error,"RPC call error, killing process so it reconnects"); - std::process::exit(1); + process_killer.kill("RPC call error", true); + true } else { false } diff --git a/crates/rbuilder-operator/src/flashbots_config.rs b/crates/rbuilder-operator/src/flashbots_config.rs index 8e2c3fbfc..0c9f69469 100644 --- a/crates/rbuilder-operator/src/flashbots_config.rs +++ b/crates/rbuilder-operator/src/flashbots_config.rs @@ -26,7 +26,7 @@ use rbuilder::{ SpecificBuilderConfig, }, payload_events::MevBoostSlotData, - LiveBuilder, + LiveBuilder, ProcessKiller, }, provider::StateProviderFactory, utils::build_info::Version, @@ -155,6 +155,7 @@ impl LiveBuilderConfig for FlashbotsConfig { &landed_blocks, self.l1_config.relays_ids(), cancellation_token.clone(), + ProcessKiller::new(cancellation_token.clone()), ) .await?; @@ -264,12 +265,14 @@ impl FlashbotsConfig { landed_blocks_history: &[LandedBlockInfo], all_relay_ids: RelaySet, cancellation_token: CancellationToken, + process_killer: ProcessKiller, ) -> eyre::Result> { let bidding_service_client = BiddingServiceClientAdapter::new( &self.bidding_service_ipc_path, landed_blocks_history, all_relay_ids, cancellation_token, + process_killer, ) .await .map_err(|e| eyre::Report::new(e).wrap_err("Unable to connect to remote bidder"))?; diff --git a/crates/rbuilder/src/live_builder/base_config.rs b/crates/rbuilder/src/live_builder/base_config.rs index 733121e74..293125629 100644 --- a/crates/rbuilder/src/live_builder/base_config.rs +++ b/crates/rbuilder/src/live_builder/base_config.rs @@ -6,7 +6,7 @@ use crate::{ NullOrderFlowTracerManager, OrderFlowTracerManager, OrderFlowTracerManagerImpl, }, order_input::OrderInputConfig, - LiveBuilder, + LiveBuilder, ProcessKiller, }, provider::{ ipc_state_provider::{IpcProviderConfig, 
IpcStateProviderFactory}, @@ -254,8 +254,8 @@ impl BaseConfig { extra_data: self.extra_data.clone(), blocklist_provider, - global_cancellation: cancellation_token, - + global_cancellation: cancellation_token.clone(), + process_killer: ProcessKiller::new(cancellation_token), extra_rpc: RpcModule::new(()), unfinished_built_blocks_input_factory, builders: Vec::new(), diff --git a/crates/rbuilder/src/live_builder/mod.rs b/crates/rbuilder/src/live_builder/mod.rs index 908dd6a18..aa2b81c88 100644 --- a/crates/rbuilder/src/live_builder/mod.rs +++ b/crates/rbuilder/src/live_builder/mod.rs @@ -118,6 +118,7 @@ where pub blocklist_provider: Arc, pub global_cancellation: CancellationToken, + pub process_killer: ProcessKiller, pub unfinished_built_blocks_input_factory: UnfinishedBuiltBlocksInputFactory

, pub builders: Vec>>, @@ -206,6 +207,7 @@ where Some(duration) => Some(spawn_watchdog_thread( duration, "block build started".to_string(), + self.process_killer.clone(), )?), None => { info!("Watchdog not enabled"); @@ -469,3 +471,44 @@ async fn try_send_to_orderpool( } } } + +/// This time should be enough to let the process to finish its work and exit gracefully. +/// Example of this need is the clickhouse backup that takes a while to finish and we don't want to loose any blocks. +const PROCESS_KILLER_WAIT_TIME: Duration = Duration::from_secs(12); +#[derive(Debug, Clone)] +pub struct ProcessKiller { + cancellation_token: CancellationToken, +} + +impl ProcessKiller { + pub fn new(cancellation_token: CancellationToken) -> Self { + Self { cancellation_token } + } + + /// is_error -> This is a critical kill. + /// !is_error -> This is a graceful kill. + pub fn kill(&self, reason: &str, is_error: bool) { + // puaj, copy paste + if is_error { + error!( + reason, + wait_time_secs = PROCESS_KILLER_WAIT_TIME.as_secs(), + "Process killing started, signaling cancellation token and waiting" + ); + } else { + info!( + reason, + wait_time_secs = PROCESS_KILLER_WAIT_TIME.as_secs(), + "Process killing started, signaling cancellation token and waiting" + ); + } + self.cancellation_token.cancel(); + std::thread::sleep(PROCESS_KILLER_WAIT_TIME); + if is_error { + error!(reason, "Killing process"); + } else { + info!(reason, "Killing process"); + } + std::process::exit(1); + } +} diff --git a/crates/rbuilder/src/live_builder/watchdog.rs b/crates/rbuilder/src/live_builder/watchdog.rs index 3d91eb495..45fb2bd4f 100644 --- a/crates/rbuilder/src/live_builder/watchdog.rs +++ b/crates/rbuilder/src/live_builder/watchdog.rs @@ -2,10 +2,16 @@ use flume::RecvTimeoutError; use std::{io, time::Duration}; use tracing::{error, info}; +use crate::live_builder::ProcessKiller; + /// Spawns a thread that will kill the process if there is no events sent on the channel /// for the timeout time. 
/// context is a string to be logged to be able to distinguish different types of deaths. -pub fn spawn_watchdog_thread(timeout: Duration, context: String) -> io::Result> { +pub fn spawn_watchdog_thread( + timeout: Duration, + context: String, + process_killer: ProcessKiller, +) -> io::Result> { let (sender, receiver) = flume::unbounded(); std::thread::Builder::new() .name(String::from("watchdog")) @@ -14,21 +20,15 @@ pub fn spawn_watchdog_thread(timeout: Duration, context: String) -> io::Result {} Err(RecvTimeoutError::Timeout) => { - error!(context, "Watchdog timeout"); - std::process::exit(1); + process_killer + .kill(format!("Watchdog timeout: {}", context).as_str(), true); } Err(RecvTimeoutError::Disconnected) => { break; } } } - info!( - context, - "Watchdog finished, will kill application in 12 seconds" - ); - - std::thread::sleep(Duration::from_secs(12)); - std::process::exit(1); + process_killer.kill(format!("Watchdog finished: {}", context).as_str(), false); })?; Ok(sender) From 3ea550afb801cb80497360ddf1bd39b8f09827f8 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Fri, 14 Nov 2025 12:51:24 -0300 Subject: [PATCH 12/26] fixed/polished clickhouse metrics better shutdown --- .../client/bidding_service_client_adapter.rs | 4 +- crates/rbuilder-operator/src/clickhouse.rs | 3 +- .../rbuilder-operator/src/flashbots_config.rs | 3 +- .../src/clickhouse/backup/metrics.rs | 17 +++--- .../src/clickhouse/backup/mod.rs | 35 +++++++----- .../rbuilder-utils/src/clickhouse/indexer.rs | 8 +-- .../rbuilder/src/live_builder/base_config.rs | 3 +- crates/rbuilder/src/live_builder/cli.rs | 30 ++++++++-- crates/rbuilder/src/live_builder/mod.rs | 43 +------------- .../src/live_builder/process_killer.rs | 57 +++++++++++++++++++ crates/rbuilder/src/live_builder/watchdog.rs | 10 ++-- 11 files changed, 132 insertions(+), 81 deletions(-) create mode 100644 crates/rbuilder/src/live_builder/process_killer.rs diff --git 
a/crates/rbuilder-operator/src/bidding_service_wrapper/client/bidding_service_client_adapter.rs b/crates/rbuilder-operator/src/bidding_service_wrapper/client/bidding_service_client_adapter.rs index 5d35cb75c..8abe46208 100644 --- a/crates/rbuilder-operator/src/bidding_service_wrapper/client/bidding_service_client_adapter.rs +++ b/crates/rbuilder-operator/src/bidding_service_wrapper/client/bidding_service_client_adapter.rs @@ -8,7 +8,7 @@ use rbuilder::{ LandedBlockInfo as RealLandedBlockInfo, RelaySet, ScrapedRelayBlockBidWithStats, SlotBidder, SlotBlockId, }, - ProcessKiller, + process_killer::ProcessKiller, }, utils::build_info::Version, }; @@ -283,7 +283,7 @@ impl BiddingServiceClientAdapter { ) -> bool { if let Err(error) = &result { error!(error=?error,"RPC call error, killing process so it reconnects"); - process_killer.kill("RPC call error", true); + process_killer.kill("RPC call error"); true } else { false diff --git a/crates/rbuilder-operator/src/clickhouse.rs b/crates/rbuilder-operator/src/clickhouse.rs index d1d1af5f3..af3a37aa0 100644 --- a/crates/rbuilder-operator/src/clickhouse.rs +++ b/crates/rbuilder-operator/src/clickhouse.rs @@ -10,6 +10,7 @@ use rbuilder::{ live_builder::{ block_output::bidding_service_interface::{BidObserver, RelaySet}, payload_events::MevBoostSlotData, + process_killer::RUN_SUBMIT_TO_RELAYS_JOB_CANCEL_TIME, }, }; use rbuilder_primitives::{Order, OrderId}; @@ -136,7 +137,7 @@ impl BuiltBlocksWriter { tokio::spawn(async move { cancellation_token.cancelled().await; // @Pending: Needed to avoid losing blocks but we should try to avoid this. 
- tokio::time::sleep(Duration::from_secs(1)).await; + tokio::time::sleep(RUN_SUBMIT_TO_RELAYS_JOB_CANCEL_TIME).await; task_manager.graceful_shutdown_with_timeout(Duration::from_secs(5)); }); Self { diff --git a/crates/rbuilder-operator/src/flashbots_config.rs b/crates/rbuilder-operator/src/flashbots_config.rs index 0c9f69469..9a8bb4ffd 100644 --- a/crates/rbuilder-operator/src/flashbots_config.rs +++ b/crates/rbuilder-operator/src/flashbots_config.rs @@ -26,7 +26,8 @@ use rbuilder::{ SpecificBuilderConfig, }, payload_events::MevBoostSlotData, - LiveBuilder, ProcessKiller, + process_killer::ProcessKiller, + LiveBuilder, }, provider::StateProviderFactory, utils::build_info::Version, diff --git a/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs b/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs index 01e461219..18a734b23 100644 --- a/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs +++ b/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs @@ -3,25 +3,26 @@ use std::time::Duration; /// Metrics updated by the clickhouse_with_backup mod. pub trait Metrics { - /// Failed to write the data to clickhouse either on the first try or from backup. + /// Failed to write the data to clickhouse either on the first try (ClickhouseInserter) or from backup failures, labelled with the error. fn increment_write_failures(err: String); - /// Quantities of the data that was inserted into clickhouse on the first try, no backup involved. + /// We just inserted a batch of Quantities of data into clickhouse (by the ClickhouseInserter) on the first try, no backup involved. fn process_quantities(quantities: &Quantities); /// Time taken to commit the data to clickhouse either on the first try or from backup. fn record_batch_commit_time(duration: Duration); - /// Failed to commit the data to clickhouse either on the first try or from backup. + /// Failed to commit the data to clickhouse either on the first try or from backup, labelled with the error. 
fn increment_commit_failures(err: String); - /// Size of the queue of the task that is inserting into clickhouse. + /// Size of the in-memory queue of the task that is inserting into clickhouse. fn set_queue_size(size: usize, order: &'static str); /// Space used by the local DB for failed commit batches. fn set_disk_backup_size(size_bytes: u64, batches: usize, order: &'static str); - /// Any problem related to the disk backup, it can be reading, writing, etc. + /// The total number of errors related to the disk backup, it can be reading, writing, etc. fn increment_backup_disk_errors(order: &'static str, error: &str); - /// Space used in memory for failed commit batches. + /// The size of the in-memory backup for failed commit batches. fn set_memory_backup_size(size_bytes: u64, batches: usize, order: &'static str); - /// ??? + /// Some Quantities of data have been definitely lost from the backup DB and could not be committed to + /// clickhouse. fn process_backup_data_lost_quantities(quantities: &Quantities); - /// Some rows were inserted into clickhouse from the local backup DB. + /// Some Quantities of data were restored from the backup and committed to clickhouse. fn process_backup_data_quantities(quantities: &Quantities); /// Backup was emptied. No more unsaved data to commit.
Equivalent to set_disk_backup_size(0,0,order)+set_memory_backup_size(0,0,order) fn set_backup_empty_size(order: &'static str) { diff --git a/crates/rbuilder-utils/src/clickhouse/backup/mod.rs b/crates/rbuilder-utils/src/clickhouse/backup/mod.rs index 9d22006f2..e45eb5c51 100644 --- a/crates/rbuilder-utils/src/clickhouse/backup/mod.rs +++ b/crates/rbuilder-utils/src/clickhouse/backup/mod.rs @@ -437,12 +437,16 @@ impl MemoryBackup { self.stats.size_bytes > self.config.max_size_bytes && self.failed_commits.len() > 1 } - /// Drops the oldest failed commit if the threshold has been exceeded, returning the updated - /// stats + /// Drops the oldest failed commit if the threshold has been exceeded, + /// returning (updated stats, Quantities of the dropped commit) fn drop_excess(&mut self) -> Option<(BackupSourceStats, Quantities)> { if self.threshold_exceeded() { - self.failed_commits.pop_back(); - Some((self.update_stats(), self.failed_commits.quantities())) + let dropped_quantities = self + .failed_commits + .pop_back() + .map(|commit| commit.quantities) + .unwrap_or(Quantities::ZERO); + Some((self.update_stats(), dropped_quantities)) } else { None } @@ -569,9 +573,9 @@ impl Backup { MetricsType::set_memory_backup_size(stats.size_bytes, stats.total_batches, T::TABLE_NAME); tracing::debug!(target: TARGET, order = T::TABLE_NAME, bytes = ?quantities.bytes, rows = ?quantities.rows, ?stats, "saved failed commit in-memory"); - if let Some((stats, oldest_quantities)) = self.memory_backup.drop_excess() { + if let Some((stats, dropped_quantities)) = self.memory_backup.drop_excess() { tracing::warn!(target: TARGET, order = T::TABLE_NAME, ?stats, "failed commits exceeded max memory backup size, dropping oldest"); - MetricsType::process_backup_data_lost_quantities(&oldest_quantities); + MetricsType::process_backup_data_lost_quantities(&dropped_quantities); // Clear the cached last commit if it was from memory and we just dropped it. 
self.last_cached = self .last_cached @@ -655,7 +659,7 @@ impl Backup { tokio::select! { maybe_failed_commit = self.rx.recv() => { let Some(failed_commit) = maybe_failed_commit else { - tracing::error!(target: TARGET, order = T::TABLE_NAME, "backup channel closed"); + tracing::error!(target: TARGET, order = T::TABLE_NAME, "Backup channel closed"); break; }; @@ -716,16 +720,16 @@ impl Backup { } if let Err(e) = self.disk_backup.flush().await { - tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "failed to flush disk backup during shutdown"); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "Failed to flush disk backup during shutdown"); MetricsType::increment_backup_disk_errors(T::TABLE_NAME, e.as_ref()); } else { - tracing::info!(target: TARGET, order = T::TABLE_NAME, "flushed disk backup during shutdown"); + tracing::info!(target: TARGET, order = T::TABLE_NAME, "Flushed disk backup during shutdown"); } if let Err(e) = self.inserter.end().await { - tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "failed to end backup inserter during shutdown"); + tracing::error!(target: TARGET, order = T::TABLE_NAME, ?e, "Failed to end backup inserter during shutdown"); } else { - tracing::info!(target: TARGET, order = T::TABLE_NAME, "successfully ended backup inserter during shutdown"); + tracing::info!(target: TARGET, order = T::TABLE_NAME, "Successfully ended backup inserter during shutdown"); } } @@ -739,14 +743,19 @@ impl Backup { let mut shutdown_guard = None; tokio::select! 
{ _ = self.run() => { - tracing::info!(target, "clickhouse {} backup channel closed", name); + tracing::info!(target,table_name = name, "Clickhouse backup channel closed"); } guard = shutdown => { - tracing::info!(target, "Received shutdown for {} backup, performing cleanup", name); + tracing::info!(target, table_name = name,"Received shutdown backup, performing clickhouse backup cleanup"); shutdown_guard = Some(guard); }, } self.end().await; + tracing::info!( + target, + table_name = name, + "Clickhouse backup cleanup complete" + ); drop(shutdown_guard); }); } diff --git a/crates/rbuilder-utils/src/clickhouse/indexer.rs b/crates/rbuilder-utils/src/clickhouse/indexer.rs index 296da120d..7d68cfd2f 100644 --- a/crates/rbuilder-utils/src/clickhouse/indexer.rs +++ b/crates/rbuilder-utils/src/clickhouse/indexer.rs @@ -224,20 +224,20 @@ impl InserterRunner { - tracing::info!(target, "clickhouse {name} indexer channel closed"); + tracing::info!(target,table_name = name, "clickhouse indexer channel closed"); } guard = shutdown => { - tracing::info!(target, "Received shutdown for {name} indexer, performing cleanup"); + tracing::info!(target,table_name = name, "Received shutdown for indexer, performing cleanup"); shutdown_guard = Some(guard); }, } match self.end().await { Ok(quantities) => { - tracing::info!(target, ?quantities, "finalized clickhouse {} inserter", name); + tracing::info!(target, ?quantities, table_name = name, "finalized clickhouse inserter"); } Err(e) => { - tracing::error!(target, ?e, "failed to write end insertion of {} to indexer", name); + tracing::error!(target,error = ?e, table_name = name, "failed to write end insertion of indexer"); } } drop(shutdown_guard); diff --git a/crates/rbuilder/src/live_builder/base_config.rs b/crates/rbuilder/src/live_builder/base_config.rs index 293125629..a82ebd2fd 100644 --- a/crates/rbuilder/src/live_builder/base_config.rs +++ b/crates/rbuilder/src/live_builder/base_config.rs @@ -6,7 +6,8 @@ use crate::{ 
NullOrderFlowTracerManager, OrderFlowTracerManager, OrderFlowTracerManagerImpl, }, order_input::OrderInputConfig, - LiveBuilder, ProcessKiller, + process_killer::ProcessKiller, + LiveBuilder, }, provider::{ ipc_state_provider::{IpcProviderConfig, IpcStateProviderFactory}, diff --git a/crates/rbuilder/src/live_builder/cli.rs b/crates/rbuilder/src/live_builder/cli.rs index 77c24b7c4..2aff740e4 100644 --- a/crates/rbuilder/src/live_builder/cli.rs +++ b/crates/rbuilder/src/live_builder/cli.rs @@ -3,18 +3,21 @@ use rbuilder_config::load_toml_config; use serde::de::DeserializeOwned; use std::{ fmt::Debug, + io::Write, path::PathBuf, sync::{atomic::AtomicBool, Arc}, }; use sysperf::{format_results, gather_system_info, run_all_benchmarks}; -use tokio::signal::ctrl_c; +use tokio::signal::{ctrl_c, unix::SignalKind}; use tokio_util::sync::CancellationToken; +use tracing::error; use crate::{ building::{ builders::{BacktestSimulateBlockInput, Block}, PartialBlockExecutionTracer, }, + live_builder::process_killer::{FLUSH_TRACE_TIME, MAX_WAIT_TIME}, provider::StateProviderFactory, telemetry, utils::{bls::generate_random_bls_address, build_info::Version}, @@ -127,13 +130,19 @@ where config.version_for_telemetry(), ) .await?; - if config.base_config().ipc_provider.is_some() { + let res = if config.base_config().ipc_provider.is_some() { let provider = config.base_config().create_ipc_provider_factory()?; run_builder(provider, config, on_run, ready_to_build).await } else { let provider = config.base_config().create_reth_provider_factory(false)?; run_builder(provider, config, on_run, ready_to_build).await - } + }; + // Flush the stdout and stderr buffers so all tracing messages are flushed. + std::io::stdout().flush().ok(); + std::io::stderr().flush().ok(); + // Small delay to let any async work complete so flushed buffers are actually flushed. 
+ std::thread::sleep(FLUSH_TRACE_TIME); + res } async fn run_builder( @@ -149,8 +158,18 @@ where let cancel = CancellationToken::new(); let builder = config.new_builder(provider, cancel.clone()).await?; + let terminate = async { + tokio::signal::unix::signal(SignalKind::terminate()) + .expect("failed to install signal handler") + .recv() + .await; + }; + let ctrlc = tokio::spawn(async move { - ctrl_c().await.unwrap_or_default(); + tokio::select! { + _ = ctrl_c() => { tracing::info!("Received SIGINT, closing down..."); }, + _ = terminate => { tracing::info!("Received SIGTERM, closing down..."); }, + } cancel.cancel() }); if let Some(on_run) = on_run { @@ -158,5 +177,8 @@ where } builder.run(ready_to_build).await?; ctrlc.await.unwrap_or_default(); + error!("Main thread waiting to die..."); + std::thread::sleep(MAX_WAIT_TIME); + error!("Main thread exiting"); Ok(()) } diff --git a/crates/rbuilder/src/live_builder/mod.rs b/crates/rbuilder/src/live_builder/mod.rs index aa2b81c88..e54618f67 100644 --- a/crates/rbuilder/src/live_builder/mod.rs +++ b/crates/rbuilder/src/live_builder/mod.rs @@ -7,6 +7,7 @@ pub mod config; pub mod order_flow_tracing; pub mod order_input; pub mod payload_events; +pub mod process_killer; pub mod simulation; pub mod wallet_balance_watcher; pub mod watchdog; @@ -16,6 +17,7 @@ use crate::{ live_builder::{ order_flow_tracing::order_flow_tracer_manager::OrderFlowTracerManager, order_input::{start_orderpool_jobs, OrderInputConfig}, + process_killer::ProcessKiller, simulation::OrderSimulationPool, watchdog::spawn_watchdog_thread, }, @@ -471,44 +473,3 @@ async fn try_send_to_orderpool( } } } - -/// This time should be enough to let the process to finish its work and exit gracefully. -/// Example of this need is the clickhouse backup that takes a while to finish and we don't want to loose any blocks. 
-const PROCESS_KILLER_WAIT_TIME: Duration = Duration::from_secs(12); -#[derive(Debug, Clone)] -pub struct ProcessKiller { - cancellation_token: CancellationToken, -} - -impl ProcessKiller { - pub fn new(cancellation_token: CancellationToken) -> Self { - Self { cancellation_token } - } - - /// is_error -> This is a critical kill. - /// !is_error -> This is a graceful kill. - pub fn kill(&self, reason: &str, is_error: bool) { - // puaj, copy paste - if is_error { - error!( - reason, - wait_time_secs = PROCESS_KILLER_WAIT_TIME.as_secs(), - "Process killing started, signaling cancellation token and waiting" - ); - } else { - info!( - reason, - wait_time_secs = PROCESS_KILLER_WAIT_TIME.as_secs(), - "Process killing started, signaling cancellation token and waiting" - ); - } - self.cancellation_token.cancel(); - std::thread::sleep(PROCESS_KILLER_WAIT_TIME); - if is_error { - error!(reason, "Killing process"); - } else { - info!(reason, "Killing process"); - } - std::process::exit(1); - } -} diff --git a/crates/rbuilder/src/live_builder/process_killer.rs b/crates/rbuilder/src/live_builder/process_killer.rs new file mode 100644 index 000000000..2da8dd7c2 --- /dev/null +++ b/crates/rbuilder/src/live_builder/process_killer.rs @@ -0,0 +1,57 @@ +//! Sadly we need to improve out builder shutdown procedure :( +//! We have some places where we abruptly kill the process (eg: watchdog, bidding service communication errors) but +//! some modules need to finish their work so we must give them some time before killing the process. +//! Here we centralize all this hacky stuff so at least we can see all the constants in one place. + +use std::time::Duration; + +use alloy_eips::merge::SLOT_DURATION_SECS; +use tokio_util::sync::CancellationToken; +use tracing::error; + +/// Time for the run_submit_to_relays_job to stop submitting blocks after the cancellation token is cancelled. 
+/// It's just a loop that signs blocks and submits them async (on detached tasks) so it should not take more than a second. +pub const RUN_SUBMIT_TO_RELAYS_JOB_CANCEL_TIME_SECONDS: u64 = 1; + +/// Time for the block building to close after the cancellation token is cancelled. +/// We use a whole block as heuristic for the time to close. +pub const BLOCK_BUILDING_CLOSE_TIME_SECONDS: u64 = SLOT_DURATION_SECS; + +pub const RUN_SUBMIT_TO_RELAYS_JOB_CANCEL_TIME: Duration = + Duration::from_secs(RUN_SUBMIT_TO_RELAYS_JOB_CANCEL_TIME_SECONDS); + +/// This time should be enough to let the process finish its work and exit gracefully. +/// Example of this need is the clickhouse backup that takes a while to finish and we don't want to lose any blocks. +/// This should be > than everything we have to wait for in the constants above. +pub const MAX_WAIT_TIME_SECONDS: u64 = BLOCK_BUILDING_CLOSE_TIME_SECONDS; +pub const MAX_WAIT_TIME: Duration = Duration::from_secs(MAX_WAIT_TIME_SECONDS); + +/// Time needed to let the tracing subscriber flush its buffers. +pub const FLUSH_TRACE_TIME_MILLI_SECONDS: u64 = 200; +pub const FLUSH_TRACE_TIME: Duration = Duration::from_millis(FLUSH_TRACE_TIME_MILLI_SECONDS); + +/// We add 1 second to allow the process to finish its work and exit gracefully.
+const PROCESS_KILLER_WAIT_TIME: Duration = Duration::from_secs(MAX_WAIT_TIME_SECONDS + 1); + +#[derive(Debug, Clone)] +pub struct ProcessKiller { + cancellation_token: CancellationToken, +} + +impl ProcessKiller { + pub fn new(cancellation_token: CancellationToken) -> Self { + Self { cancellation_token } + } + + pub fn kill(&self, reason: &str) { + error!( + reason, + wait_time_secs = PROCESS_KILLER_WAIT_TIME.as_secs(), + "Process killing started, signaling cancellation token and waiting" + ); + self.cancellation_token.cancel(); + std::thread::sleep(PROCESS_KILLER_WAIT_TIME); + error!(reason, "Killing process"); + std::process::exit(1); + } +} diff --git a/crates/rbuilder/src/live_builder/watchdog.rs b/crates/rbuilder/src/live_builder/watchdog.rs index 45fb2bd4f..c35625ac0 100644 --- a/crates/rbuilder/src/live_builder/watchdog.rs +++ b/crates/rbuilder/src/live_builder/watchdog.rs @@ -1,8 +1,7 @@ +use crate::live_builder::process_killer::ProcessKiller; use flume::RecvTimeoutError; use std::{io, time::Duration}; -use tracing::{error, info}; - -use crate::live_builder::ProcessKiller; +use tracing::info; /// Spawns a thread that will kill the process if there is no events sent on the channel /// for the timeout time. 
@@ -20,15 +19,14 @@ pub fn spawn_watchdog_thread( match receiver.recv_timeout(timeout) { Ok(()) => {} Err(RecvTimeoutError::Timeout) => { - process_killer - .kill(format!("Watchdog timeout: {}", context).as_str(), true); + process_killer.kill(format!("Watchdog timeout: {}", context).as_str()); } Err(RecvTimeoutError::Disconnected) => { break; } } } - process_killer.kill(format!("Watchdog finished: {}", context).as_str(), false); + info!(context, "Watchdog closed"); })?; Ok(sender) From eedac68c9f48e9f2cdc8a539565b45fd6e576082 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Fri, 14 Nov 2025 12:55:55 -0300 Subject: [PATCH 13/26] lint --- crates/rbuilder-operator/src/flashbots_config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rbuilder-operator/src/flashbots_config.rs b/crates/rbuilder-operator/src/flashbots_config.rs index 9a8bb4ffd..a7821f0b7 100644 --- a/crates/rbuilder-operator/src/flashbots_config.rs +++ b/crates/rbuilder-operator/src/flashbots_config.rs @@ -315,7 +315,7 @@ impl FlashbotsConfig { built_blocks_clickhouse_config.clone(), cancellation_token.clone(), ); - return Ok(Some(Box::new(writer))); + Ok(Some(Box::new(writer))) } else { if block_processor_key.is_some() { return Self::bail_blocks_processor_url_not_set(); From e1009122dedfbb1b22ecb8b30419e7e212f1ab64 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Fri, 14 Nov 2025 13:20:24 -0300 Subject: [PATCH 14/26] new timeout --- crates/rbuilder/src/integration/simple.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rbuilder/src/integration/simple.rs b/crates/rbuilder/src/integration/simple.rs index f2029bf85..3e5970b71 100644 --- a/crates/rbuilder/src/integration/simple.rs +++ b/crates/rbuilder/src/integration/simple.rs @@ -125,7 +125,7 @@ mod tests { 12 /* problem detected in next block start an cancel is signaled*/+ 15 /*watchdog_timeout_sec */+ 12 /*extra delay from watchdog*/+ - 1 /* for timing errors */; + 10 /* for timing errors */; 
tokio::time::sleep(Duration::from_secs(timeout_secs)).await; //puaj assert!(!srv.builder_is_alive()); } From f88d2d87ac1ae48255a82a567cedec8038bc06e4 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Fri, 14 Nov 2025 14:29:40 -0300 Subject: [PATCH 15/26] better shutdown on blocklist errors --- crates/rbuilder/src/integration/simple.rs | 9 ++++-- crates/rbuilder/src/live_builder/mod.rs | 34 +++++++++++++++-------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/crates/rbuilder/src/integration/simple.rs b/crates/rbuilder/src/integration/simple.rs index 3e5970b71..b492a07fd 100644 --- a/crates/rbuilder/src/integration/simple.rs +++ b/crates/rbuilder/src/integration/simple.rs @@ -2,7 +2,10 @@ mod tests { use crate::{ integration::playground::Playground, - live_builder::block_list_provider::test::{BlocklistHttpServer, BLOCKLIST_LEN_2}, + live_builder::{ + block_list_provider::test::{BlocklistHttpServer, BLOCKLIST_LEN_2}, + process_killer::MAX_WAIT_TIME_SECONDS, + }, }; use alloy_network::TransactionBuilder; @@ -124,8 +127,8 @@ mod tests { let timeout_secs = 5 /*blocklist_url_max_age_secs in cfg */ + 12 /* problem detected in next block start an cancel is signaled*/+ 15 /*watchdog_timeout_sec */+ - 12 /*extra delay from watchdog*/+ - 10 /* for timing errors */; + MAX_WAIT_TIME_SECONDS /*extra delay from letting the builder finish its work*/+ + 1 /* for timing errors */; tokio::time::sleep(Duration::from_secs(timeout_secs)).await; //puaj assert!(!srv.builder_is_alive()); } diff --git a/crates/rbuilder/src/live_builder/mod.rs b/crates/rbuilder/src/live_builder/mod.rs index e54618f67..dad3d0d21 100644 --- a/crates/rbuilder/src/live_builder/mod.rs +++ b/crates/rbuilder/src/live_builder/mod.rs @@ -55,7 +55,7 @@ use std::{ time::Duration, }; use time::OffsetDateTime; -use tokio::sync::mpsc; +use tokio::{sync::mpsc, task::JoinHandle}; use tokio_util::sync::CancellationToken; use tracing::*; @@ -151,6 +151,28 @@ where } pub async fn run(self, ready_to_build: Arc) 
-> eyre::Result<()> { + let global_cancellation = self.global_cancellation.clone(); + let mut inner_jobs_handles = Vec::new(); + let res = self + .run_no_cleanup(ready_to_build, &mut inner_jobs_handles) + .await; + info!("Builder shutting down"); + global_cancellation.cancel(); + for handle in inner_jobs_handles { + handle + .await + .map_err(|err| warn!(?err, "Job handle await error")) + .unwrap_or_default(); + } + res + } + + /// Run the builder without cleaning up after itself. + pub async fn run_no_cleanup( + self, + ready_to_build: Arc, + inner_jobs_handles: &mut Vec>, + ) -> eyre::Result<()> { info!( "Builder initial block list size: {}", self.blocklist_provider.get_blocklist()?.len(), @@ -167,7 +189,6 @@ where .with_context(|| "Error spawning error storage writer")?; } - let mut inner_jobs_handles = Vec::new(); let mut payload_events_channel = self.blocks_source.recv_slot_channel(); let (header_sender, header_receiver) = mpsc::channel(CLEAN_TASKS_CHANNEL_SIZE); @@ -334,15 +355,6 @@ where }; } } - - info!("Builder shutting down"); - self.global_cancellation.cancel(); - for handle in inner_jobs_handles { - handle - .await - .map_err(|err| warn!(?err, "Job handle await error")) - .unwrap_or_default(); - } Ok(()) } From 190a10e7ddb7e865c6b651407dda2fc00fcf79c3 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 12:35:38 -0300 Subject: [PATCH 16/26] final kill --- crates/rbuilder/src/live_builder/cli.rs | 18 +++++++--------- .../src/live_builder/process_killer.rs | 19 +++++++++++++++-- crates/rbuilder/src/live_builder/watchdog.rs | 21 +++++++------------ 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/crates/rbuilder/src/live_builder/cli.rs b/crates/rbuilder/src/live_builder/cli.rs index 2aff740e4..3bc2f151c 100644 --- a/crates/rbuilder/src/live_builder/cli.rs +++ b/crates/rbuilder/src/live_builder/cli.rs @@ -3,7 +3,6 @@ use rbuilder_config::load_toml_config; use serde::de::DeserializeOwned; use std::{ fmt::Debug, - io::Write, 
path::PathBuf, sync::{atomic::AtomicBool, Arc}, }; @@ -17,7 +16,7 @@ use crate::{ builders::{BacktestSimulateBlockInput, Block}, PartialBlockExecutionTracer, }, - live_builder::process_killer::{FLUSH_TRACE_TIME, MAX_WAIT_TIME}, + live_builder::process_killer::{ensure_tracing_buffers_flushed, ProcessKiller, MAX_WAIT_TIME}, provider::StateProviderFactory, telemetry, utils::{bls::generate_random_bls_address, build_info::Version}, @@ -137,11 +136,7 @@ where let provider = config.base_config().create_reth_provider_factory(false)?; run_builder(provider, config, on_run, ready_to_build).await }; - // Flush the stdout and stderr buffers so all tracing messages are flushed. - std::io::stdout().flush().ok(); - std::io::stderr().flush().ok(); - // Small delay to let any async work complete so flushed buffers are actually flushed. - std::thread::sleep(FLUSH_TRACE_TIME); + ensure_tracing_buffers_flushed(); res } @@ -165,18 +160,21 @@ where .await; }; - let ctrlc = tokio::spawn(async move { + tokio::spawn(async move { tokio::select! { _ = ctrl_c() => { tracing::info!("Received SIGINT, closing down..."); }, _ = terminate => { tracing::info!("Received SIGTERM, closing down..."); }, + _ = cancel.cancelled() => { tracing::info!("Received cancellation token cancellation, closing down..."); }, } - cancel.cancel() + cancel.cancel(); + // Just in case the main thread fails to end gracefully, we kill it abruptly. + // We should never reach this the "process::exit" inside wait_and_kill if the main thread ended (as expected). 
+ ProcessKiller::wait_and_kill("Main thread received termination signal"); }); if let Some(on_run) = on_run { on_run(); } builder.run(ready_to_build).await?; - ctrlc.await.unwrap_or_default(); error!("Main thread waiting to die..."); std::thread::sleep(MAX_WAIT_TIME); error!("Main thread exiting"); diff --git a/crates/rbuilder/src/live_builder/process_killer.rs b/crates/rbuilder/src/live_builder/process_killer.rs index 2da8dd7c2..3d23ba6b8 100644 --- a/crates/rbuilder/src/live_builder/process_killer.rs +++ b/crates/rbuilder/src/live_builder/process_killer.rs @@ -3,7 +3,7 @@ //! some modules need to finish their work so we must give them some time before killing the process. //! Here we centralize all this hacky stuff so at least we can see all the constants in one place. -use std::time::Duration; +use std::{io::Write, time::Duration}; use alloy_eips::merge::SLOT_DURATION_SECS; use tokio_util::sync::CancellationToken; @@ -30,8 +30,9 @@ pub const MAX_WAIT_TIME: Duration = Duration::from_secs(MAX_WAIT_TIME_SECONDS); pub const FLUSH_TRACE_TIME_MILLI_SECONDS: u64 = 200; pub const FLUSH_TRACE_TIME: Duration = Duration::from_secs(FLUSH_TRACE_TIME_MILLI_SECONDS); +/// Time we wait before killing the process abruptly in ProcessKiller::kill(). /// We add 1 second to allow the process to finish its work and exit gracefully. -const PROCESS_KILLER_WAIT_TIME: Duration = Duration::from_secs(MAX_WAIT_TIME_SECONDS + 1); +pub const PROCESS_KILLER_WAIT_TIME: Duration = Duration::from_secs(MAX_WAIT_TIME_SECONDS + 1); #[derive(Debug, Clone)] pub struct ProcessKiller { @@ -50,8 +51,22 @@ impl ProcessKiller { "Process killing started, signaling cancellation token and waiting" ); self.cancellation_token.cancel(); + Self::wait_and_kill(reason); + } + + /// Waits some time to give the process a chance to finish its work and exit gracefully and then kills it abruptly. 
+ pub fn wait_and_kill(reason: &str) { std::thread::sleep(PROCESS_KILLER_WAIT_TIME); error!(reason, "Killing process"); + ensure_tracing_buffers_flushed(); std::process::exit(1); } } + +pub fn ensure_tracing_buffers_flushed() { + // Flush the stdout and stderr buffers so all tracing messages are flushed. + let _ = std::io::stdout().flush(); + let _ = std::io::stderr().flush(); + // Small delay to let any async work complete so flushed buffers are actually flushed. + std::thread::sleep(FLUSH_TRACE_TIME); +} diff --git a/crates/rbuilder/src/live_builder/watchdog.rs b/crates/rbuilder/src/live_builder/watchdog.rs index c35625ac0..23c9fea50 100644 --- a/crates/rbuilder/src/live_builder/watchdog.rs +++ b/crates/rbuilder/src/live_builder/watchdog.rs @@ -1,7 +1,6 @@ use crate::live_builder::process_killer::ProcessKiller; use flume::RecvTimeoutError; use std::{io, time::Duration}; -use tracing::info; /// Spawns a thread that will kill the process if there is no events sent on the channel /// for the timeout time. 
@@ -14,20 +13,16 @@ pub fn spawn_watchdog_thread( let (sender, receiver) = flume::unbounded(); std::thread::Builder::new() .name(String::from("watchdog")) - .spawn(move || { - loop { - match receiver.recv_timeout(timeout) { - Ok(()) => {} - Err(RecvTimeoutError::Timeout) => { - process_killer.kill(format!("Watchdog timeout: {}", context).as_str()); - } - Err(RecvTimeoutError::Disconnected) => { - break; - } + .spawn(move || loop { + match receiver.recv_timeout(timeout) { + Ok(()) => {} + Err(RecvTimeoutError::Timeout) => { + process_killer.kill(format!("Watchdog timeout: {}", context).as_str()); + } + Err(RecvTimeoutError::Disconnected) => { + break; } } - info!(context, "Watchdog closed"); })?; - Ok(sender) } From 4e65ded96dbed8f71224684b82fca7ce73a4ef37 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 13:04:31 -0300 Subject: [PATCH 17/26] trying to fix CI --- crates/rbuilder/src/live_builder/cli.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/rbuilder/src/live_builder/cli.rs b/crates/rbuilder/src/live_builder/cli.rs index 3bc2f151c..bf031bcbf 100644 --- a/crates/rbuilder/src/live_builder/cli.rs +++ b/crates/rbuilder/src/live_builder/cli.rs @@ -9,7 +9,7 @@ use std::{ use sysperf::{format_results, gather_system_info, run_all_benchmarks}; use tokio::signal::{ctrl_c, unix::SignalKind}; use tokio_util::sync::CancellationToken; -use tracing::error; +use tracing::{error, info}; use crate::{ building::{ @@ -175,8 +175,9 @@ where on_run(); } builder.run(ready_to_build).await?; - error!("Main thread waiting to die..."); + info!("Main thread waiting to die..."); std::thread::sleep(MAX_WAIT_TIME); - error!("Main thread exiting"); - Ok(()) + info!("Main thread exiting"); + ensure_tracing_buffers_flushed(); + std::process::exit(1); } From 211808bc5e6b384f59aad8c3f8d6a14ba53bc9d4 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 13:04:51 -0300 Subject: [PATCH 18/26] msg --- 
crates/rbuilder/src/live_builder/cli.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rbuilder/src/live_builder/cli.rs b/crates/rbuilder/src/live_builder/cli.rs index bf031bcbf..657721472 100644 --- a/crates/rbuilder/src/live_builder/cli.rs +++ b/crates/rbuilder/src/live_builder/cli.rs @@ -177,7 +177,7 @@ where builder.run(ready_to_build).await?; info!("Main thread waiting to die..."); std::thread::sleep(MAX_WAIT_TIME); - info!("Main thread exiting"); + info!("Main thread killing process"); ensure_tracing_buffers_flushed(); std::process::exit(1); } From 38d8117b9f0dd9650cb321cb615e4a128421a74c Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 14:17:27 -0300 Subject: [PATCH 19/26] cl test2 --- crates/rbuilder/src/live_builder/cli.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/rbuilder/src/live_builder/cli.rs b/crates/rbuilder/src/live_builder/cli.rs index 657721472..4d0ff6c5d 100644 --- a/crates/rbuilder/src/live_builder/cli.rs +++ b/crates/rbuilder/src/live_builder/cli.rs @@ -179,5 +179,6 @@ where std::thread::sleep(MAX_WAIT_TIME); info!("Main thread killing process"); ensure_tracing_buffers_flushed(); + info!("Main thread killing process2"); std::process::exit(1); } From ec9f57b093c5c136c83766219e3d2805f8a8871f Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 14:27:03 -0300 Subject: [PATCH 20/26] cl test --- crates/rbuilder/src/live_builder/cli.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/rbuilder/src/live_builder/cli.rs b/crates/rbuilder/src/live_builder/cli.rs index 4d0ff6c5d..f3b73915d 100644 --- a/crates/rbuilder/src/live_builder/cli.rs +++ b/crates/rbuilder/src/live_builder/cli.rs @@ -177,8 +177,8 @@ where builder.run(ready_to_build).await?; info!("Main thread waiting to die..."); std::thread::sleep(MAX_WAIT_TIME); - info!("Main thread killing process"); - ensure_tracing_buffers_flushed(); + info!("Main thread killing process!"); + 
//ensure_tracing_buffers_flushed(); info!("Main thread killing process2"); std::process::exit(1); } From b80bad0dd526040cdf0fc794c9c232c73673865d Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 14:39:28 -0300 Subject: [PATCH 21/26] ci test --- crates/rbuilder/src/live_builder/cli.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rbuilder/src/live_builder/cli.rs b/crates/rbuilder/src/live_builder/cli.rs index f3b73915d..3f98149fd 100644 --- a/crates/rbuilder/src/live_builder/cli.rs +++ b/crates/rbuilder/src/live_builder/cli.rs @@ -9,7 +9,7 @@ use std::{ use sysperf::{format_results, gather_system_info, run_all_benchmarks}; use tokio::signal::{ctrl_c, unix::SignalKind}; use tokio_util::sync::CancellationToken; -use tracing::{error, info}; +use tracing::info; use crate::{ building::{ From 92d468b00bd53d26d309eede45d2a7d68745fbbb Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 14:48:31 -0300 Subject: [PATCH 22/26] ci test --- crates/rbuilder/src/live_builder/cli.rs | 16 +++++++++------- .../rbuilder/src/live_builder/process_killer.rs | 7 +++---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/crates/rbuilder/src/live_builder/cli.rs b/crates/rbuilder/src/live_builder/cli.rs index 3f98149fd..00458146a 100644 --- a/crates/rbuilder/src/live_builder/cli.rs +++ b/crates/rbuilder/src/live_builder/cli.rs @@ -167,18 +167,20 @@ where _ = cancel.cancelled() => { tracing::info!("Received cancellation token cancellation, closing down..."); }, } cancel.cancel(); - // Just in case the main thread fails to end gracefully, we kill it abruptly. - // We should never reach this the "process::exit" inside wait_and_kill if the main thread ended (as expected). + // Just in case the main thread fails to end gracefully, we kill it abruptly so the service stops. + // We should never reach the "process::exit" inside wait_and_kill if the main thread ended (as expected). 
ProcessKiller::wait_and_kill("Main thread received termination signal"); }); if let Some(on_run) = on_run { on_run(); } builder.run(ready_to_build).await?; - info!("Main thread waiting to die..."); + info!( + wait_time_secs = MAX_WAIT_TIME.as_secs(), + "Main thread waiting to die..." + ); std::thread::sleep(MAX_WAIT_TIME); - info!("Main thread killing process!"); - //ensure_tracing_buffers_flushed(); - info!("Main thread killing process2"); - std::process::exit(1); + info!("Main thread exiting"); + ensure_tracing_buffers_flushed(); + Ok(()) } diff --git a/crates/rbuilder/src/live_builder/process_killer.rs b/crates/rbuilder/src/live_builder/process_killer.rs index 3d23ba6b8..1240e9363 100644 --- a/crates/rbuilder/src/live_builder/process_killer.rs +++ b/crates/rbuilder/src/live_builder/process_killer.rs @@ -3,7 +3,7 @@ //! some modules need to finish their work so we must give them some time before killing the process. //! Here we centralize all this hacky stuff so at least we can see all the constants in one place. -use std::{io::Write, time::Duration}; +use std::time::Duration; use alloy_eips::merge::SLOT_DURATION_SECS; use tokio_util::sync::CancellationToken; @@ -63,10 +63,9 @@ impl ProcessKiller { } } +/// Tries to guarantee that all tracing is flushed. +/// Sadly my attempts to flush the buffers failed (std::io::stdout().flush() hanged) so we just sleep for a while. pub fn ensure_tracing_buffers_flushed() { - // Flush the stdout and stderr buffers so all tracing messages are flushed. - let _ = std::io::stdout().flush(); - let _ = std::io::stderr().flush(); // Small delay to let any async work complete so flushed buffers are actually flushed. 
std::thread::sleep(FLUSH_TRACE_TIME); } From de842f384701abefa82a467bbb119a04b24a74b7 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 14:59:29 -0300 Subject: [PATCH 23/26] bug fix secs to millisecs --- crates/rbuilder/src/live_builder/process_killer.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/rbuilder/src/live_builder/process_killer.rs b/crates/rbuilder/src/live_builder/process_killer.rs index 1240e9363..bf13b26ed 100644 --- a/crates/rbuilder/src/live_builder/process_killer.rs +++ b/crates/rbuilder/src/live_builder/process_killer.rs @@ -27,8 +27,7 @@ pub const MAX_WAIT_TIME_SECONDS: u64 = BLOCK_BUILDING_CLOSE_TIME_SECONDS; pub const MAX_WAIT_TIME: Duration = Duration::from_secs(MAX_WAIT_TIME_SECONDS); /// Time needed to let the tracing subscriber to flush its buffers. -pub const FLUSH_TRACE_TIME_MILLI_SECONDS: u64 = 200; -pub const FLUSH_TRACE_TIME: Duration = Duration::from_secs(FLUSH_TRACE_TIME_MILLI_SECONDS); +pub const FLUSH_TRACE_TIME: Duration = Duration::from_millis(200); /// Time we wait before killing the process abruptly in ProcessKiller::kill(). /// We add 1 second to allow the process to finish its work and exit gracefully. From 656c915bb6503ca18d33cfa06c91219f93c90762 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 15:04:20 -0300 Subject: [PATCH 24/26] flushing --- crates/rbuilder/src/live_builder/process_killer.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crates/rbuilder/src/live_builder/process_killer.rs b/crates/rbuilder/src/live_builder/process_killer.rs index bf13b26ed..dde5561fd 100644 --- a/crates/rbuilder/src/live_builder/process_killer.rs +++ b/crates/rbuilder/src/live_builder/process_killer.rs @@ -3,7 +3,7 @@ //! some modules need to finish their work so we must give them some time before killing the process. //! Here we centralize all this hacky stuff so at least we can see all the constants in one place. 
-use std::time::Duration; +use std::{io::Write, time::Duration}; use alloy_eips::merge::SLOT_DURATION_SECS; use tokio_util::sync::CancellationToken; @@ -62,9 +62,11 @@ impl ProcessKiller { } } -/// Tries to guarantee that all tracing is flushed. -/// Sadly my attempts to flush the buffers failed (std::io::stdout().flush() hanged) so we just sleep for a while. +/// Tries to guarantee that all tracing is flushed so we don't loose any final messages. pub fn ensure_tracing_buffers_flushed() { + // Flush the stdout and stderr buffers so all tracing messages are flushed. + let _ = std::io::stdout().flush(); + let _ = std::io::stderr().flush(); // Small delay to let any async work complete so flushed buffers are actually flushed. std::thread::sleep(FLUSH_TRACE_TIME); } From 65591b0465592cf5c9c2825e9e5cf5d13228f647 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 15:08:55 -0300 Subject: [PATCH 25/26] CI polishing --- crates/rbuilder/src/live_builder/cli.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/rbuilder/src/live_builder/cli.rs b/crates/rbuilder/src/live_builder/cli.rs index 00458146a..a9364c3f3 100644 --- a/crates/rbuilder/src/live_builder/cli.rs +++ b/crates/rbuilder/src/live_builder/cli.rs @@ -16,7 +16,7 @@ use crate::{ builders::{BacktestSimulateBlockInput, Block}, PartialBlockExecutionTracer, }, - live_builder::process_killer::{ensure_tracing_buffers_flushed, ProcessKiller, MAX_WAIT_TIME}, + live_builder::process_killer::{ProcessKiller, MAX_WAIT_TIME}, provider::StateProviderFactory, telemetry, utils::{bls::generate_random_bls_address, build_info::Version}, @@ -136,7 +136,6 @@ where let provider = config.base_config().create_reth_provider_factory(false)?; run_builder(provider, config, on_run, ready_to_build).await }; - ensure_tracing_buffers_flushed(); res } @@ -181,6 +180,5 @@ where ); std::thread::sleep(MAX_WAIT_TIME); info!("Main thread exiting"); - ensure_tracing_buffers_flushed(); Ok(()) } From 
8c960ea4454704a1a2a8bb18160cac1aff1470b4 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Sat, 15 Nov 2025 15:26:02 -0300 Subject: [PATCH 26/26] Upper case message --- crates/rbuilder-utils/src/clickhouse/indexer.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/rbuilder-utils/src/clickhouse/indexer.rs b/crates/rbuilder-utils/src/clickhouse/indexer.rs index 7d68cfd2f..fe6ad0c5e 100644 --- a/crates/rbuilder-utils/src/clickhouse/indexer.rs +++ b/crates/rbuilder-utils/src/clickhouse/indexer.rs @@ -224,7 +224,7 @@ impl InserterRunner { - tracing::info!(target,table_name = name, "clickhouse indexer channel closed"); + tracing::info!(target,table_name = name, "Clickhouse indexer channel closed"); } guard = shutdown => { tracing::info!(target,table_name = name, "Received shutdown for indexer, performing cleanup"); @@ -234,10 +234,10 @@ impl InserterRunner { - tracing::info!(target, ?quantities, table_name = name, "finalized clickhouse inserter"); + tracing::info!(target, ?quantities, table_name = name, "Finalized clickhouse inserter"); } Err(e) => { - tracing::error!(target,error = ?e, table_name = name, "failed to write end insertion of indexer"); + tracing::error!(target,error = ?e, table_name = name, "Failed to write end insertion of indexer"); } } drop(shutdown_guard);