diff --git a/.env.example b/.env.example index a171ead..63f8bca 100644 --- a/.env.example +++ b/.env.example @@ -10,6 +10,7 @@ TIPS_INGRESS_KAFKA_AUDIT_TOPIC=tips-audit TIPS_INGRESS_LOG_LEVEL=info TIPS_INGRESS_SEND_TRANSACTION_DEFAULT_LIFETIME_SECONDS=10800 TIPS_INGRESS_RPC_SIMULATION=http://localhost:8549 +TIPS_INGRESS_METRICS_ADDR=0.0.0.0:9002 # Audit service configuration TIPS_AUDIT_KAFKA_PROPERTIES_FILE=/app/docker/audit-kafka-properties diff --git a/Cargo.lock b/Cargo.lock index 2d02a77..df78ce3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3729,6 +3729,43 @@ dependencies = [ "syn 2.0.108", ] +[[package]] +name = "metrics-exporter-prometheus" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b166dea96003ee2531cf14833efedced545751d800f03535801d833313f8c15" +dependencies = [ + "base64 0.22.1", + "http-body-util", + "hyper 1.7.0", + "hyper-rustls 0.27.7", + "hyper-util", + "indexmap 2.12.0", + "ipnet", + "metrics", + "metrics-util", + "quanta", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "metrics-util" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe8db7a05415d0f919ffb905afa37784f71901c9a773188876984b4f769ab986" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown 0.15.5", + "metrics", + "quanta", + "rand 0.9.2", + "rand_xoshiro", + "sketches-ddsketch", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -4486,6 +4523,21 @@ dependencies = [ "unarray", ] +[[package]] +name = "quanta" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi", + "web-sys", + "winapi", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -4639,6 +4691,24 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_xoshiro" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" +dependencies = [ + "rand_core 0.9.3", +] + +[[package]] +name = "raw-cpuid" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" +dependencies = [ + "bitflags 2.10.0", +] + [[package]] name = "rayon" version = "1.11.0" @@ -6502,6 +6572,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +[[package]] +name = "sketches-ddsketch" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" + [[package]] name = "slab" version = "0.4.11" @@ -6947,6 +7023,9 @@ dependencies = [ "clap", "dotenvy", "jsonrpsee", + "metrics", + "metrics-derive", + "metrics-exporter-prometheus", "op-alloy-consensus", "op-alloy-network", "op-revm", diff --git a/Cargo.toml b/Cargo.toml index 2709a2b..b882b45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,3 +63,8 @@ backon = "1.5.2" op-revm = { version = "12.0.0", default-features = false } revm-context-interface = "10.2.0" alloy-signer-local = "1.0.36" + +# Misc +metrics = "0.24.1" +metrics-derive = "0.1" +metrics-exporter-prometheus = { version = "0.17.0", features = ["http-listener"]} diff --git a/crates/ingress-rpc/Cargo.toml b/crates/ingress-rpc/Cargo.toml index 5df7ec6..5099da8 100644 --- a/crates/ingress-rpc/Cargo.toml +++ b/crates/ingress-rpc/Cargo.toml @@ -35,3 +35,6 @@ op-revm.workspace = true revm-context-interface.workspace = true alloy-signer-local.workspace = true reth-optimism-evm.workspace = true +metrics.workspace = true +metrics-derive.workspace = true +metrics-exporter-prometheus.workspace = true diff --git a/crates/ingress-rpc/src/bin/main.rs b/crates/ingress-rpc/src/bin/main.rs index 01f821e..0d4ebca 100644 --- a/crates/ingress-rpc/src/bin/main.rs +++ b/crates/ingress-rpc/src/bin/main.rs @@ -4,10 +4,11 @@ use jsonrpsee::server::Server; use op_alloy_network::Optimism; use rdkafka::ClientConfig; use rdkafka::producer::FutureProducer; -use std::net::IpAddr; +use std::net::{IpAddr, SocketAddr}; use tips_audit::KafkaBundleEventPublisher; use tips_core::kafka::load_kafka_config_from_file; use tips_core::logger::init_logger; +use tips_ingress_rpc::metrics::init_prometheus_exporter; use tips_ingress_rpc::queue::KafkaQueuePublisher; use tips_ingress_rpc::service::{IngressApiServer, IngressService}; use tracing::info; @@ -70,6 +71,14 @@ struct Config { /// URL of the simulation RPC service for bundle metering #[arg(long, env = "TIPS_INGRESS_RPC_SIMULATION")] simulation_rpc: Url, + + /// Port to bind the Prometheus metrics server to + #[arg( + long, + env = "TIPS_INGRESS_METRICS_ADDR", + default_value = "0.0.0.0:9002" + )] + metrics_addr: SocketAddr, } #[tokio::main] @@ -80,12 +89,15 @@ async fn main() -> anyhow::Result<()> { init_logger(&config.log_level); + init_prometheus_exporter(config.metrics_addr).expect("Failed to install Prometheus exporter"); + info!( message = "Starting ingress service", address = %config.address, port = config.port, mempool_url = %config.mempool_url, - simulation_rpc = %config.simulation_rpc + simulation_rpc = %config.simulation_rpc, + metrics_address = %config.metrics_addr, ); let provider: RootProvider = ProviderBuilder::new() diff --git a/crates/ingress-rpc/src/lib.rs b/crates/ingress-rpc/src/lib.rs index f3f3c92..db50bc2 100644 --- a/crates/ingress-rpc/src/lib.rs +++ b/crates/ingress-rpc/src/lib.rs @@ -1,3 +1,4 @@ +pub mod metrics; pub mod queue; pub mod service; pub mod validation; diff --git a/crates/ingress-rpc/src/metrics.rs b/crates/ingress-rpc/src/metrics.rs new file mode 100644 index 0000000..eb2e34c --- /dev/null +++ b/crates/ingress-rpc/src/metrics.rs @@ -0,0 +1,40 @@ +use metrics::Histogram; +use metrics_derive::Metrics; +use metrics_exporter_prometheus::PrometheusBuilder; +use std::net::SocketAddr; +use tokio::time::Duration; + +/// `record_histogram` lets us record with tags. +pub fn record_histogram(rpc_latency: Duration, rpc: String) { + metrics::histogram!("tips_ingress_rpc_rpc_latency", "rpc" => rpc) + .record(rpc_latency.as_secs_f64()); +} + +/// Metrics for the `tips_ingress_rpc` component. +/// Conventions: +/// - Durations are recorded in seconds (histograms). +/// - Counters are monotonic event counts. +/// - Gauges reflect the current value/state. +#[derive(Metrics, Clone)] +#[metrics(scope = "tips_ingress_rpc")] +pub struct Metrics { + #[metric(describe = "Duration of validate_tx")] + pub validate_tx_duration: Histogram, + + #[metric(describe = "Duration of validate_bundle")] + pub validate_bundle_duration: Histogram, + + #[metric(describe = "Duration of meter_bundle")] + pub meter_bundle_duration: Histogram, + + #[metric(describe = "Duration of send_raw_transaction")] + pub send_raw_transaction_duration: Histogram, +} + +/// Initialize Prometheus metrics exporter +pub fn init_prometheus_exporter(addr: SocketAddr) -> Result<(), Box> { + PrometheusBuilder::new() + .with_http_listener(addr) + .install() + .map_err(|e| Box::new(e) as Box) +} diff --git a/crates/ingress-rpc/src/service.rs b/crates/ingress-rpc/src/service.rs index 233c2ad..f53cdc6 100644 --- a/crates/ingress-rpc/src/service.rs +++ b/crates/ingress-rpc/src/service.rs @@ -16,8 +16,10 @@ use tips_core::{ AcceptedBundle, BLOCK_TIME, Bundle, BundleExtensions, BundleHash, CancelBundle, MeterBundleResponse, }; +use tokio::time::Instant; use tracing::{info, warn}; +use crate::metrics::{Metrics, record_histogram}; use crate::queue::QueuePublisher; use crate::validation::{AccountInfoLookup, L1BlockInfoLookup, validate_bundle, validate_tx}; @@ -43,6 +45,7 @@ pub struct IngressService { bundle_queue: Queue, audit_publisher: Audit, send_transaction_default_lifetime_seconds: u64, + metrics: Metrics, } impl IngressService { @@ -61,6 +64,7 @@ impl IngressService { bundle_queue: queue, audit_publisher, send_transaction_default_lifetime_seconds, + metrics: Metrics::default(), } } } @@ -116,6 +120,7 @@ where } async fn send_raw_transaction(&self, data: Bytes) -> RpcResult { + let start = Instant::now(); let transaction = self.validate_tx(&data).await?; let expiry_timestamp = SystemTime::now() @@ -175,6 +180,9 @@ where warn!(message = "Failed to publish audit event", bundle_id = %accepted_bundle.uuid(), error = %e); } + self.metrics + .send_raw_transaction_duration + .record(start.elapsed().as_secs_f64()); Ok(transaction.tx_hash()) } } @@ -185,6 +193,7 @@ where Audit: BundleEventPublisher + Sync + Send + 'static, { async fn validate_tx(&self, data: &Bytes) -> RpcResult> { + let start = Instant::now(); if data.is_empty() { return Err(EthApiError::EmptyRawTransactionData.into_rpc_err()); } @@ -204,10 +213,14 @@ where .await?; validate_tx(account, &transaction, data, &mut l1_block_info).await?; + self.metrics + .validate_tx_duration + .record(start.elapsed().as_secs_f64()); Ok(transaction) } async fn validate_bundle(&self, bundle: &Bundle) -> RpcResult<()> { + let start = Instant::now(); if bundle.txs.is_empty() { return Err( EthApiError::InvalidParams("Bundle cannot have empty transactions".into()) @@ -224,6 +237,9 @@ where } validate_bundle(bundle, total_gas, tx_hashes)?; + self.metrics + .validate_bundle_duration + .record(start.elapsed().as_secs_f64()); Ok(()) } @@ -231,12 +247,14 @@ where /// is within `BLOCK_TIME` will return the `MeterBundleResponse` that can be passed along /// to the builder. async fn meter_bundle(&self, bundle: &Bundle) -> RpcResult { + let start = Instant::now(); let res: MeterBundleResponse = self .simulation_provider .client() .request("base_meterBundle", (bundle,)) .await .map_err(|e| EthApiError::InvalidParams(e.to_string()).into_rpc_err())?; + record_histogram(start.elapsed(), "base_meterBundle".to_string()); // we can save some builder payload building computation by not including bundles // that we know will take longer than the block time to execute diff --git a/crates/ingress-rpc/src/validation.rs b/crates/ingress-rpc/src/validation.rs index 4ff4946..6411d42 100644 --- a/crates/ingress-rpc/src/validation.rs +++ b/crates/ingress-rpc/src/validation.rs @@ -12,8 +12,11 @@ use reth_rpc_eth_types::{EthApiError, RpcInvalidTransactionError, SignError}; use std::collections::HashSet; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use tips_core::Bundle; +use tokio::time::Instant; use tracing::warn; +use crate::metrics::record_histogram; + const MAX_BUNDLE_GAS: u64 = 25_000_000; /// Account info for a given address @@ -33,10 +36,13 @@ pub trait AccountInfoLookup: Send + Sync { #[async_trait] impl AccountInfoLookup for RootProvider { async fn fetch_account_info(&self, address: Address) -> RpcResult { + let start = Instant::now(); let account = self .get_account(address) .await .map_err(|_| EthApiError::Signing(SignError::NoAccount))?; + record_histogram(start.elapsed(), "eth_getAccount".to_string()); + Ok(AccountInfo { balance: account.balance, nonce: account.nonce, @@ -55,6 +61,7 @@ pub trait L1BlockInfoLookup: Send + Sync { #[async_trait] impl L1BlockInfoLookup for RootProvider { async fn fetch_l1_block_info(&self) -> RpcResult { + let start = Instant::now(); let block = self .get_block(BlockId::Number(BlockNumberOrTag::Latest)) .full() @@ -67,6 +74,7 @@ impl L1BlockInfoLookup for RootProvider { warn!(message = "empty latest block returned"); EthApiError::InternalEthError.into_rpc_err() })?; + record_histogram(start.elapsed(), "eth_getBlockByNumber".to_string()); let txs = block.transactions.clone(); let first_tx = txs.first_transaction().ok_or_else(|| { diff --git a/docker-compose.tips.yml b/docker-compose.tips.yml index e4613f7..97fccb9 100644 --- a/docker-compose.tips.yml +++ b/docker-compose.tips.yml @@ -8,6 +8,7 @@ services: container_name: tips-ingress-rpc ports: - "8080:8080" + - "9002:9002" env_file: - .env.docker volumes: