diff --git a/Cargo.lock b/Cargo.lock index 9cf4d96415c0..6a749e8a683e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -158,22 +158,22 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -662,9 +662,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.14.1" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d" +checksum = "5932a7d9d28b0d2ea34c6b3779d35e3dd6f6345317c34e73438c4f1f29144151" dependencies = [ "aws-lc-sys", "zeroize", @@ -672,9 +672,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.32.3" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "107a4e9d9cab9963e04e84bb8dee0e25f2a987f9a8bad5ed054abd439caa8f8c" +checksum = "1826f2e4cfc2cd19ee53c42fbf68e2f81ec21108e0b7ecf6a71cf062137360fc" dependencies = [ "bindgen", "cc", @@ -753,9 +753,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.91.0" +version = "1.92.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f8090151d4d1e971269957b10dbf287bba551ab812e591ce0516b1c73b75d27" +checksum = "a0c7808adcff8333eaa76a849e6de926c6ac1a1268b9fd6afe32de9c29ef29d2" 
dependencies = [ "aws-credential-types", "aws-runtime", @@ -1313,9 +1313,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "bytes-utils" @@ -1363,9 +1363,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.45" +version = "1.2.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35900b6c8d709fb1d854671ae27aeaa9eec2f8b01b364e1619a40da3e6fe2afe" +checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" dependencies = [ "find-msvc-tools", "jobserver", @@ -1737,9 +1737,9 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -3062,9 +3062,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" [[package]] name = "fixedbitset" @@ -3263,9 +3263,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.9" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +checksum = 
"85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -3502,9 +3502,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hyper" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ "atomic-waker", "bytes", @@ -3570,9 +3570,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "52e9a2a24dc5c6821e71a7030e1e14b7b632acac55c40e9d2e082c621261bb56" dependencies = [ "base64 0.22.1", "bytes", @@ -3764,9 +3764,9 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade6dfcba0dfb62ad59e59e7241ec8912af34fd29e0e743e3db992bd278e8b65" +checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" dependencies = [ "console 0.16.1", "portable-atomic", @@ -5002,9 +5002,9 @@ checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" -version = "0.38.3" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs index 31909415a286..dec0ddf70658 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs +++ 
/// Renders `value` with `decimals` fractional digits, or returns `None` when
/// the *rounded* text needs more than `max_int_digits` digits before the
/// decimal point.
///
/// The decision is made on the rendered text rather than on the raw value so
/// that it always agrees with the rounding `format!` performs (e.g. `99.999`
/// renders as `100.00`, which no longer fits in two integer digits).
///
/// Callers pass non-negative, finite values and `decimals > 0`, so the text
/// always has the shape `<integer digits>.<fraction digits>`.
fn format_if_fits(value: f64, decimals: usize, max_int_digits: usize) -> Option<String> {
    let text = format!("{value:.decimals$}");
    (text.len() <= max_int_digits + 1 + decimals).then_some(text)
}

/// Present count in human-readable form with K, M, B, T suffixes
///
/// * Counts below 1000 are printed verbatim (e.g. `999`).
/// * Larger counts are scaled to thousands (` K`), millions (` M`),
///   billions (` B`) or trillions (` T`).
/// * Scaled values that round below 100 get two decimals (`10.12 K`);
///   values from 100 up get one decimal (`123.4 K`).
///
/// Unit and precision are chosen *after* rounding, so a value never prints
/// as `100.00 K` or `1000.0 K`: `99_999` is `100.0 K` and `999_999` is
/// `1.00 M`. Only the largest unit may exceed three integer digits
/// (e.g. `1234.0 T`), because there is nothing to promote to.
pub fn human_readable_count(count: usize) -> String {
    // Ordered from smallest to largest; the loop below relies on this order.
    const UNITS: [(f64, &str); 4] = [
        (1_000.0, " K"),
        (1_000_000.0, " M"),
        (1_000_000_000.0, " B"),
        (1_000_000_000_000.0, " T"),
    ];

    if count < 1_000 {
        return count.to_string();
    }

    let count = count as f64;
    let (&(largest_divisor, largest_suffix), smaller_units) =
        UNITS.split_last().expect("UNITS is a non-empty constant");

    for &(divisor, suffix) in smaller_units {
        let value = count / divisor;
        // Prefer `dd.dd`; fall back to `ddd.d`; otherwise the value belongs
        // to a larger unit.
        let rendered =
            format_if_fits(value, 2, 2).or_else(|| format_if_fits(value, 1, 3));
        if let Some(text) = rendered {
            return format!("{text}{suffix}");
        }
    }

    // Largest unit: there is no further promotion, so accept any width.
    let value = count / largest_divisor;
    let text = format_if_fits(value, 2, 2).unwrap_or_else(|| format!("{value:.1}"));
    format!("{text}{largest_suffix}")
}

/// Present duration in human-readable form with 2 decimal places
///
/// `nanos` is a duration in nanoseconds. Durations below one microsecond are
/// printed as whole nanoseconds (`999ns`); everything else is scaled to
/// `µs`, `ms` or `s` and printed with two decimals.
///
/// The unit is chosen *after* rounding, so `999_999` ns prints as `1.00ms`
/// rather than `1000.00µs`.
pub fn human_readable_duration(nanos: u64) -> String {
    const NANOS_PER_SEC: f64 = 1_000_000_000.0;
    // Sub-second units, ordered from smallest to largest.
    const SUB_SECOND_UNITS: [(f64, &str); 2] = [(1_000.0, "µs"), (1_000_000.0, "ms")];

    if nanos < 1_000 {
        // < 1 microsecond: show in nanoseconds
        return format!("{nanos}ns");
    }

    let nanos_f64 = nanos as f64;
    for &(nanos_per_unit, suffix) in &SUB_SECOND_UNITS {
        // At most three integer digits; `1000.00` belongs to the next unit.
        if let Some(text) = format_if_fits(nanos_f64 / nanos_per_unit, 2, 3) {
            return format!("{text}{suffix}");
        }
    }

    // >= 1 second (after rounding): show in seconds, whatever the magnitude
    format!("{:.2}s", nanos_f64 / NANOS_PER_SEC)
}

#[test]
fn test_human_readable_count() {
    // Test small numbers (< 1000) - should display as-is
    assert_eq!(human_readable_count(0), "0");
    assert_eq!(human_readable_count(1), "1");
    assert_eq!(human_readable_count(999), "999");

    // Test thousands (K)
    assert_eq!(human_readable_count(1_000), "1.00 K");
    assert_eq!(human_readable_count(10_100), "10.10 K");
    assert_eq!(human_readable_count(1_532), "1.53 K");
    // Rounds up to 100, so it takes the one-decimal form
    assert_eq!(human_readable_count(99_999), "100.0 K");
    // Rounds up to 1000 K, so it is promoted to millions
    assert_eq!(human_readable_count(999_999), "1.00 M");

    // Test millions (M)
    assert_eq!(human_readable_count(1_000_000), "1.00 M");
    assert_eq!(human_readable_count(1_532_000), "1.53 M");
    assert_eq!(human_readable_count(99_000_000), "99.00 M");
    assert_eq!(human_readable_count(123_456_789), "123.5 M");

    // Test billions (B)
    assert_eq!(human_readable_count(1_000_000_000), "1.00 B");
    assert_eq!(human_readable_count(1_532_000_000), "1.53 B");
    // Rounds up to 1000 B, so it is promoted to trillions
    assert_eq!(human_readable_count(999_999_999_999), "1.00 T");

    // Test trillions (T)
    assert_eq!(human_readable_count(1_000_000_000_000), "1.00 T");
    assert_eq!(human_readable_count(42_000_000_000_000), "42.00 T");
    // Largest unit has nothing to promote to
    assert_eq!(human_readable_count(1_234_000_000_000_000), "1234.0 T");
}

#[test]
fn test_human_readable_duration() {
    // Test nanoseconds (< 1µs)
    assert_eq!(human_readable_duration(0), "0ns");
    assert_eq!(human_readable_duration(1), "1ns");
    assert_eq!(human_readable_duration(999), "999ns");

    // Test microseconds (1µs to < 1ms)
    assert_eq!(human_readable_duration(1_000), "1.00µs");
    assert_eq!(human_readable_duration(1_234), "1.23µs");
    // Rounds up to 1000µs, so it is promoted to milliseconds
    assert_eq!(human_readable_duration(999_999), "1.00ms");

    // Test milliseconds (1ms to < 1s)
    assert_eq!(human_readable_duration(1_000_000), "1.00ms");
    assert_eq!(human_readable_duration(11_295_377), "11.30ms");
    assert_eq!(human_readable_duration(1_234_567), "1.23ms");
    // Rounds up to 1000ms, so it is promoted to seconds
    assert_eq!(human_readable_duration(999_999_999), "1.00s");

    // Test seconds (>= 1s)
    assert_eq!(human_readable_duration(1_000_000_000), "1.00s");
    assert_eq!(human_readable_duration(1_234_567_890), "1.23s");
    assert_eq!(human_readable_duration(42_000_000_000), "42.00s");
}
Display for RatioMetrics { if part == 0 { write!(f, "N/A (0/0)") } else { - write!(f, "N/A ({part}/0)") + write!(f, "N/A ({}/0)", human_readable_count(part)) } } else { let percentage = (part as f64 / total as f64) * 100.0; - write!(f, "{}% ({part}/{total})", fmt_significant(percentage, 2)) + write!( + f, + "{}% ({}/{})", + fmt_significant(percentage, 2), + human_readable_count(part), + human_readable_count(total) + ) } } } @@ -939,8 +951,14 @@ impl Display for MetricValue { let readable_count = human_readable_size(count.value()); write!(f, "{readable_count}") } - Self::CurrentMemoryUsage(gauge) | Self::Gauge { gauge, .. } => { - write!(f, "{gauge}") + Self::CurrentMemoryUsage(gauge) => { + // CurrentMemoryUsage is in bytes, format like SpilledBytes + let readable_size = human_readable_size(gauge.value()); + write!(f, "{readable_size}") + } + Self::Gauge { gauge, .. } => { + // Generic gauge metrics - format with human-readable count + write!(f, "{}", human_readable_count(gauge.value())) } Self::ElapsedCompute(time) | Self::Time { time, .. 
/// Checks the `Display` output of `MetricValue` for every metric family that
/// goes through the human-readable formatters: counts, times, memory
/// gauges, generic gauges, pruning metrics and ratio metrics.
#[test]
fn test_human_readable_metric_formatting() {
    // Small builders so each assertion reads as "input -> rendered text".
    let count_of = |rows: usize| {
        let counter = Count::new();
        counter.add(rows);
        counter
    };
    let time_of = |nanos: u64| {
        let timer = Time::new();
        timer.add_duration(Duration::from_nanos(nanos));
        timer
    };

    // Counts: verbatim below 1000, then K / M / B suffixes
    assert_eq!(MetricValue::OutputRows(count_of(42)).to_string(), "42");
    assert_eq!(
        MetricValue::OutputRows(count_of(10_100)).to_string(),
        "10.10 K"
    );
    assert_eq!(
        MetricValue::SpilledRows(count_of(1_532_000)).to_string(),
        "1.53 M"
    );
    assert_eq!(
        MetricValue::OutputBatches(count_of(2_500_000_000)).to_string(),
        "2.50 B"
    );

    // Times: microseconds, milliseconds and seconds, two decimals each
    assert_eq!(
        MetricValue::ElapsedCompute(time_of(1_234)).to_string(),
        "1.23µs"
    );
    assert_eq!(
        MetricValue::ElapsedCompute(time_of(11_295_377)).to_string(),
        "11.30ms"
    );
    assert_eq!(
        MetricValue::ElapsedCompute(time_of(1_234_567_890)).to_string(),
        "1.23s"
    );

    // CurrentMemoryUsage is rendered as a byte size, not as a count
    let memory = Gauge::new();
    memory.add(100 * MB as usize);
    assert_eq!(
        MetricValue::CurrentMemoryUsage(memory).to_string(),
        "100.0 MB"
    );

    // A generic gauge is rendered as a count
    let generic = Gauge::new();
    generic.add(50_000);
    let generic_metric = MetricValue::Gauge {
        name: "custom".into(),
        gauge: generic,
    };
    assert_eq!(generic_metric.to_string(), "50.00 K");

    // Pruning metrics: total and matched are both rendered as counts
    let pruning = PruningMetrics::new();
    pruning.add_matched(500_000);
    pruning.add_pruned(500_000);
    let pruning_metric = MetricValue::PruningMetrics {
        name: "test_pruning".into(),
        pruning_metrics: pruning,
    };
    assert_eq!(
        pruning_metric.to_string(),
        "1.00 M total → 500.0 K matched"
    );

    // Ratio metrics: percentage followed by part/total rendered as counts
    let ratio = RatioMetrics::new();
    ratio.add_part(250_000);
    ratio.add_total(1_000_000);
    let ratio_metric = MetricValue::Ratio {
        name: "test_ratio".into(),
        ratio_metrics: ratio,
    };
    assert_eq!(ratio_metric.to_string(), "25% (250.0 K/1.00 M)");
}