From baa4f9f122333179da3c0d971cb777be87d930e0 Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Fri, 27 Jun 2025 12:21:57 -0600 Subject: [PATCH] client stats: do not panic if a metric changes type This can actually happen currently. Fixes BIT-5714 Signed-off-by: Matt Klein --- bd-buffer/src/ring_buffer_test.rs | 7 ++- bd-client-stats-store/src/lib.rs | 63 +++++++++++++++++---------- bd-client-stats-store/src/lib_test.rs | 40 ++++++++++++++--- bd-client-stats-store/src/test/mod.rs | 16 +++++-- bd-client-stats/src/lib.rs | 14 ------ bd-client-stats/src/stats.rs | 31 +++++++------ bd-stats-common/src/lib.rs | 18 ++++++-- bd-test-helpers/src/stats.rs | 31 +++++++++---- bd-workflows/src/config.rs | 11 +---- bd-workflows/src/metrics.rs | 5 ++- bd-workflows/src/metrics_test.rs | 9 ++-- bd-workflows/src/workflow_test.rs | 11 +---- 12 files changed, 159 insertions(+), 97 deletions(-) diff --git a/bd-buffer/src/ring_buffer_test.rs b/bd-buffer/src/ring_buffer_test.rs index 35a83228..72c55756 100644 --- a/bd-buffer/src/ring_buffer_test.rs +++ b/bd-buffer/src/ring_buffer_test.rs @@ -12,7 +12,7 @@ use bd_client_stats_store::test::StatsHelper; use bd_client_stats_store::{Collector, Counter}; use bd_proto::protos::config::v1::config::buffer_config::BufferSizes; use bd_proto::protos::config::v1::config::{BufferConfig, BufferConfigList, buffer_config}; -use bd_stats_common::{NameType, labels}; +use bd_stats_common::{MetricType, NameType, labels}; use std::path::{Path, PathBuf}; fn fake_counter() -> Counter { @@ -258,7 +258,10 @@ async fn ring_buffer_stats() { assert!( collector .find_counter( - &NameType::Global("ring_buffer:volatile_overwrite".to_string()), + &NameType::Global( + MetricType::Counter, + "ring_buffer:volatile_overwrite".to_string() + ), &labels! { "buffer_id" => "some-buffer" }, ) .unwrap() diff --git a/bd-client-stats-store/src/lib.rs b/bd-client-stats-store/src/lib.rs index 96eb6a48..7405767e 100644 --- a/bd-client-stats-store/src/lib.rs +++ b/bd-client-stats-store/src/lib.rs @@ -17,7 +17,7 @@ use bd_proto::protos::client::metric::{ DDSketchHistogram, InlineHistogramValues, }; -use bd_stats_common::{DynCounter, NameType, make_client_sketch}; +use bd_stats_common::{DynCounter, MetricType, NameType, make_client_sketch}; use parking_lot::Mutex; use sketches_rust::DDSketch; use std::collections::hash_map::Entry; @@ -49,9 +49,6 @@ const MAX_INLINE_HISTOGRAM_VALUES: usize = 5; #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("Changed type")] - ChangedType, - #[error("Overflow")] Overflow, } @@ -285,12 +282,13 @@ impl Scope { fn get_common_global( inner: &mut CollectorInner, name: String, + metric_type: MetricType, labels: BTreeMap, constructor: impl FnOnce() -> MetricData, ) -> &mut MetricData { let by_name = inner .metrics_by_name - .entry(NameType::Global(name)) + .entry(NameType::Global(metric_type, name)) .or_default(); match by_name.entry(labels) { @@ -302,11 +300,17 @@ impl Scope { #[must_use] pub fn counter_with_labels(&self, name: &str, labels: BTreeMap) -> Counter { let mut inner = self.collector.inner.lock(); - match Self::get_common_global(&mut inner, self.metric_name(name), labels, || { - MetricData::Counter(Counter { - value: Arc::new(AtomicU64::new(0)), - }) - }) { + match Self::get_common_global( + &mut inner, + self.metric_name(name), + MetricType::Counter, + labels, + || { + MetricData::Counter(Counter { + value: Arc::new(AtomicU64::new(0)), + }) + }, + ) { MetricData::Counter(counter) => counter.clone(), MetricData::Histogram(_) => unreachable!(), } @@ -325,9 +329,13 @@ impl Scope { #[must_use] pub fn histogram_with_labels(&self, name: &str, labels: BTreeMap) -> Histogram { let mut inner = self.collector.inner.lock(); - match Self::get_common_global(&mut inner, self.metric_name(name), labels, || { - MetricData::Histogram(Histogram::default()) - }) { + match Self::get_common_global( + &mut inner, + self.metric_name(name), + MetricType::Histogram, + labels, + || MetricData::Histogram(Histogram::default()), + ) { MetricData::Histogram(histogram) => histogram.clone(), MetricData::Counter(_) => unreachable!(), } @@ -533,12 +541,13 @@ impl Collector { fn get_common_workflow( inner: &mut CollectorInner, action_id: String, + metric_type: MetricType, labels: BTreeMap, constructor: impl FnOnce() -> MetricData, ) -> Result<&mut MetricData> { let by_name = inner .metrics_by_name - .entry(NameType::ActionId(action_id)) + .entry(NameType::ActionId(metric_type, action_id)) .or_default(); let by_name_len = by_name.len(); @@ -560,11 +569,17 @@ impl Collector { pub fn dynamic_counter(&self, tags: BTreeMap, id: &str) -> Result { let mut inner = self.inner.lock(); - match Self::get_common_workflow(&mut inner, id.to_string(), tags, || { - MetricData::Counter(Counter { - value: Arc::new(AtomicU64::new(0)), - }) - })? { + match Self::get_common_workflow( + &mut inner, + id.to_string(), + MetricType::Counter, + tags, + || { + MetricData::Counter(Counter { + value: Arc::new(AtomicU64::new(0)), + }) + }, + )? { MetricData::Counter(counter) => Ok(counter.clone()), MetricData::Histogram(_) => unreachable!(), } @@ -572,9 +587,13 @@ impl Collector { pub fn dynamic_histogram(&self, tags: BTreeMap, id: &str) -> Result { let mut inner = self.inner.lock(); - match Self::get_common_workflow(&mut inner, id.to_string(), tags, || { - MetricData::Histogram(Histogram::default()) - })? { + match Self::get_common_workflow( + &mut inner, + id.to_string(), + MetricType::Histogram, + tags, + || MetricData::Histogram(Histogram::default()), + )? { MetricData::Histogram(histogram) => Ok(histogram.clone()), MetricData::Counter(_) => unreachable!(), } diff --git a/bd-client-stats-store/src/lib_test.rs b/bd-client-stats-store/src/lib_test.rs index 2a0550e5..54f265e1 100644 --- a/bd-client-stats-store/src/lib_test.rs +++ b/bd-client-stats-store/src/lib_test.rs @@ -6,7 +6,15 @@ // https://polyformproject.org/wp-content/uploads/2020/06/PolyForm-Shield-1.0.0.txt use crate::{Collector, MetricData, NameType}; -use bd_stats_common::labels; +use bd_stats_common::{MetricType, labels}; +use std::collections::BTreeMap; + +#[test] +fn dual_type() { + let collector = Collector::default(); + let _counter = collector.dynamic_counter(BTreeMap::new(), "same_id"); + let _histogram = collector.dynamic_histogram(BTreeMap::new(), "same_id"); +} #[test] fn retain() { @@ -15,12 +23,18 @@ fn retain() { let h = collector.scope("").histogram("h"); assert!( collector - .find_counter(&NameType::Global("c".to_string()), &labels!()) + .find_counter( + &NameType::Global(MetricType::Counter, "c".to_string()), + &labels!() + ) .is_some() ); assert!( collector - .find_histogram(&NameType::Global("h".to_string()), &labels!()) + .find_histogram( + &NameType::Global(MetricType::Histogram, "h".to_string()), + &labels!() + ) .is_some() ); @@ -29,12 +43,18 @@ fn retain() { drop(h); assert!( collector - .find_counter(&NameType::Global("c".to_string()), &labels!()) + .find_counter( + &NameType::Global(MetricType::Counter, "c".to_string()), + &labels!() + ) .is_some() ); assert!( collector - .find_histogram(&NameType::Global("h".to_string()), &labels!()) + .find_histogram( + &NameType::Global(MetricType::Histogram, "h".to_string()), + &labels!() + ) .is_some() ); @@ -45,12 +65,18 @@ fn retain() { assert!( collector - .find_counter(&NameType::Global("c".to_string()), &labels!()) + .find_counter( + &NameType::Global(MetricType::Counter, "c".to_string()), + &labels!() + ) .is_none() ); assert!( collector - .find_histogram(&NameType::Global("h".to_string()), &labels!()) + .find_histogram( + &NameType::Global(MetricType::Histogram, "h".to_string()), + &labels!() + ) .is_none() ); } diff --git a/bd-client-stats-store/src/test/mod.rs b/bd-client-stats-store/src/test/mod.rs index cea38538..d45dd937 100644 --- a/bd-client-stats-store/src/test/mod.rs +++ b/bd-client-stats-store/src/test/mod.rs @@ -6,6 +6,7 @@ // https://polyformproject.org/wp-content/uploads/2020/06/PolyForm-Shield-1.0.0.txt use crate::{Collector, HistogramInner, MetricData, NameType}; +use bd_stats_common::MetricType; use std::collections::BTreeMap; pub trait StatsHelper { @@ -30,7 +31,10 @@ impl StatsHelper for Collector { assert_eq!( value, self - .find_counter(&NameType::Global(name.to_string()), &labels) + .find_counter( + &NameType::Global(MetricType::Counter, name.to_string()), + &labels + ) .unwrap_or_else(|| panic!("Counter not found: {name} {labels:?}")) .get() ); @@ -46,7 +50,10 @@ impl StatsHelper for Collector { assert_eq!( value, self - .find_counter(&NameType::ActionId(action_id.to_string()), &labels) + .find_counter( + &NameType::ActionId(MetricType::Counter, action_id.to_string()), + &labels + ) .unwrap_or_else(|| panic!("Counter not found: {action_id} {labels:?}")) .get() ); @@ -59,7 +66,10 @@ impl StatsHelper for Collector { labels: BTreeMap, ) { let histogram_values = match self - .find_histogram(&NameType::ActionId(action_id.to_string()), &labels) + .find_histogram( + &NameType::ActionId(MetricType::Histogram, action_id.to_string()), + &labels, + ) .unwrap_or_else(|| panic!("Histogram not found: {action_id} {labels:?}")) .snap() .unwrap() diff --git a/bd-client-stats/src/lib.rs b/bd-client-stats/src/lib.rs index dcc33b87..d27d9f2c 100644 --- a/bd-client-stats/src/lib.rs +++ b/bd-client-stats/src/lib.rs @@ -10,9 +10,7 @@ pub mod stats; pub mod test; use crate::stats::Flusher; -use anyhow::anyhow; use bd_api::DataUpload; -use bd_client_common::error::handle_unexpected; use bd_client_common::file_system::RealFileSystem; use bd_client_stats_store::{Collector, Error as StatsError}; use bd_runtime::runtime::ConfigLoader; @@ -162,12 +160,6 @@ impl Stats { pub fn record_dynamic_counter(&self, tags: BTreeMap, id: &str, value: u64) { match self.collector.dynamic_counter(tags, id) { Ok(counter) => counter.inc_by(value), - Err(StatsError::ChangedType) => { - handle_unexpected::<(), anyhow::Error>( - Err(anyhow!("change in dynamic metric type")), - "dynamic counter type change", - ); - }, Err(StatsError::Overflow) => { self.handle_overflow(id); }, @@ -177,12 +169,6 @@ impl Stats { pub fn record_dynamic_histogram(&self, tags: BTreeMap, id: &str, value: f64) { match self.collector.dynamic_histogram(tags, id) { Ok(histogram) => histogram.observe(value), - Err(StatsError::ChangedType) => { - handle_unexpected::<(), anyhow::Error>( - Err(anyhow!("change in dynamic metric type")), - "dynamic histogram type change", - ); - }, Err(StatsError::Overflow) => { self.handle_overflow(id); }, diff --git a/bd-client-stats/src/stats.rs b/bd-client-stats/src/stats.rs index e89c84ea..bb88eb58 100644 --- a/bd-client-stats/src/stats.rs +++ b/bd-client-stats/src/stats.rs @@ -22,7 +22,7 @@ use bd_proto::protos::client::api::stats_upload_request::snapshot::Snapshot_type use bd_proto::protos::client::metric::metric::Metric_name_type; use bd_proto::protos::client::metric::{Metric as ProtoMetric, MetricsList}; use bd_shutdown::ComponentShutdown; -use bd_stats_common::NameType; +use bd_stats_common::{MetricType, NameType}; use bd_time::TimeDurationExt; #[cfg(test)] use stats_test::{TestHooks, TestHooksReceiver}; @@ -503,16 +503,21 @@ impl SnapshotHelper { }; let mut new_metrics: MetricsByName = HashMap::new(); - for metric in metrics.metric { - let name = match metric.metric_name_type { - Some(Metric_name_type::Name(name)) => NameType::Global(name), - Some(Metric_name_type::MetricId(id)) => NameType::ActionId(id), - None => continue, - }; - - let tags = metric.tags.into_iter().collect(); - if let Some(data) = metric.data { + for proto_metric in metrics.metric { + let tags = proto_metric.tags.into_iter().collect(); + if let Some(data) = proto_metric.data { if let Some(metric) = MetricData::from_proto(data) { + let metric_type = match metric { + MetricData::Counter(_) => MetricType::Counter, + MetricData::Histogram(_) => MetricType::Histogram, + }; + + let name = match proto_metric.metric_name_type { + Some(Metric_name_type::Name(name)) => NameType::Global(metric_type, name), + Some(Metric_name_type::MetricId(id)) => NameType::ActionId(metric_type, id), + None => continue, + }; + let existing = new_metrics.entry(name).or_default().insert(tags, metric); debug_assert!(existing.is_none()); } @@ -534,7 +539,7 @@ impl SnapshotHelper { } fn add_metric(&mut self, name: NameType, labels: BTreeMap, metric: MetricData) { - let maybe_limit = if matches!(name, NameType::ActionId(_)) { + let maybe_limit = if matches!(name, NameType::ActionId(..)) { self.limit } else { None @@ -566,8 +571,8 @@ impl SnapshotHelper { .into_iter() .map(move |(labels, metric)| ProtoMetric { metric_name_type: Some(match name.clone() { - NameType::Global(name) => Metric_name_type::Name(name), - NameType::ActionId(id) => Metric_name_type::MetricId(id), + NameType::Global(_, name) => Metric_name_type::Name(name), + NameType::ActionId(_, id) => Metric_name_type::MetricId(id), }), tags: labels.into_iter().collect(), data: Some(metric.to_proto()), diff --git a/bd-stats-common/src/lib.rs b/bd-stats-common/src/lib.rs index 68864dc4..0636c94c 100644 --- a/bd-stats-common/src/lib.rs +++ b/bd-stats-common/src/lib.rs @@ -38,28 +38,38 @@ pub fn make_client_sketch() -> DDSketch { DDSketch::logarithmic_collapsing_lowest_dense(0.02, 128).unwrap() } +// +// MetricType +// + +#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug, PartialOrd, Ord)] +pub enum MetricType { + Counter, + Histogram, +} + // // NameType // #[derive(Eq, Hash, PartialEq, Clone)] pub enum NameType { - Global(String), - ActionId(String), + Global(MetricType, String), + ActionId(MetricType, String), } impl NameType { #[must_use] pub fn as_str(&self) -> &str { match self { - Self::Global(name) | Self::ActionId(name) => name, + Self::Global(_, name) | Self::ActionId(_, name) => name, } } #[must_use] pub fn into_string(self) -> String { match self { - Self::Global(name) | Self::ActionId(name) => name, + Self::Global(_, name) | Self::ActionId(_, name) => name, } } } diff --git a/bd-test-helpers/src/stats.rs b/bd-test-helpers/src/stats.rs index f04d8407..2adcd8a7 100644 --- a/bd-test-helpers/src/stats.rs +++ b/bd-test-helpers/src/stats.rs @@ -14,7 +14,7 @@ use bd_proto::protos::client::api::stats_upload_request::snapshot::{ }; use bd_proto::protos::client::metric::metric::Data; use bd_proto::protos::client::metric::{Metric, MetricsList}; -use bd_stats_common::{NameType, make_client_sketch}; +use bd_stats_common::{MetricType, NameType, make_client_sketch}; use bd_time::TimestampExt; use std::collections::{BTreeMap, HashMap}; use time::OffsetDateTime; @@ -103,8 +103,8 @@ impl StatsRequestHelper { .metric .iter() .find(|metric| match &name { - NameType::Global(name) => metric.name() == name, - NameType::ActionId(id) => metric.metric_id() == id, + NameType::Global(_, name) => metric.name() == name, + NameType::ActionId(_, id) => metric.metric_id() == id, } && metric.tags == fields_str) } @@ -112,7 +112,10 @@ impl StatsRequestHelper { #[must_use] pub fn get_inline_histogram(&self, id: &str, fields: BTreeMap<&str, &str>) -> Option> { self - .get_metric(NameType::ActionId(id.to_string()), fields) + .get_metric( + NameType::ActionId(MetricType::Histogram, id.to_string()), + fields, + ) .and_then(|metric| { metric.data.as_ref().map(|data| match data { Data::InlineHistogramValues(h) => h.values.clone(), @@ -132,7 +135,10 @@ impl StatsRequestHelper { count: u64, ) { let histogram = self - .get_metric(NameType::Global(name.to_string()), fields) + .get_metric( + NameType::Global(MetricType::Histogram, name.to_string()), + fields, + ) .and_then(|c| c.has_ddsketch_histogram().then(|| c.ddsketch_histogram())) .expect("missing histogram"); @@ -151,7 +157,10 @@ impl StatsRequestHelper { pub fn expect_inline_histogram(&self, name: &str, fields: BTreeMap<&str, &str>, values: &[f64]) { let histogram = self - .get_metric(NameType::Global(name.to_string()), fields) + .get_metric( + NameType::Global(MetricType::Histogram, name.to_string()), + fields, + ) .and_then(|c| { c.has_inline_histogram_values() .then(|| c.inline_histogram_values()) @@ -164,13 +173,19 @@ impl StatsRequestHelper { #[allow(clippy::needless_pass_by_value)] #[must_use] pub fn get_counter(&self, name: &str, fields: BTreeMap<&str, &str>) -> Option { - self.get_counter_inner(NameType::Global(name.to_string()), fields) + self.get_counter_inner( + NameType::Global(MetricType::Counter, name.to_string()), + fields, + ) } #[allow(clippy::needless_pass_by_value)] #[must_use] pub fn get_workflow_counter(&self, id: &str, fields: BTreeMap<&str, &str>) -> Option { - self.get_counter_inner(NameType::ActionId(id.to_string()), fields) + self.get_counter_inner( + NameType::ActionId(MetricType::Counter, id.to_string()), + fields, + ) } #[allow(clippy::needless_pass_by_value)] diff --git a/bd-workflows/src/config.rs b/bd-workflows/src/config.rs index 50ad8972..7aa46c69 100644 --- a/bd-workflows/src/config.rs +++ b/bd-workflows/src/config.rs @@ -16,6 +16,7 @@ use bd_proto::protos::workflow::workflow::workflow::{ LimitDuration as LimitDurationProto, LimitMatchedLogsCount, }; +use bd_stats_common::MetricType; use protobuf::MessageField; use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap}; @@ -671,16 +672,6 @@ impl ActionTakeScreenshot { } } -// -// MetricType -// - -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum MetricType { - Counter, - Histogram, -} - pub type FieldKey = String; // diff --git a/bd-workflows/src/metrics.rs b/bd-workflows/src/metrics.rs index c92d4434..0da197e6 100644 --- a/bd-workflows/src/metrics.rs +++ b/bd-workflows/src/metrics.rs @@ -14,6 +14,7 @@ use crate::workflow::TriggeredActionEmitSankey; use bd_client_stats::Stats; use bd_log_primitives::LogRef; use bd_matcher::FieldProvider; +use bd_stats_common::MetricType; use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet}; use std::sync::Arc; @@ -64,13 +65,13 @@ impl MetricsCollector { }; match action.metric_type { - crate::config::MetricType::Counter => { + MetricType::Counter => { #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] self .stats .record_dynamic_counter(tags, &action.id, value as u64); }, - crate::config::MetricType::Histogram => { + MetricType::Histogram => { log::debug!("recording histogram value: {value}"); self.stats.record_dynamic_histogram(tags, &action.id, value); }, diff --git a/bd-workflows/src/metrics_test.rs b/bd-workflows/src/metrics_test.rs index 39b7bf1e..b2bf2843 100644 --- a/bd-workflows/src/metrics_test.rs +++ b/bd-workflows/src/metrics_test.rs @@ -6,12 +6,12 @@ // https://polyformproject.org/wp-content/uploads/2020/06/PolyForm-Shield-1.0.0.txt use super::MetricsCollector; -use crate::config::{ActionEmitMetric, MetricType, TagValue}; +use crate::config::{ActionEmitMetric, TagValue}; use bd_client_stats::Stats; use bd_client_stats_store::Collector; use bd_client_stats_store::test::StatsHelper; use bd_log_primitives::{FieldsRef, LogRef, LogType, log_level}; -use bd_stats_common::{NameType, labels}; +use bd_stats_common::{MetricType, NameType, labels}; use std::collections::BTreeMap; fn make_metrics_collector() -> (MetricsCollector, Collector) { @@ -87,7 +87,10 @@ fn metric_increment_value_extraction() { // No counter emitted for action_id_4 as the field does not exist. assert!( collector - .find_counter(&NameType::ActionId("action_id_4".to_string()), &labels! {}) + .find_counter( + &NameType::ActionId(MetricType::Counter, "action_id_4".to_string()), + &labels! {} + ) .is_none() ); diff --git a/bd-workflows/src/workflow_test.rs b/bd-workflows/src/workflow_test.rs index 3551e5bd..feca7d63 100644 --- a/bd-workflows/src/workflow_test.rs +++ b/bd-workflows/src/workflow_test.rs @@ -5,18 +5,11 @@ // LICENSE file or at: // https://polyformproject.org/wp-content/uploads/2020/06/PolyForm-Shield-1.0.0.txt -use crate::config::{ - ActionEmitMetric, - ActionFlushBuffers, - Config, - FlushBufferId, - MetricType, - ValueIncrement, -}; +use crate::config::{ActionEmitMetric, ActionFlushBuffers, Config, FlushBufferId, ValueIncrement}; use crate::workflow::{Run, TriggeredAction, Workflow, WorkflowResult, WorkflowResultStats}; use bd_log_primitives::{FieldsRef, LogFields, LogMessage, log_level}; use bd_proto::flatbuffers::buffer_log::bitdrift_public::fbs::logging::v_1::LogType; -use bd_stats_common::labels; +use bd_stats_common::{MetricType, labels}; use bd_test_helpers::metric_value; use bd_test_helpers::workflow::macros::{ action,