diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0d4b84fa46..1334190eb8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@
 **Internal**:
 
 - Spread out metric aggregation over the aggregation window to avoid concentrated waves of metrics requests to the upstream every 10 seconds. Relay now applies jitter to `initial_delay` to spread out requests more evenly over time. ([#1185](https://github.com/getsentry/relay/pull/1185))
-- Add new statsd metrics for bucketing efficiency ([#1199](https://github.com/getsentry/relay/pull/1199), [#1192](https://github.com/getsentry/relay/pull/1192))
+- Add new statsd metrics for bucketing efficiency ([#1199](https://github.com/getsentry/relay/pull/1199), [#1192](https://github.com/getsentry/relay/pull/1192), [#1200](https://github.com/getsentry/relay/pull/1200))
 
 ## 22.2.0
 
diff --git a/relay-metrics/src/aggregation.rs b/relay-metrics/src/aggregation.rs
index 9a89c6e94b..8fa4163df6 100644
--- a/relay-metrics/src/aggregation.rs
+++ b/relay-metrics/src/aggregation.rs
@@ -840,6 +840,7 @@ impl AggregatorConfig {
     /// is, buckets that lie in the past, are flushed after the shorter `debounce_delay`.
     fn get_flush_time(&self, bucket_timestamp: UnixTimestamp, project_key: ProjectKey) -> Instant {
         let now = Instant::now();
+        let mut flush = None;
 
         if let MonotonicResult::Instant(instant) = bucket_timestamp.to_instant() {
             let bucket_end = instant + self.bucket_interval();
@@ -853,13 +854,22 @@
                 hasher.write(project_key.as_str().as_bytes());
                 let shift_millis = u64::from(hasher.finish()) % (self.bucket_interval * 1000);
 
-                return initial_flush + Duration::from_millis(shift_millis);
+                flush = Some(initial_flush + Duration::from_millis(shift_millis));
             }
         }
 
+        let delay = UnixTimestamp::now().as_secs() as i64 - bucket_timestamp.as_secs() as i64;
+        relay_statsd::metric!(
+            histogram(MetricHistograms::BucketsDelay) = delay as f64,
+            backdated = if flush.is_none() { "true" } else { "false" },
+        );
+
         // If the initial flush time has passed or cannot be represented, debounce future flushes
         // with the `debounce_delay` starting now.
-        now + self.debounce_delay()
+        match flush {
+            Some(initial_flush) => initial_flush,
+            None => now + self.debounce_delay(),
+        }
     }
 }
 
diff --git a/relay-metrics/src/statsd.rs b/relay-metrics/src/statsd.rs
index f70c7e5e38..539a530730 100644
--- a/relay-metrics/src/statsd.rs
+++ b/relay-metrics/src/statsd.rs
@@ -91,6 +91,18 @@ pub enum MetricHistograms {
     /// BucketRelativeSize measures how many distinct values are in a bucket and therefore
     /// BucketRelativeSize gives you a measurement of the bucket size and complexity.
     BucketRelativeSize,
+
+    /// The reporting delay with which a bucket arrives in Relay.
+    ///
+    /// A positive delay indicates that the bucket arrived after its stated timestamp. Large delays
+    /// indicate backdating, in particular delays larger than `bucket_interval + initial_delay`.
+    /// Negative delays indicate that the bucket is dated into the future, likely due to clock drift
+    /// on the client.
+    ///
+    /// This metric is tagged with:
+    ///  - `backdated`: A flag indicating whether the metric was reported within the `initial_delay`
+    ///    time period (`false`) or after the initial delay has expired (`true`).
+    BucketsDelay,
 }
 
 impl HistogramMetric for MetricHistograms {
@@ -99,6 +111,7 @@
             Self::BucketsFlushed => "metrics.buckets.flushed",
             Self::BucketsFlushedPerProject => "metrics.buckets.flushed_per_project",
             Self::BucketRelativeSize => "metrics.buckets.relative_bucket_size",
+            Self::BucketsDelay => "metrics.buckets.delay",
         }
     }
 }
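
For illustration only, here is a minimal standalone sketch of the semantics recorded by the new `metrics.buckets.delay` histogram; the helper functions and the example `bucket_interval`/`initial_delay` values are hypothetical and not part of this patch. The delay is the wall-clock age of the bucket timestamp in seconds (negative if the timestamp lies in the future), and the `backdated` tag becomes `"true"` roughly when the delay exceeds `bucket_interval + initial_delay`, i.e. once the bucket's initial flush time has already passed.

```rust
use std::time::{SystemTime, UNIX_EPOCH};

/// Hypothetical helper: the reporting delay in seconds, as recorded by the
/// `metrics.buckets.delay` histogram. Positive means the bucket arrived after
/// its stated timestamp; negative means it is dated into the future.
fn reporting_delay(bucket_timestamp_secs: u64) -> i64 {
    let now_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system clock is before the UNIX epoch")
        .as_secs();
    now_secs as i64 - bucket_timestamp_secs as i64
}

/// Hypothetical helper: rough equivalent of the `backdated` tag. The patch
/// derives the tag from whether the initial flush time has already passed.
fn is_backdated(delay_secs: i64, bucket_interval_secs: u64, initial_delay_secs: u64) -> bool {
    delay_secs > (bucket_interval_secs + initial_delay_secs) as i64
}

fn main() {
    // Example values only; Relay's actual intervals come from its aggregator config.
    let (bucket_interval, initial_delay) = (10, 30);

    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system clock is before the UNIX epoch")
        .as_secs();

    // Buckets stamped 5s, 25s, and 120s in the past; only the last one would
    // exceed bucket_interval + initial_delay and count as backdated here.
    for age in [5_u64, 25, 120] {
        let delay = reporting_delay(now - age);
        println!(
            "bucket {age}s old: delay={delay}s backdated={}",
            is_backdated(delay, bucket_interval, initial_delay)
        );
    }
}
```

In the patch itself, `backdated` is derived directly from whether `get_flush_time` still found a pending initial flush for the bucket, which also covers timestamps that cannot be converted to an `Instant`.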