Skip to content

Commit

Permalink
fix(metrics): Remove/reject nul-bytes from metric strings (#1235)
Browse files Browse the repository at this point in the history
  • Loading branch information
untitaker committed Apr 26, 2022
1 parent 48051bd commit a66c0eb
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 34 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

* Add sampling + tagging by event platform and transaction op. Some (unused) tagging rules from 22.4.0 have been renamed. ([#1231](https://github.com/getsentry/relay/pull/1231))
- Refactor aggregation error, recover from errors more gracefully. ([#1240](https://github.com/getsentry/relay/pull/1240))
- Remove/reject nul-bytes from metric strings. ([#1235](https://github.com/getsentry/relay/pull/1235))

## 22.4.0

Expand Down
4 changes: 2 additions & 2 deletions relay-general/src/protocol/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ pub struct Event {
/// can be the git SHA for the given project, or a product identifier with a semantic version.
#[metastructure(
max_chars = "tag_value", // release ends in tag
deny_chars = "\r\n\x0c\t/\\",
// release allowed chars are validated in the sentry-release-parser crate!
required = "false",
trim_whitespace = "true",
nonempty = "true",
Expand Down Expand Up @@ -374,7 +374,7 @@ pub struct Event {
/// ```
#[metastructure(
max_chars = "environment",
deny_chars = "\r\n\x0C/",
// environment allowed chars are validated in the sentry-release-parser crate!
nonempty = "true",
required = "false",
trim_whitespace = "true"
Expand Down
25 changes: 1 addition & 24 deletions relay-general/src/store/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ fn verify_value_characters(
mod tests {
use super::SchemaProcessor;
use crate::processor::{process_value, ProcessingState};
use crate::types::{Annotated, Array, Error, Object, Value};
use crate::types::{Annotated, Array, Error, Object};

fn assert_nonempty_base<T>()
where
Expand Down Expand Up @@ -148,29 +148,6 @@ mod tests {
assert_nonempty_base::<Object<u64>>();
}

#[test]
fn test_release_invalid_newlines() {
use crate::protocol::Event;

let mut event = Annotated::new(Event {
release: Annotated::new("a\nb".to_string().into()),
..Default::default()
});

process_value(&mut event, &mut SchemaProcessor, ProcessingState::root()).unwrap();

assert_eq_dbg!(
event,
Annotated::new(Event {
release: Annotated::from_error(
Error::invalid("invalid character \'\\n\'"),
Some(Value::String("a\nb".into())),
),
..Default::default()
})
);
}

#[test]
fn test_invalid_email() {
use crate::protocol::User;
Expand Down
86 changes: 81 additions & 5 deletions relay-metrics/src/aggregation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use serde::{Deserialize, Serialize};
use relay_common::{MonotonicResult, ProjectKey, UnixTimestamp};

use crate::statsd::{MetricCounters, MetricGauges, MetricHistograms, MetricSets, MetricTimers};
use crate::{Metric, MetricType, MetricUnit, MetricValue};
use crate::{protocol, Metric, MetricType, MetricUnit, MetricValue};

/// Interval for the flush cycle of the [`Aggregator`].
const FLUSH_INTERVAL: Duration = Duration::from_millis(100);
Expand Down Expand Up @@ -728,7 +728,11 @@ impl From<AggregateMetricsErrorKind> for AggregateMetricsError {
}

#[derive(Debug, Fail)]
#[allow(clippy::enum_variant_names)]
enum AggregateMetricsErrorKind {
/// A metric bucket had invalid characters in the metric name.
#[fail(display = "found invalid characters")]
InvalidCharacters,
/// A metric bucket's timestamp was out of the configured acceptable range.
#[fail(display = "found invalid timestamp")]
InvalidTimestamp,
Expand Down Expand Up @@ -1062,6 +1066,30 @@ impl Aggregator {
}
}

/// Remove invalid characters from tags and metric names.
///
/// Returns `Err` if the metric should be dropped.
fn validate_bucket_key(mut key: BucketKey) -> Result<BucketKey, AggregateMetricsError> {
if !protocol::is_valid_mri(&key.metric_name) {
relay_log::debug!("invalid metric name {:?}", key.metric_name);
return Err(AggregateMetricsErrorKind::InvalidCharacters.into());
}

key.tags.retain(|tag_key, _| {
if protocol::is_valid_tag_key(tag_key) {
true
} else {
relay_log::debug!("invalid metric tag key {:?}", tag_key);
false
}
});
for (_, tag_value) in key.tags.iter_mut() {
protocol::validate_tag_value(tag_value);
}

Ok(key)
}

/// Merges any mergeable value into the bucket at the given `key`.
///
/// If no bucket exists for the given bucket key, a new bucket will be created.
Expand All @@ -1073,6 +1101,8 @@ impl Aggregator {
let timestamp = key.timestamp;
let project_key = key.project_key;

let key = Self::validate_bucket_key(key)?;

match self.buckets.entry(key) {
Entry::Occupied(mut entry) => {
relay_statsd::metric!(
Expand Down Expand Up @@ -1454,7 +1484,7 @@ mod tests {

fn some_metric() -> Metric {
Metric {
name: "foo".to_owned(),
name: "c:foo".to_owned(),
unit: MetricUnit::None,
value: MetricValue::Counter(42.),
timestamp: UnixTimestamp::from_secs(999994711),
Expand Down Expand Up @@ -1780,7 +1810,7 @@ mod tests {
BucketKey {
project_key: ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"),
timestamp: UnixTimestamp(999994711),
metric_name: "foo",
metric_name: "c:foo",
metric_type: Counter,
metric_unit: None,
tags: {},
Expand Down Expand Up @@ -1829,7 +1859,7 @@ mod tests {
BucketKey {
project_key: ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"),
timestamp: UnixTimestamp(999994710),
metric_name: "foo",
metric_name: "c:foo",
metric_type: Counter,
metric_unit: None,
tags: {},
Expand All @@ -1842,7 +1872,7 @@ mod tests {
BucketKey {
project_key: ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"),
timestamp: UnixTimestamp(999994720),
metric_name: "foo",
metric_name: "c:foo",
metric_type: Counter,
metric_unit: None,
tags: {},
Expand Down Expand Up @@ -2101,4 +2131,50 @@ mod tests {
rounded_now + 10
);
}

#[test]
fn test_validate_bucket_key() {
let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap();

let bucket_key = BucketKey {
project_key,
timestamp: UnixTimestamp::now(),
metric_name: "c:hergus.bergus".to_owned(),
metric_type: MetricType::Counter,
metric_unit: MetricUnit::None,
tags: {
let mut tags = BTreeMap::new();
// There are some SDKs which mess up content encodings, and interpret the raw bytes
// of an UTF-16 string as UTF-8. Leading to ASCII
// strings getting null-bytes interleaved.
//
// Somehow those values end up as release tag in sessions, while in error events we
// haven't observed this malformed encoding. We believe it's slightly better to
// strip out NUL-bytes instead of dropping the tag such that those values line up
// again across sessions and events. Should that cause too high cardinality we'll
// have to drop tags.
//
// Note that releases are validated separately against much stricter character set,
// but the above idea should still apply to other tags.
tags.insert(
"is_it_garbage".to_owned(),
"a\0b\0s\0o\0l\0u\0t\0e\0l\0y".to_owned(),
);
tags.insert("another\0garbage".to_owned(), "bye".to_owned());
tags
},
};

let mut bucket_key = Aggregator::validate_bucket_key(bucket_key).unwrap();

assert_eq!(bucket_key.tags.len(), 1);
assert_eq!(
bucket_key.tags.get("is_it_garbage"),
Some(&"absolutely".to_owned())
);
assert_eq!(bucket_key.tags.get("another\0garbage"), None);

bucket_key.metric_name = "hergus\0bergus".to_owned();
Aggregator::validate_bucket_key(bucket_key).unwrap_err();
}
}
52 changes: 49 additions & 3 deletions relay-metrics/src/protocol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,44 @@ fn is_valid_name(name: &str) -> bool {
false
}

/// Validates an MRI of the form `<ty>:<ns>/<name>@<unit>`
///
/// Note that the format used in the statsd protocol is different: Metric names are not prefixed
/// with `<ty>:` as the type is somewhere else in the protocol.
pub(crate) fn is_valid_mri(name: &str) -> bool {
let mut components = name.splitn(2, ':');
if components.next().is_none() {
return false;
}

if let Some(name_unit) = components.next() {
parse_name_unit(name_unit).is_some()
} else {
false
}
}

/// Validates a tag key.
///
/// Tag keys currently only need to not contain ASCII control characters. This might change.
pub(crate) fn is_valid_tag_key(tag_key: &str) -> bool {
// iterating over bytes produces better asm, and we're only checking for ascii chars
for &byte in tag_key.as_bytes() {
if (byte as char).is_ascii_control() {
return false;
}
}
true
}

/// Validates a tag value.
///
/// Tag values are never entirely rejected, but invalid characters (ASCII control characters) are
/// stripped out.
pub(crate) fn validate_tag_value(tag_value: &mut String) {
tag_value.retain(|c| !c.is_ascii_control());
}

/// Parses the `name[@unit]` part of a metric string.
///
/// Returns [`MetricUnit::None`] if no unit is specified. Returns `None` if the name or value are
Expand Down Expand Up @@ -487,9 +525,17 @@ fn parse_tags(string: &str) -> Option<BTreeMap<String, String>> {

for pair in string.split(',') {
let mut name_value = pair.splitn(2, ':');

let name = name_value.next()?;
let value = name_value.next().unwrap_or_default();
map.insert(name.to_owned(), value.to_owned());
if !is_valid_tag_key(name) {
continue;
}

let mut value = name_value.next().unwrap_or_default().to_owned();

validate_tag_value(&mut value);

map.insert(name.to_owned(), value);
}

Some(map)
Expand Down Expand Up @@ -564,7 +610,7 @@ pub struct Metric {
/// The name of the metric without its unit.
///
/// Metric names cannot be empty, must start with a letter and can consist of ASCII
/// alphanumerics, underscores and periods.
/// alphanumerics, underscores, slashes, @s and periods.
pub name: String,
/// The unit of the metric value.
///
Expand Down

0 comments on commit a66c0eb

Please sign in to comment.