Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 35 additions & 34 deletions relay-event-normalization/src/regexes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,51 +9,52 @@ pub static TRANSACTION_NAME_NORMALIZER_REGEX: Lazy<Regex> = Lazy::new(|| {
// Verbose-mode (`(?x)`) pattern whose alternatives capture, in this order, the named groups
// `uuid`, `sha1`, `md5`, `date`, `hex` and `int`.
//
// Every alternative except `int` has `[^/\\]*` on both sides of its core pattern, so each of
// those groups can also take in neighbouring characters other than `/` and `\`. The `int`
// alternative is only tried at the start of the input or directly after a `/` or `\`, and needs
// at least two consecutive digits.
//
// NOTE(review): this span looks like a rendered diff, so most pattern lines appear twice in a
// row. The paired lines differ in `\b` / `\s` / `\d` versus `(?-u:\b)` / `(?-u:\s)` / `[0-9]`.
// Per the regex crate docs the first forms are Unicode-aware by default, so the second forms
// presumably restrict matching to ASCII -- confirm which side is the intended one.
//
// NOTE(review): `(:?` at the start of the `int` alternative reads as a *capturing* group that
// begins with an optional `:`. It looks like a typo for the non-capturing `(?:` -- confirm before
// changing it, because the fix alters capture-group numbering.
Regex::new(
r"(?x)
(?P<uuid>[^/\\]*
\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b
(?-u:\b)[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}(?-u:\b)
[^/\\]*) |
(?P<sha1>[^/\\]*
\b[0-9a-fA-F]{40}\b
(?-u:\b)[0-9a-fA-F]{40}(?-u:\b)
[^/\\]*) |
(?P<md5>[^/\\]*
\b[0-9a-fA-F]{32}\b
(?-u:\b)[0-9a-fA-F]{32}(?-u:\b)
[^/\\]*) |
(?P<date>[^/\\]*
(?:
(?:\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|
(?:\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|
(?:\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))
(?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]\.[0-9]+([+-][0-2][0-9]:[0-5][0-9]|Z))|
(?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z))|
(?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z))
) |
(?:
\b(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat)\s+)?
(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+
(?:[\d]{1,2})\s+
(?:[\d]{2}:[\d]{2}:[\d]{2})\s+
[\d]{4}
(?-u:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(?-u:\s)+)?
(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?-u:\s)+
(?:[0-9]{1,2})(?-u:\s)+
(?:[0-9]{2}:[0-9]{2}:[0-9]{2})(?-u:\s)+
[0-9]{4}
) |
(?:
\b(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),\s+)?
(?:0[1-9]|[1-2]?[\d]|3[01])\s+
(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+
(?:19[\d]{2}|[2-9][\d]{3})\s+
(?:2[0-3]|[0-1][\d]):([0-5][\d])
(?::(60|[0-5][\d]))?\s+
(?:[-\+][\d]{2}[0-5][\d]|(?:UT|GMT|(?:E|C|M|P)(?:ST|DT)|[A-IK-Z]))
(?-u:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),(?-u:\s)+)?
(?:0[1-9]|[1-2]?[0-9]|3[01])(?-u:\s)+
(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?-u:\s)+
(?:19[0-9]{2}|[2-9][0-9]{3})(?-u:\s)+
(?:2[0-3]|[0-1][0-9]):([0-5][0-9])
(?::(60|[0-5][0-9]))?(?-u:\s)+
(?:[-\+][0-9]{2}[0-5][0-9]|(?:UT|GMT|(?:E|C|M|P)(?:ST|DT)|[A-IK-Z]))
)
[^/\\]*) |
(?P<hex>[^/\\]*
\b0[xX][0-9a-fA-F]+\b
(?-u:\b)0[xX][0-9a-fA-F]+(?-u:\b)
[^/\\]*) |
(?:^|[/\\])
(?P<int>
(:?[^%/\\]|%[0-9a-fA-F]{2})*\d{2,}
(:?[^%/\\]|%[0-9a-fA-F]{2})*[0-9]{2,}
[^/\\]*)",
)
.unwrap()
});

/// Captures initial all-caps words as redis command, the rest as arguments.
///
/// Named capture groups:
/// - `command`: one or more runs of `A-Z` separated by whitespace, ending at a word boundary.
/// - `args`: optional; whatever `.+` matches after the command.
///
/// Whitespace in front of the command is matched but is not part of either named group.
// NOTE(review): the static is spelled out twice below, which looks like the removed and the
// added side of a diff. The two differ only in `\s` / `\b` versus `(?-u:\s)` / `(?-u:\b)`
// (presumably Unicode-aware versus ASCII-only classes -- confirm against the regex crate docs).
pub static REDIS_COMMAND_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\s*(?P<command>[A-Z]+(\s+[A-Z]+)*\b)(?P<args>.+)?").unwrap());
pub static REDIS_COMMAND_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?-u:\s)*(?P<command>[A-Z]+((?-u:\s)+[A-Z]+)*(?-u:\b))(?P<args>.+)?").unwrap()
});

/// Regex with multiple capture groups for resource tokens we should scrub.
///
Expand All @@ -65,44 +66,44 @@ pub static REDIS_COMMAND_REGEX: Lazy<Regex> =
/// <https://github.com/getsentry/sentry/blob/de5949a9a313d7ef0bf0685f84fe6e981ac38558/src/sentry/utils/performance_issues/base.py#L292-L306>
// The pattern is written in verbose mode and lists its alternatives in this order:
// `uuid`, `version` (a `v` followed by dot-separated digit runs), `hex` (six or more hex digits)
// and `int` (two or more digits).
//
// NOTE(review): this span looks like a rendered diff, so each changed pattern line appears twice.
// One side uses the flags `(?xi)` with lowercase-only classes and `\d`; the other uses `(?x)` with
// explicit `a-fA-F` classes and `[0-9]`.
//
// NOTE(review): on the `(?x)` side the `i` flag is gone but the `version` alternative still
// starts with a lowercase `v` only, so an uppercase `V` prefix would no longer match there --
// confirm that this change in behaviour is intended.
pub static RESOURCE_NORMALIZER_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r"(?xi)
r"(?x)
# UUIDs.
(?P<uuid>[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) |
(?P<uuid>[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) |
# Version strings.
(?P<version>(v[0-9]+(?:\.[0-9]+)*)) |
# Hexadecimal strings with more than 5 digits.
(?P<hex>[a-f0-9]{5}[a-f0-9]+) |
(?P<hex>[a-fA-F0-9]{5}[a-fA-F0-9]+) |
# Integer IDs with more than one digit.
(?P<int>\d\d+)
(?P<int>[0-9][0-9]+)
",
)
.unwrap()
});

/// Matches a run of one or more digits and captures it in the named group `int`.
// NOTE(review): two initializer lines follow the declaration, which looks like the removed and
// the added side of a diff; they differ only in `\d+` versus `[0-9]+` (presumably Unicode-aware
// versus ASCII-only digits -- confirm against the regex crate docs).
pub static DB_SQL_TRANSACTION_CORE_DATA_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?P<int>\d+)").unwrap());
Lazy::new(|| Regex::new(r"(?P<int>[0-9]+)").unwrap());

/// Verbose-mode pattern with three named alternatives, listed in this order: `uuid`, `hex`
/// (six or more hex digits) and `int` (two or more digits).
// NOTE(review): this span looks like a rendered diff, so each changed pattern line appears twice.
// One side relies on the `i` flag in `(?xi)` with lowercase-only hex classes and `\d`; the other
// uses `(?x)` with explicit `a-fA-F` classes and `[0-9]` -- confirm which side is current.
pub static DB_SUPABASE_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r"(?xi)
r"(?x)
# UUIDs.
(?P<uuid>[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) |
(?P<uuid>[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) |
# Hexadecimal strings with more than 5 digits.
(?P<hex>[a-f0-9]{5}[a-f0-9]+) |
(?P<hex>[a-fA-F0-9]{5}[a-fA-F0-9]+) |
# Integer IDs with more than one digit.
(?P<int>\d\d+)
(?P<int>[0-9][0-9]+)
",
)
.unwrap()
});

pub static FUNCTION_NORMALIZER_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r"(?xi)
r"(?x)
# UUIDs.
(?P<uuid>[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) |
(?P<uuid>[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) |
# Hexadecimal strings with more than 5 digits.
(?P<hex>[a-f0-9]{5}[a-f0-9]+)
(?P<hex>[a-fA-F0-9]{5}[a-fA-F0-9]+)
",
)
.unwrap()
Expand Down