From a235fb684ca3ad1486b432681839d6f64012e1b7 Mon Sep 17 00:00:00 2001
From: Katie Byers
Date: Mon, 6 Apr 2026 11:32:31 -0700
Subject: [PATCH 1/3] use explicit lookbehind in quoted string and boolean regexes

---
 src/sentry/grouping/parameterization.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/sentry/grouping/parameterization.py b/src/sentry/grouping/parameterization.py
index 85043c06239069..f04a4f6745b3cf 100644
--- a/src/sentry/grouping/parameterization.py
+++ b/src/sentry/grouping/parameterization.py
@@ -311,23 +311,25 @@ def is_valid_ip(maybe_ip_str: str) -> bool:
     ParameterizationRegex(
         name="quoted_str",
         raw_pattern=r"""
-            '([^']+)' | "([^"]+)"
+            # Lookbehind to ensure we'll only match the value half of `=`-type key-value
+            # pairs, rather than all quoted strings
+            (?<=[=])
+            (
+                '([^']+)' |
+                "([^"]+)"
+            )
         """,
-        # Using an `=` lookbehind guarantees we'll only match the value half of key-value pairs,
-        # rather than all quoted strings
-        lookbehind="=",
     ),
     ParameterizationRegex(
         name="bool",
         raw_pattern=r"""
-            True |
-            true |
-            False |
-            false
+            # Lookbehind to ensure we'll only match the value half of `=`-type key-value
+            # pairs, rather than all instances of the words 'true' and 'false'
+            (?<=[=])
+            (
+                True | true | False | false
+            )
         """,
-        # Using an `=` lookbehind guarantees we'll only match the value half of key-value pairs,
-        # rather than all instances of the words 'true' and 'false'.
-        lookbehind="=",
     ),
 ]


From f47ef2106b46801a0e9c04bddc4333816ef3ae10 Mon Sep 17 00:00:00 2001
From: Katie Byers
Date: Mon, 6 Apr 2026 11:32:31 -0700
Subject: [PATCH 2/3] remove lookahead and lookbehind attributes from regex class

---
 src/sentry/grouping/parameterization.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/sentry/grouping/parameterization.py b/src/sentry/grouping/parameterization.py
index f04a4f6745b3cf..79679026c61afd 100644
--- a/src/sentry/grouping/parameterization.py
+++ b/src/sentry/grouping/parameterization.py
@@ -18,8 +18,6 @@ class ParameterizationRegex:
     name: str  # name of the pattern (also used as group name in combined regex)
     raw_pattern: str  # regex pattern w/o matching group name
     raw_pattern_experimental: str | None = None
-    lookbehind: str | None = None  # positive lookbehind prefix if needed
-    lookahead: str | None = None  # positive lookahead postfix if needed

     # Function which takes the matched value and returns the replacement value.
     replacement_callback: ParameterizationReplacementFunction | None = None
@@ -40,9 +38,7 @@ def _get_pattern(self, raw_pattern: str) -> str:
         """
         Returns the regex pattern with a named matching group and lookbehind/lookahead if needed.
         """
-        prefix = rf"(?<={self.lookbehind})" if self.lookbehind else ""
-        postfix = rf"(?={self.lookahead})" if self.lookahead else ""
-        return rf"{prefix}(?P<{self.name}>{raw_pattern}){postfix}"
+        return rf"(?P<{self.name}>{raw_pattern})"


 def is_valid_ip(maybe_ip_str: str) -> bool:

From c152ec0e50bbf10bc6d82f24c249084406568758 Mon Sep 17 00:00:00 2001
From: Katie Byers
Date: Mon, 6 Apr 2026 11:32:31 -0700
Subject: [PATCH 3/3] fix docstring

---
 src/sentry/grouping/parameterization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sentry/grouping/parameterization.py b/src/sentry/grouping/parameterization.py
index 79679026c61afd..dc049f65d18457 100644
--- a/src/sentry/grouping/parameterization.py
+++ b/src/sentry/grouping/parameterization.py
@@ -36,7 +36,7 @@ def experimental_pattern(self) -> str | None:

     def _get_pattern(self, raw_pattern: str) -> str:
         """
-        Returns the regex pattern with a named matching group and lookbehind/lookahead if needed.
+        Returns the regex pattern inside of a named matching group.
         """
         return rf"(?P<{self.name}>{raw_pattern})"