Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,14 @@ private RegexNode ReduceAtomic()
case RegexNodeKind.Nothing:
return child;

// If the child matches exactly one character (One, Notone, Set) or a fixed multi-character
// string (Multi), it inherently can't backtrack, so the Atomic wrapper is unnecessary.
case RegexNodeKind.One:
case RegexNodeKind.Notone:
case RegexNodeKind.Set:
case RegexNodeKind.Multi:
return child;

// If the child is already atomic, we can just remove the atomic node.
case RegexNodeKind.Oneloopatomic:
case RegexNodeKind.Notoneloopatomic:
Expand Down Expand Up @@ -1379,8 +1387,10 @@ static RegexNode ExtractCommonPrefixText(RegexNode alternation)

// To keep things relatively simple, we currently only handle:
// - Left to right (e.g. we don't process alternations in lookbehinds)
// - Branches that are one or multi nodes, or that are concatenations beginning with one or multi nodes.
// - All branches having the same options.
// - Consecutive runs of branches that are one or multi nodes, or concatenations beginning with
// one or multi nodes. Non-text branches (e.g. sets, loops) are skipped but don't prevent
// later consecutive text branches from being factored.
// - All branches in a factored run having the same options.

// Only extract left-to-right prefixes.
if ((alternation.Options & RegexOptions.RightToLeft) != 0)
Expand All @@ -1395,7 +1405,8 @@ static RegexNode ExtractCommonPrefixText(RegexNode alternation)
RegexNode? startingNode = children[startingIndex].FindBranchOneOrMultiStart();
if (startingNode is null)
{
return alternation;
// Skip non-text branches; later consecutive text branches may still share a prefix.
continue;
}

RegexOptions startingNodeOptions = startingNode.Options;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,20 @@ public static IEnumerable<object[]> Match_MemberData()
// Alternations
yield return (Case("(?>hi|hello|hey)hi"), "hellohi", options, 0, 0, false, string.Empty);
yield return (Case("(?>hi|hello|hey)hi"), "hihi", options, 0, 4, true, "hihi");

// Atomic groups wrapping non-backtrackable nodes (reduction removes the Atomic wrapper but preserves match behavior)
yield return (Case("(?>a)b"), "ab", options, 0, 2, true, "ab");
yield return (Case("(?>a)b"), "cb", options, 0, 2, false, "");
yield return (Case("(?>[abc])x"), "bx", options, 0, 2, true, "bx");
yield return (Case("(?>abc)d"), "abcd", options, 0, 4, true, "abcd");

// Shared-prefix extraction past non-text branches
yield return (Case("[^x]|ab|ac"), "a", options, 0, 1, true, "a");
yield return (Case("[^x]|ab|ac"), "ab", options, 0, 2, true, "a");
yield return (Case("[^x]|ab|ac"), "ac", options, 0, 2, true, "a");
yield return (Case("[^x]|ab|ac"), "x", options, 0, 1, false, "");
yield return (Case("[x]|ab|ac"), "ab", options, 0, 2, true, "ab");
yield return (Case("[x]|ab|ac"), "ac", options, 0, 2, true, "ac");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ public class RegexReductionTests
[InlineData("(?>(?>(?>(?>))))", "")]
[InlineData("(?>(?>(?>(?>(?!)))))", "(?!)")]
[InlineData("(?=(?>))", "")]
// Atomic wrapping non-backtrackable nodes (One, Notone, Set, Multi) is redundant
[InlineData("(?>a)", "a")]
[InlineData("(?>[^a])", "[^a]")]
[InlineData("(?>[abc])", "[abc]")]
[InlineData("(?>abc)", "abc")]
// Lookaround reduction
[InlineData("(?!(abc))", "(?!abc)")]
[InlineData("(?!a(b*)c)", "(?!ab*c)")]
Expand Down Expand Up @@ -473,6 +478,9 @@ public class RegexReductionTests
[InlineData("(?:http|https)://foo", "http(?>s?)://foo")]
[InlineData("(?:ab|abc)d", "ab(?>c?)d")]
[InlineData("(?:abc|abcd|abce|abcfg)h", "abc(?:|[de]|fg)h")]
// Shared-prefix extraction skips non-text branches and factors later text branches
[InlineData("[^x]|ab|ac", "[^x]|a[bc]")]
[InlineData("[^x]|\\ab|\\ac", "[^x]|\\a[bc]")]
public void PatternsReduceIdentically(string actual, string expected)
{
// NOTE: RegexNode.ToString is only compiled into debug builds, so DEBUG is currently set on the unit tests project.
Expand Down
Loading