diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 80ec75bda8809..a94be746767a0 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2117,10 +2117,10 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && node.Ch != subsequent.Ch: case RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic when subsequent.M == 0 && node.Ch == subsequent.Ch: case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): - case RegexNodeKind.Boundary when RegexCharClass.IsBoundaryWordChar(node.Ch): - case RegexNodeKind.NonBoundary when !RegexCharClass.IsBoundaryWordChar(node.Ch): - case RegexNodeKind.ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch): - case RegexNodeKind.NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch): + case RegexNodeKind.Boundary when node.M > 0 && RegexCharClass.IsBoundaryWordChar(node.Ch): + case RegexNodeKind.NonBoundary when node.M > 0 && !RegexCharClass.IsBoundaryWordChar(node.Ch): + case RegexNodeKind.ECMABoundary when node.M > 0 && RegexCharClass.IsECMAWordChar(node.Ch): + case RegexNodeKind.NonECMABoundary when node.M > 0 && !RegexCharClass.IsECMAWordChar(node.Ch): // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well. break; @@ -2163,10 +2163,10 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M == 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): - case RegexNodeKind.Boundary when node.Str is RegexCharClass.WordClass or RegexCharClass.DigitClass: - case RegexNodeKind.NonBoundary when node.Str is RegexCharClass.NotWordClass or RegexCharClass.NotDigitClass: - case RegexNodeKind.ECMABoundary when node.Str is RegexCharClass.ECMAWordClass or RegexCharClass.ECMADigitClass: - case RegexNodeKind.NonECMABoundary when node.Str is RegexCharClass.NotECMAWordClass or RegexCharClass.NotDigitClass: + case RegexNodeKind.Boundary when node.M > 0 && node.Str is RegexCharClass.WordClass or RegexCharClass.DigitClass: + case RegexNodeKind.NonBoundary when node.M > 0 && node.Str is RegexCharClass.NotWordClass or RegexCharClass.NotDigitClass: + case RegexNodeKind.ECMABoundary when node.M > 0 && node.Str is RegexCharClass.ECMAWordClass or RegexCharClass.ECMADigitClass: + case RegexNodeKind.NonECMABoundary when node.M > 0 && node.Str is RegexCharClass.NotECMAWordClass or RegexCharClass.NotDigitClass: // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well. break; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs index bd0b1eab45ce3..990d489ca9464 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs @@ -292,6 +292,25 @@ public static IEnumerable Matches_TestData() } }; + foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.ECMAScript }) + { + if (RegexHelpers.IsNonBacktracking(engine)) + { + continue; + } + + yield return new object[] + { + engine, + @"a?\b", "ac", options, + new[] + { + new CaptureData("", 0, 0), + new CaptureData("", 2, 0), + } + }; + } + if (!PlatformDetection.IsNetFramework) { // .NET Framework missing fix in https://github.com/dotnet/runtime/pull/1075 diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs index dbb22cf502648..c7c8cc5a3936b 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs @@ -334,8 +334,10 @@ public class RegexReductionTests [InlineData("[^\n]*\n+", "(?>[^\n]*)(?>\n+)")] [InlineData("(a+)b", "((?>a+))b")] [InlineData("a*(?:bcd|efg)", "(?>a*)(?:bcd|efg)")] - [InlineData("\\w*\\b", "(?>\\w*)\\b")] - [InlineData("\\d*\\b", "(?>\\d*)\\b")] + [InlineData("\\w+\\b", "(?>\\w+)\\b")] + [InlineData("\\d+\\b", "(?>\\d+)\\b")] + [InlineData("\\W+\\B", "(?>\\W+)\\B")] + [InlineData("\\D+\\B", "(?>\\D+)\\B")] [InlineData("(?:abc*|def*)g", "(?:ab(?>c*)|de(?>f*))g")] [InlineData("(?:a[ce]*|b*)g", "(?:a(?>[ce]*)|(?>b*))g")] [InlineData("(?:a[ce]*|b*)c", "(?:a[ce]*|(?>b*))c")] @@ -476,6 +478,11 @@ public void PatternsReduceIdentically(string actual, string expected) [InlineData(@"\w*\b\w+", @"(?>\w*)\b\w+")] [InlineData(@"\W+\B\W+", @"(?>\W+)\B\W")] [InlineData(@"\W*\B\W+", @"(?>\W*)\B\W")] + [InlineData(@"a?\b", @"(?>a?)\b")] + [InlineData(@"\w*\b", @"(?>\w*)\b")] + [InlineData(@"\d*\b", @"(?>\d*)\b")] + [InlineData(@"\W*\B", @"(?>\W*)\B")] + [InlineData(@"\D*\B", @"(?>\D*)\B")] // Loops inside alternation constructs [InlineData("(abc*|def)chi", "(ab(?>c*)|def)chi")] [InlineData("(abc|def*)fhi", "(abc|de(?>f*))fhi")]