From 3f97e6bce07d6f99445e66ac18cc6764a6c8f9b0 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 27 Mar 2026 23:55:47 -0400 Subject: [PATCH] Escape U+2028/U+2029 in regex source generator XML doc comments U+2028 (Line Separator) and U+2029 (Paragraph Separator) are valid XML characters but are C# line terminators. When emitted literally into /// doc comments by the regex source generator, they break the comment across lines, causing compilation errors in the generated code. Exclude these two characters from the literal pass-through range in EscapeXmlComment so they are escaped as \u2028 and \u2029 text instead. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../gen/RegexGenerator.Emitter.cs | 3 ++- .../tests/FunctionalTests/Regex.Match.Tests.cs | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 06f6b943f381c9..9f7ac8653ac38a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -43,7 +43,8 @@ private static string EscapeXmlComment(string text) case '>': sb.Append(">"); break; // Propagate all other valid XML characters as-is. Control chars are considered invalid. - case (>= 0x20 and <= 0x7F) or (>= 0xA0 and <= 0xD7FF) or (>= 0xE000 and <= 0xFFFD): sb.Append(c); break; + // U+2028 and U+2029 are valid XML but are C# line terminators, so they'd break /// comments. + case (>= 0x20 and <= 0x7F) or (>= 0xA0 and <= 0xD7FF and not 0x2028 and not 0x2029) or (>= 0xE000 and <= 0xFFFD): sb.Append(c); break; // Use Unicode escape sequences for everything else. default: sb.Append($"\\u{(int)c:X4}"); break; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 8d4ca6edee91d0..2e8bc68f2667a8 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -46,6 +46,12 @@ public static IEnumerable Match_MemberData() // Testing control character escapes???: "2", "(\u0032)" yield return ("(\u0034)", "4", RegexOptions.None, 0, 1, true, "4"); + // Patterns with literal U+2028, U+2029, and U+FFFE to validate source generator XML doc comment escaping + yield return ("ab\u2028", "ab\u2028", RegexOptions.None, 0, 3, true, "ab\u2028"); + yield return ("ab\u2029", "ab\u2029", RegexOptions.None, 0, 3, true, "ab\u2029"); + yield return ("ab\uFFFE", "ab\uFFFE", RegexOptions.None, 0, 3, true, "ab\uFFFE"); + yield return ("[\u2028\u2029\uFFFE]", "x\u2029y", RegexOptions.None, 0, 3, true, "\u2029"); + // Using long loop prefix yield return (@"a{10}", new string('a', 10), RegexOptions.None, 0, 10, true, new string('a', 10)); yield return (@"a{100}", new string('a', 100), RegexOptions.None, 0, 100, true, new string('a', 100));