diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index d7b2a1f3a99362..1acada27f5179a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -53,9 +53,9 @@ private static string EmitRegexType(RegexType regexClass) } // Emit containing types - RegexType parent = regexClass.ParentClass; + RegexType? parent = regexClass.ParentClass; var parentClasses = new Stack(); - while (parent != null) + while (parent is not null) { parentClasses.Push($"partial {parent.Keyword} {parent.Name} {parent.Constraints}"); parent = parent.ParentClass; @@ -75,6 +75,7 @@ private static string EmitRegexType(RegexType regexClass) // Generate a name to describe the regex instance. This includes the method name // the user provided and a non-randomized (for determinism) hash of it to try to make // the name that much harder to predict. + Debug.Assert(regexClass.Method is not null); string generatedName = $"GeneratedRegex_{regexClass.Method.MethodName}_"; generatedName += ComputeStringHash(generatedName).ToString("X"); @@ -104,31 +105,18 @@ static uint ComputeStringHash(string s) } /// Gets whether a given regular expression method is supported by the code generator. - private static bool SupportsCustomCodeGeneration(RegexMethod rm) - { - const RegexOptions SupportedOptions = - RegexOptions.IgnoreCase | - RegexOptions.Multiline | - RegexOptions.ExplicitCapture | - RegexOptions.Compiled | - RegexOptions.Singleline | - RegexOptions.IgnorePatternWhitespace | - RegexOptions.RightToLeft | - RegexOptions.ECMAScript | - RegexOptions.CultureInvariant; - - // If we see an option we're not aware of (but that was allowed through), don't emit custom regex code. - return (rm.Options & ~(int)SupportedOptions) == 0; - } + private static bool SupportsCustomCodeGeneration(RegexMethod rm) => + // The generator doesn't currently know how to emit code for NonBacktracking. + (rm.Options & RegexOptions.NonBacktracking) == 0; /// Generates the code for a regular expression method. private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id) { string patternExpression = Literal(rm.Pattern); - string optionsExpression = $"(global::System.Text.RegularExpressions.RegexOptions)({rm.Options})"; + string optionsExpression = $"(global::System.Text.RegularExpressions.RegexOptions)({(int)rm.Options})"; string timeoutExpression = rm.MatchTimeout == Timeout.Infinite ? "global::System.Threading.Timeout.InfiniteTimeSpan" : - $"global::System.TimeSpan.FromMilliseconds({rm.MatchTimeout.Value.ToString(CultureInfo.InvariantCulture)})"; + $"global::System.TimeSpan.FromMilliseconds({rm.MatchTimeout.ToString(CultureInfo.InvariantCulture)})"; writer.WriteLine(s_generatedCodeAttribute); writer.WriteLine($"{rm.Modifiers} global::System.Text.RegularExpressions.Regex {rm.MethodName}() => {id}.Instance;"); @@ -242,8 +230,8 @@ static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht) private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id) { RegexOptions options = (RegexOptions)rm.Options; - var code = rm.Code; - var lcc = code.LeadingCharClasses; + RegexCode code = rm.Code; + (string CharClass, bool CaseInsensitive)[]? lcc = code.LeadingCharClasses; bool rtl = code.RightToLeft; bool hasTextInfo = false; bool textInfoEmitted = false; @@ -523,7 +511,7 @@ void EmitAnchorAndLeadingChecks() writer.WriteLine("return true;"); } } - else if (code.LeadingCharClasses is null) + else if (lcc is null) { writer.WriteLine("return true;"); } @@ -680,7 +668,11 @@ void EmitAnchorAndLeadingChecks() private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id) { Debug.Assert(rm.Code.Tree.Root.Type == RegexNode.Capture); - if (RegexNode.NodeSupportsSimplifiedCodeGenerationImplementation(rm.Code.Tree.Root.Child(0), RegexNode.DefaultMaxRecursionDepth) && + if ((rm.Options & RegexOptions.NonBacktracking) != 0) + { + EmitNonBacktrackingGo(writer, rm, id); + } + else if (RegexNode.NodeSupportsSimplifiedCodeGenerationImplementation(rm.Code.Tree.Root.Child(0), RegexNode.DefaultMaxRecursionDepth) && (((RegexOptions)rm.Code.Tree.Root.Options) & RegexOptions.RightToLeft) == 0) { EmitSimplifiedGo(writer, rm, id); @@ -691,6 +683,12 @@ private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id) } } + /// Emits the body of a Go method supporting RegexOptions.NonBacktracking. + private static void EmitNonBacktrackingGo(IndentedTextWriter writer, RegexMethod rm, string id) + { + // TODO: Implement this and remove SupportsCustomCodeGeneration. + } + /// Emits the body of a simplified Go implementation that's possible when there's minimal backtracking required by the expression. private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, string id) { @@ -888,7 +886,7 @@ void EmitSwitchedBranches() Debug.Assert(child.Type is RegexNode.One or RegexNode.Multi or RegexNode.Concatenate, child.Description()); Debug.Assert(child.Type is not RegexNode.Concatenate || (child.ChildCount() >= 2 && child.Child(0).Type is RegexNode.One or RegexNode.Multi)); - RegexNode childStart = child.FindBranchOneOrMultiStart(); + RegexNode? childStart = child.FindBranchOneOrMultiStart(); Debug.Assert(childStart is not null, child.Description()); writer.WriteLine($"case {Literal(childStart.FirstCharOfOneOrMulti())}:"); @@ -1248,7 +1246,7 @@ void EmitUpdateBumpalong() } // Emits the code to handle a single-character match. - void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string offset = null) + void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset = null) { string expr = $"{textSpanLocal}[{Sum(textSpanPos, offset)}]"; switch (node.Type) @@ -1843,7 +1841,7 @@ private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, st const string Backtrack = "Backtrack"; // label for backtracking int[] codes = rm.Code.Codes; - RegexOptions options = (RegexOptions)rm.Options.Value; + RegexOptions options = rm.Options; int labelCounter = 0; string DefineLabel(string prefix = "L") => $"{prefix}{labelCounter++}"; @@ -1919,6 +1917,7 @@ private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, st { using (EmitBlock(writer, $"case {i}:")) { + Debug.Assert(notes is not null); BacktrackNote n = notes[i]; if (n.flags != 0) { @@ -2879,7 +2878,7 @@ void Goto(int i) /// void Trackagain() => PushTrack(currentBacktrackNote); - void PushTrack(T expr) => writer.WriteLine($"{ReadyPushTrack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};"); + void PushTrack(T expr) where T : notnull => writer.WriteLine($"{ReadyPushTrack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};"); /// Retrieves the top entry on the tracking stack without popping. string TopTrack() => "runtrack[runtrackpos]"; @@ -2896,7 +2895,7 @@ void Goto(int i) int Code() => currentOpcode & RegexCode.Mask; /// Saves the value of a local variable on the grouping stack. - void PushStack(T expr) => writer.WriteLine($"{ReadyPushStack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};"); + void PushStack(T expr) where T : notnull => writer.WriteLine($"{ReadyPushStack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};"); string ReadyPushStack() => "runstack[--runstackpos]"; @@ -2924,7 +2923,7 @@ int AddUniqueTrack(int i, int flags = RegexCode.Back) int NextCodepos() => currentCodePos + RegexCode.OpcodeSize(codes[currentCodePos]); /// The label for the next (forward) operation. - string AdvanceLabel() => labels![NextCodepos()]; + string AdvanceLabel() => labels[NextCodepos()]!; /// Goto the next (forward) operation. void Advance() => writer.WriteLine($"goto {AdvanceLabel()};"); @@ -2971,7 +2970,7 @@ int AddGoto(int destpos) { if (forwardJumpsThroughSwitch[destpos] == -1) { - forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels![destpos], destpos); + forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels[destpos]!, destpos); } return forwardJumpsThroughSwitch[destpos]; @@ -2998,7 +2997,7 @@ private record BacktrackNote(int flags, string label, int codepos); private static bool EmitLoopTimeoutCounterIfNeeded(IndentedTextWriter writer, RegexMethod rm) { - if (rm.MatchTimeout.HasValue && rm.MatchTimeout.Value != Timeout.Infinite) + if (rm.MatchTimeout != Timeout.Infinite) { writer.WriteLine("int loopTimeoutCounter = 0;"); return true; diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs index 38b613c762752b..045dbd54679689 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs @@ -55,7 +55,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => return null; } - TypeDeclarationSyntax typeDec = methodSyntax.Parent as TypeDeclarationSyntax; + TypeDeclarationSyntax? typeDec = methodSyntax.Parent as TypeDeclarationSyntax; if (typeDec is null) { return null; @@ -63,7 +63,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => SemanticModel sm = compilation.GetSemanticModel(methodSyntax.SyntaxTree); - IMethodSymbol regexMethodSymbol = sm.GetDeclaredSymbol(methodSyntax, cancellationToken) as IMethodSymbol; + IMethodSymbol? regexMethodSymbol = sm.GetDeclaredSymbol(methodSyntax, cancellationToken) as IMethodSymbol; if (regexMethodSymbol is null) { return null; @@ -75,10 +75,13 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => return null; } - RegexMethod? regexMethod = null; + bool attributeFound = false; + string? pattern = null; + int? options = null; + int? matchTimeout = null; foreach (AttributeData attributeData in boundAttributes) { - if (!attributeData.AttributeClass.Equals(regexGeneratorAttributeSymbol)) + if (attributeData.AttributeClass?.Equals(regexGeneratorAttributeSymbol) != true) { continue; } @@ -88,7 +91,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexGeneratorAttribute, methodSyntax.GetLocation()); } - if (regexMethod is not null) + if (pattern is not null) { return Diagnostic.Create(DiagnosticDescriptors.MultipleRegexGeneratorAttributes, methodSyntax.GetLocation()); } @@ -99,20 +102,24 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexGeneratorAttribute, methodSyntax.GetLocation()); } - regexMethod = items.Length switch + attributeFound = true; + pattern = items[0].Value as string; + if (items.Length >= 2) { - 1 => new RegexMethod { Pattern = items[0].Value as string }, - 2 => new RegexMethod { Pattern = items[0].Value as string, Options = items[1].Value as int? }, - _ => new RegexMethod { Pattern = items[0].Value as string, Options = items[1].Value as int?, MatchTimeout = items[2].Value as int? }, - }; + options = items[1].Value as int?; + if (items.Length == 3) + { + matchTimeout = items[2].Value as int?; + } + } } - if (regexMethod is null) + if (!attributeFound) { return null; } - if (regexMethod.Pattern is null) + if (pattern is null) { return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "(null)"); } @@ -130,11 +137,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => return Diagnostic.Create(DiagnosticDescriptors.InvalidLangVersion, methodSyntax.GetLocation()); } - regexMethod.MethodName = regexMethodSymbol.Name; - regexMethod.Modifiers = methodSyntax.Modifiers.ToString(); - regexMethod.MatchTimeout ??= Timeout.Infinite; - RegexOptions options = regexMethod.Options.HasValue ? (RegexOptions)regexMethod.Options.Value : RegexOptions.None; - regexMethod.Options = (int)RegexOptions.Compiled | (int)options; + RegexOptions regexOptions = RegexOptions.Compiled | (options is not null ? (RegexOptions)options : RegexOptions.None); // TODO: This is going to include the culture that's current at the time of compilation. // What should we do about that? We could: @@ -143,7 +146,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => // - just use whatever culture is present at build time // - devise a new way of not using the culture present at build time // - ... - CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; + CultureInfo culture = (regexOptions & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; // Validate the options const RegexOptions SupportedOptions = @@ -154,24 +157,28 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.RightToLeft | +#if DEBUG + RegexOptions.Debug | +#endif RegexOptions.ECMAScript | - RegexOptions.CultureInvariant; - if ((regexMethod.Options.Value & ~(int)SupportedOptions) != 0) + RegexOptions.CultureInvariant | + RegexOptions.NonBacktracking; + if ((regexOptions & ~SupportedOptions) != 0) { return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "options"); } // Validate the timeout - if (regexMethod.MatchTimeout.Value is 0 or < -1) + if (matchTimeout is 0 or < -1) { return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "matchTimeout"); } // Parse the input pattern + RegexCode code; try { - RegexTree tree = RegexParser.Parse(regexMethod.Pattern, (RegexOptions)regexMethod.Options, culture); - regexMethod.Code = RegexWriter.Write(tree); + code = RegexWriter.Write(RegexParser.Parse(pattern, regexOptions, culture)); } catch (Exception e) { @@ -179,38 +186,41 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => } // Determine the namespace the class is declared in, if any - string? ns = regexMethodSymbol?.ContainingType?.ContainingNamespace?.ToDisplayString( + string? ns = regexMethodSymbol.ContainingType?.ContainingNamespace?.ToDisplayString( SymbolDisplayFormat.FullyQualifiedFormat.WithGlobalNamespaceStyle(SymbolDisplayGlobalNamespaceStyle.Omitted)); - var rc = new RegexType - { - Keyword = typeDec is RecordDeclarationSyntax rds ? $"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText, - Namespace = ns, - Name = $"{typeDec.Identifier}{typeDec.TypeParameterList}", - Constraints = typeDec.ConstraintClauses.ToString(), - ParentClass = null, - Method = regexMethod, - }; - - RegexType current = rc; + var regexMethod = new RegexMethod( + regexMethodSymbol.Name, + methodSyntax.Modifiers.ToString(), + pattern, + regexOptions, + matchTimeout ?? Timeout.Infinite, + code); + + var regexType = new RegexType( + regexMethod, + typeDec is RecordDeclarationSyntax rds ? $"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText, + ns ?? string.Empty, + $"{typeDec.Identifier}{typeDec.TypeParameterList}", + typeDec.ConstraintClauses.ToString()); + + RegexType current = regexType; var parent = typeDec.Parent as TypeDeclarationSyntax; while (parent is not null && IsAllowedKind(parent.Kind())) { - current.ParentClass = new RegexType - { - Keyword = parent is RecordDeclarationSyntax rds2 ? $"{parent.Keyword.ValueText} {rds2.ClassOrStructKeyword}" : parent.Keyword.ValueText, - Namespace = ns, - Name = $"{parent.Identifier}{parent.TypeParameterList}", - Constraints = parent.ConstraintClauses.ToString(), - ParentClass = null, - }; + current.ParentClass = new RegexType( + null, + parent is RecordDeclarationSyntax rds2 ? $"{parent.Keyword.ValueText} {rds2.ClassOrStructKeyword}" : parent.Keyword.ValueText, + ns ?? string.Empty, + $"{parent.Identifier}{parent.TypeParameterList}", + parent.ConstraintClauses.ToString()); current = current.ParentClass; parent = parent.Parent as TypeDeclarationSyntax; } - return rc; + return regexType; static bool IsAllowedKind(SyntaxKind kind) => kind == SyntaxKind.ClassDeclaration || @@ -220,26 +230,13 @@ static bool IsAllowedKind(SyntaxKind kind) => kind == SyntaxKind.InterfaceDeclaration; } - /// A type holding a regex method. - internal sealed class RegexType - { - public RegexMethod Method; - public string Keyword = string.Empty; - public string Namespace = string.Empty; - public string Name = string.Empty; - public string Constraints = string.Empty; - public RegexType? ParentClass; - } - /// A regex method. - internal sealed class RegexMethod + internal sealed record RegexMethod(string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexCode Code); + + /// A type holding a regex method. + internal sealed record RegexType(RegexMethod? Method, string Keyword, string Namespace, string Name, string Constraints) { - public string MethodName = string.Empty; - public string Pattern = string.Empty; - public int? Options; - public int? MatchTimeout; - public string Modifiers = string.Empty; - public RegexCode Code; + public RegexType? ParentClass { get; set; } } } } diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index dfed01439d81f0..56bcb17935b6f9 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -40,11 +40,14 @@ public void Initialize(IncrementalGeneratorInitializationContext context) // Use a custom comparer that ignores the compilation. We want to avoid regenerating for regex methods // that haven't been changed, but any change to a regex method will change the Compilation, so we ignore // the Compilation for purposes of caching. - .WithComparer(new LambdaComparer<(MethodDeclarationSyntax, Compilation)>(static (left, right) => left.Item1.Equals(left.Item2), static o => o.Item1.GetHashCode())) + .WithComparer(new LambdaComparer<(MethodDeclarationSyntax?, Compilation)>( + static (left, right) => EqualityComparer.Default.Equals(left.Item1, right.Item1), + static o => o.Item1?.GetHashCode() ?? 0)) // Get the resulting code string or error Diagnostic for each MethodDeclarationSyntax/Compilation pair .Select((state, cancellationToken) => { + Debug.Assert(state.Item1 is not null); object? result = GetRegexTypeToEmit(state.Item2, state.Item1, cancellationToken); return result is RegexType regexType ? EmitRegexType(regexType) : result; }) @@ -79,16 +82,16 @@ public void Initialize(IncrementalGeneratorInitializationContext context) private sealed class LambdaComparer : IEqualityComparer { - private readonly Func _equal; - private readonly Func _getHashCode; + private readonly Func _equal; + private readonly Func _getHashCode; - public LambdaComparer(Func equal, Func getHashCode) + public LambdaComparer(Func equal, Func getHashCode) { _equal = equal; _getHashCode = getHashCode; } - public bool Equals(T x, T y) => _equal(x, y); + public bool Equals(T? x, T? y) => _equal(x, y); public int GetHashCode(T obj) => _getHashCode(obj); } diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs index 2445333f259ce1..2212211696af9a 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs @@ -55,7 +55,7 @@ partial class C } [Theory] - [InlineData(128)] + [InlineData(0x800)] public async Task Diagnostic_InvalidRegexOptions(int options) { IReadOnlyList diagnostics = await RunGenerator(@$"